diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
32 files changed, 4531 insertions, 1953 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 117170014e88..a84b652f9b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -31,3 +31,10 @@ config MLX5_CORE_EN_DCB This flag is depended on the kernel's DCB support. If unsure, set to Y + +config MLX5_CORE_IPOIB + bool "Mellanox Technologies ConnectX-4 IPoIB offloads support" + depends on MLX5_CORE_EN + default y + ---help--- + MLX5 IPoIB offloads & acceleration support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9f43beb86250..9e644615f07a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -11,3 +11,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o + +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a380353a78c2..5bdaf3d545b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -279,6 +279,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_XRC_SRQ: case MLX5_CMD_OP_DESTROY_DCT: case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: case MLX5_CMD_OP_DEALLOC_PD: case MLX5_CMD_OP_DEALLOC_UAR: case MLX5_CMD_OP_DETACH_FROM_MCG: @@ -305,8 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: - case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: 
+ case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -363,6 +364,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_Q_COUNTER: case MLX5_CMD_OP_SET_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: case MLX5_CMD_OP_CONFIG_INT_MODERATION: @@ -414,10 +419,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: - case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -501,6 +503,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); MLX5_COMMAND_STR_CASE(ALLOC_UAR); @@ -576,12 +584,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); - MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); - 
MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); - MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3d9490cd2db1..0099a3e397bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -37,6 +37,7 @@ #include <linux/timecounter.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#include <linux/crash_dump.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/cq.h> @@ -111,18 +112,13 @@ #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ -#define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_IHS_DS_COUNT \ - DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT \ ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) -#define MLX5E_XDP_TX_WQEBBS \ - DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) #define MLX5E_NUM_MAIN_GROUPS 9 @@ -158,6 +154,14 @@ static inline int mlx5_max_log_rq_size(int wq_type) } } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? 
+ MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -187,15 +191,15 @@ enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1), }; -#define MLX5E_SET_PFLAG(priv, pflag, enable) \ +#define MLX5E_SET_PFLAG(params, pflag, enable) \ do { \ if (enable) \ - (priv)->params.pflags |= (pflag); \ + (params)->pflags |= (pflag); \ else \ - (priv)->params.pflags &= ~(pflag); \ + (params)->pflags &= ~(pflag); \ } while (0) -#define MLX5E_GET_PFLAG(priv, pflag) (!!((priv)->params.pflags & (pflag))) +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -218,7 +222,6 @@ struct mlx5e_params { bool rx_cqe_compress_def; struct mlx5e_cq_moder rx_cq_moderation; struct mlx5e_cq_moder tx_cq_moderation; - u16 min_rx_wqes; bool lro_en; u32 lro_wqe_sz; u16 tx_max_inline; @@ -227,9 +230,11 @@ struct mlx5e_params { u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; + bool scatter_fcs_en; bool rx_am_enabled; u32 lro_timeout; u32 pflags; + struct bpf_prog *xdp_prog; }; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -285,7 +290,6 @@ struct mlx5e_cq { struct napi_struct *napi; struct mlx5_core_cq mcq; struct mlx5e_channel *channel; - struct mlx5e_priv *priv; /* cqe decompression */ struct mlx5_cqe64 title; @@ -295,22 +299,163 @@ struct mlx5e_cq { u16 decmprs_wqe_counter; /* control */ + struct mlx5_core_dev *mdev; struct mlx5_frag_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; -struct mlx5e_rq; -typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, - u16 ix); +struct mlx5e_tx_wqe_info { + struct sk_buff *skb; + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +enum mlx5e_dma_map_type 
{ + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +enum { + MLX5E_SQ_STATE_ENABLED, +}; + +struct mlx5e_sq_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + +struct mlx5e_txqsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u16 max_inline; + u8 min_inline_mode; + u16 edge; + struct device *pdev; + struct mlx5e_tstamp *tstamp; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; + int txq_ix; + u32 rate_limit; +} ____cacheline_aligned_in_smp; + +struct mlx5e_xdpsq { + /* data path */ + + /* dirtied @rx completion */ + u16 cc; + u16 pc; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_dma_info *di; + bool doorbell; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + struct device *pdev; + __be32 mkey_be; + u8 min_inline_mode; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_icosq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + u16 prev_cc; -typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_wqe_info *ico_wqe; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + 
u16 edge; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (((wq->sz_m1 & (cc - pc)) >= n) || (cc == pc)); +} struct mlx5e_dma_info { struct page *page; dma_addr_t addr; }; +struct mlx5e_umr_dma_info { + __be64 *mtt; + dma_addr_t mtt_addr; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; +}; + +struct mlx5e_mpw_info { + struct mlx5e_umr_dma_info umr; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; +}; + struct mlx5e_rx_am_stats { int ppms; /* packets per msec */ int epms; /* events per msec */ @@ -347,6 +492,11 @@ struct mlx5e_page_cache { struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; }; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -381,7 +531,10 @@ struct mlx5e_rq { u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ + + /* XDP */ struct bpf_prog *xdp_prog; + struct mlx5e_xdpsq xdpsq; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -390,118 +543,10 @@ struct mlx5e_rq { u32 mpwqe_num_strides; u32 rqn; struct mlx5e_channel *channel; - struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -struct mlx5e_umr_dma_info { - __be64 *mtt; - dma_addr_t mtt_addr; - struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; - struct mlx5e_umr_wqe wqe; -}; - -struct mlx5e_mpw_info { - struct mlx5e_umr_dma_info umr; - u16 consumed_strides; - u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; -}; - -struct mlx5e_tx_wqe_info { - u32 num_bytes; - u8 num_wqebbs; - u8 
num_dma; -}; - -enum mlx5e_dma_map_type { - MLX5E_DMA_MAP_SINGLE, - MLX5E_DMA_MAP_PAGE -}; - -struct mlx5e_sq_dma { - dma_addr_t addr; - u32 size; - enum mlx5e_dma_map_type type; -}; - -enum { - MLX5E_SQ_STATE_ENABLED, - MLX5E_SQ_STATE_BF_ENABLE, -}; - -struct mlx5e_sq_wqe_info { - u8 opcode; - u8 num_wqebbs; -}; - -enum mlx5e_sq_type { - MLX5E_SQ_TXQ, - MLX5E_SQ_ICO, - MLX5E_SQ_XDP -}; - -struct mlx5e_sq { - /* data path */ - - /* dirtied @completion */ - u16 cc; - u32 dma_fifo_cc; - - /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 bf_offset; - u16 prev_cc; - u8 bf_budget; - struct mlx5e_sq_stats stats; - - struct mlx5e_cq cq; - - /* pointers to per tx element info: write@xmit, read@completion */ - union { - struct { - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; - } txq; - struct mlx5e_sq_wqe_info *ico_wqe; - struct { - struct mlx5e_sq_wqe_info *wqe_info; - struct mlx5e_dma_info *di; - bool doorbell; - } xdp; - } db; - - /* read only */ - struct mlx5_wq_cyc wq; - u32 dma_fifo_mask; - void __iomem *uar_map; - struct netdev_queue *txq; - u32 sqn; - u16 bf_buf_size; - u16 max_inline; - u8 min_inline_mode; - u16 edge; - struct device *pdev; - struct mlx5e_tstamp *tstamp; - __be32 mkey_be; - unsigned long state; - - /* control path */ - struct mlx5_wq_ctrl wq_ctrl; - struct mlx5_sq_bfreg bfreg; - struct mlx5e_channel *channel; - int tc; - u32 rate_limit; - u8 type; -} ____cacheline_aligned_in_smp; - -static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) -{ - return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) || - (sq->cc == sq->pc)); -} - enum channel_flags { MLX5E_CHANNEL_NAPI_SCHED = 1, }; @@ -509,9 +554,8 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; - struct mlx5e_sq xdp_sq; - struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; - struct mlx5e_sq icosq; /* internal control operations */ + struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; + struct 
mlx5e_icosq icosq; /* internal control operations */ bool xdp; struct napi_struct napi; struct device *pdev; @@ -522,10 +566,18 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct mlx5e_tstamp *tstamp; int ix; int cpu; }; +struct mlx5e_channels { + struct mlx5e_channel **c; + unsigned int num; + struct mlx5e_params params; +}; + enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, @@ -675,34 +727,17 @@ enum { MLX5E_NIC_PRIO }; -struct mlx5e_profile { - void (*init)(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, void *ppriv); - void (*cleanup)(struct mlx5e_priv *priv); - int (*init_rx)(struct mlx5e_priv *priv); - void (*cleanup_rx)(struct mlx5e_priv *priv); - int (*init_tx)(struct mlx5e_priv *priv); - void (*cleanup_tx)(struct mlx5e_priv *priv); - void (*enable)(struct mlx5e_priv *priv); - void (*disable)(struct mlx5e_priv *priv); - void (*update_stats)(struct mlx5e_priv *priv); - int (*max_nch)(struct mlx5_core_dev *mdev); - int max_tc; -}; - struct mlx5e_priv { /* priv data path fields - start */ - struct mlx5e_sq **txq_to_sq_map; - int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; - struct bpf_prog *xdp_prog; + struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; + int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; /* priv data path fields - end */ unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; - struct mlx5e_channel **channel; + struct mlx5e_channels channels; u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; @@ -712,7 +747,6 @@ struct mlx5e_priv { struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; - struct mlx5e_params params; struct workqueue_struct *wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; @@ -732,9 +766,28 @@ struct mlx5e_priv { void *ppriv; }; 
+struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + struct { + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + } rx_handlers; + int max_tc; +}; + void mlx5e_build_ptys2ethtool_map(void); -void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); @@ -744,7 +797,9 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_sq_descs(struct mlx5e_sq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); @@ -792,7 +847,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv, struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); -void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, 
__always_unused __be16 proto, u16 vid); @@ -801,14 +856,40 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); -int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); +struct mlx5e_redirect_rqt_param { + bool is_rss; + union { + u32 rqn; /* Direct RQN (Non-RSS) */ + struct { + u8 hfunc; + struct mlx5e_channels *channels; + } rss; /* RSS data */ + }; +}; -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, - enum mlx5e_traffic_types tt); +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp); +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs); +void mlx5e_close_channels(struct mlx5e_channels *chs); + +/* Function pointer to be used to modify HW settings while + * switching channels + */ +typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); + void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, u32 *indirection_rqt, int len, int num_channels); @@ -816,30 +897,43 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type); +void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 
rq_type); -static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) +static inline +struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { - u16 ofst = sq->bf_offset; + u16 pi = *pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + (*pc)++; + + return wqe; +} + +static inline +void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, + void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; /* ensure wqe is visible to device before updating doorbell record */ dma_wmb(); - *sq->wq.db = cpu_to_be32(sq->pc); + *wq->db = cpu_to_be32(pc); /* ensure doorbell record is visible to device before ringing the * doorbell */ wmb(); - if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); - else - mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); - /* flush the write-combining mapped buffer */ - wmb(); - sq->bf_offset ^= sq->bf_buf_size; + mlx5_write64((__be32 *)ctrl, uar_map, NULL); } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) @@ -895,44 +989,43 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); -int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, - bool enable_uc_lb); - -struct mlx5_eswitch_rep; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int 
mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); -void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); -void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); + +/* common netdev helpers */ +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); + +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn); +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); + int mlx5e_create_tises(struct mlx5e_priv *priv); void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, - void *ppriv); -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); -int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); -void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); -int mlx5e_get_offload_stats(int attr_id, const struct 
net_device *dev, - void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); +/* mlx5e generic netdev management API */ +struct net_device* +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + void *ppriv); +int mlx5e_attach_netdev(struct mlx5e_priv *priv); +void mlx5e_detach_netdev(struct mlx5e_priv *priv); +void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels); -bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); -bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 68419a01db36..c8a005326e30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -174,13 +174,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; - struct mlx5_flow_destination dest; struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; @@ -325,10 +321,16 @@ static int arfs_create_table(struct mlx5e_priv *priv, { struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0); + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE; + ft_attr.level = MLX5E_ARFS_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if 
(IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -469,15 +471,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct arfs_table *arfs_table; struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 37e66eef6fb5..e706a87fc8b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -90,6 +90,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) { struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config config; + int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return -EOPNOTSUPP; @@ -111,7 +112,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def); + mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -129,7 +130,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: /* Disable CQE compression */ netdev_warn(dev, "Disabling cqe compression"); - mlx5e_modify_rx_cqe_compression_locked(priv, false); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(dev, "Failed disabling cqe 
compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index bd898d8deda0..f1f17f7a3cd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -107,10 +107,18 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); + if (err) { + mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + goto err_destroy_mkey; + } + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); return 0; +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &res->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); err_dealloc_pd: @@ -122,23 +130,26 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) { struct mlx5e_resources *res = &mdev->mlx5e_res; + mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); } -int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, - bool enable_uc_lb) +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - void *in; + int err = -ENOMEM; + u32 tirn = 0; int inlen; - int err = 0; + void *in; + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); if (!in) - return -ENOMEM; + goto out; if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, @@ -147,13 +158,16 @@ int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { - err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + tirn = tir->tirn; + err = 
mlx5_core_modify_tir(mdev, tirn, in, inlen); if (err) goto out; } out: kvfree(in); + if (err) + netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a004a5a1a4c2..ce7b09d72ff6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -42,8 +42,9 @@ static void mlx5e_get_drvinfo(struct net_device *dev, strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%d", - fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), + mdev->board_id); strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), sizeof(drvinfo->bus_info)); } @@ -152,12 +153,9 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) } #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) -#define MLX5E_NUM_RQ_STATS(priv) \ - (NUM_RQ_STATS * priv->params.num_channels * \ - test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) #define MLX5E_NUM_SQ_STATS(priv) \ - (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ - test_bit(MLX5E_STATE_OPENED, &priv->state)) + (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) #define MLX5E_NUM_PFC_COUNTERS(priv) \ ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ NUM_PPORT_PER_PRIO_PFC_COUNTERS) @@ -262,17 +260,17 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) return; /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, 
rq_stats_desc[j].format, i); - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, sq_stats_desc[j].format, - priv->channeltc_to_txq_map[i][tc]); + priv->channel_tc2txq[i][tc]); } static void mlx5e_get_strings(struct net_device *dev, @@ -303,6 +301,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *channels; struct mlx5_priv *mlx5_priv; int i, j, tc, prio, idx = 0; unsigned long pfc_combined; @@ -313,6 +312,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_stats(priv); + channels = &priv->channels; mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_SW_COUNTERS; i++) @@ -382,16 +382,16 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, return; /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, rq_stats_desc, j); - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, sq_stats_desc, j); } @@ -406,8 +406,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_wqe; - stride_size = 1 << 
priv->params.mpwqe_log_stride_sz; - num_strides = 1 << priv->params.mpwqe_log_num_strides; + stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; + num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; wqe_size = stride_size * num_strides; packets_per_wqe = wqe_size / @@ -427,8 +427,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_packets; - stride_size = 1 << priv->params.mpwqe_log_stride_sz; - num_strides = 1 << priv->params.mpwqe_log_num_strides; + stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; + num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; wqe_size = stride_size * num_strides; num_packets = (1 << order_base_2(num_packets)); @@ -443,26 +443,25 @@ static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); - int rq_wq_type = priv->params.rq_wq_type; + int rq_wq_type = priv->channels.params.rq_wq_type; param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, 1 << mlx5_max_log_rq_size(rq_wq_type)); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << priv->params.log_rq_size); - param->tx_pending = 1 << priv->params.log_sq_size; + 1 << priv->channels.params.log_rq_size); + param->tx_pending = 1 << priv->channels.params.log_sq_size; } static int mlx5e_set_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); - bool was_opened; - int rq_wq_type = priv->params.rq_wq_type; + int rq_wq_type = priv->channels.params.rq_wq_type; + struct mlx5e_channels new_channels = {}; u32 rx_pending_wqes; u32 min_rq_size; u32 max_rq_size; - u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; u32 num_mtts; @@ -500,7 +499,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, } num_mtts = 
MLX5E_REQUIRED_MTTS(rx_pending_wqes); - if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", __func__, param->rx_pending); @@ -522,26 +521,29 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes); - if (log_rq_size == priv->params.log_rq_size && - log_sq_size == priv->params.log_sq_size && - min_rx_wqes == priv->params.min_rx_wqes) + if (log_rq_size == priv->channels.params.log_rq_size && + log_sq_size == priv->channels.params.log_sq_size) return 0; mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); + new_channels.params = priv->channels.params; + new_channels.params.log_rq_size = log_rq_size; + new_channels.params.log_sq_size = log_sq_size; - priv->params.log_rq_size = log_rq_size; - priv->params.log_sq_size = log_sq_size; - priv->params.min_rx_wqes = min_rx_wqes; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto unlock; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto unlock; - if (was_opened) - err = mlx5e_open_locked(dev); + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +unlock: mutex_unlock(&priv->state_lock); return err; @@ -553,7 +555,7 @@ static void mlx5e_get_channels(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); ch->max_combined = priv->profile->max_nch(priv->mdev); - ch->combined_count = priv->params.num_channels; + ch->combined_count = priv->channels.params.num_channels; } static int mlx5e_set_channels(struct net_device *dev, @@ -561,8 +563,8 @@ static int mlx5e_set_channels(struct 
net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); unsigned int count = ch->combined_count; + struct mlx5e_channels new_channels = {}; bool arfs_enabled; - bool was_opened; int err = 0; if (!count) { @@ -571,27 +573,32 @@ static int mlx5e_set_channels(struct net_device *dev, return -EINVAL; } - if (priv->params.num_channels == count) + if (priv->channels.params.num_channels == count) return 0; mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); + new_channels.params = priv->channels.params; + new_channels.params.num_channels = count; + mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + /* Create fresh channels with new parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; arfs_enabled = dev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); - priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); - - if (was_opened) - err = mlx5e_open_locked(dev); - if (err) - goto out; + /* Switch to new channels, set new parameters and close old ones */ + mlx5e_switch_priv_channels(priv, &new_channels, NULL); if (arfs_enabled) { err = mlx5e_arfs_enable(priv); @@ -614,49 +621,24 @@ static int mlx5e_get_coalesce(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; - coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; - coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; - coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; - coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; - coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled; + coal->rx_coalesce_usecs = 
priv->channels.params.rx_cq_moderation.usec; + coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts; + coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec; + coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_am_enabled; return 0; } -static int mlx5e_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +static void +mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channel *c; - bool restart = - !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled; - bool was_opened; - int err = 0; int tc; int i; - if (!MLX5_CAP_GEN(mdev, cq_moderation)) - return -EOPNOTSUPP; - - mutex_lock(&priv->state_lock); - - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened && restart) { - mlx5e_close_locked(netdev); - priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; - } - - priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; - priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; - priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; - priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - - if (!was_opened || restart) - goto out; - - for (i = 0; i < priv->params.num_channels; ++i) { - c = priv->channel[i]; + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; for (tc = 0; tc < c->num_tc; tc++) { mlx5_core_modify_cq_moderation(mdev, @@ -669,11 +651,50 @@ static int mlx5e_set_coalesce(struct net_device *netdev, coal->rx_coalesce_usecs, coal->rx_max_coalesced_frames); } +} -out: - if (was_opened && restart) - err = mlx5e_open_locked(netdev); +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = 
netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + + new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; + new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; + new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; + new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; + new_channels.params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + /* we are opened */ + + reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_am_enabled; + if (!reset) { + mlx5e_set_priv_channels_coalesce(priv, coal); + priv->channels.params = new_channels.params; + goto out; + } + + /* open fresh channels with new coal parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: mutex_unlock(&priv->state_lock); return err; } @@ -968,7 +989,7 @@ static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return sizeof(priv->params.toeplitz_hash_key); + return sizeof(priv->channels.params.toeplitz_hash_key); } static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) @@ -982,15 +1003,15 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, struct mlx5e_priv *priv = netdev_priv(netdev); if (indir) - memcpy(indir, priv->params.indirection_rqt, - sizeof(priv->params.indirection_rqt)); + memcpy(indir, priv->channels.params.indirection_rqt, + sizeof(priv->channels.params.indirection_rqt)); if (key) - memcpy(key, priv->params.toeplitz_hash_key, - sizeof(priv->params.toeplitz_hash_key)); + 
memcpy(key, priv->channels.params.toeplitz_hash_key, + sizeof(priv->channels.params.toeplitz_hash_key)); if (hfunc) - *hfunc = priv->params.rss_hfunc; + *hfunc = priv->channels.params.rss_hfunc; return 0; } @@ -1006,7 +1027,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } } @@ -1030,25 +1051,37 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); - if (indir) { - u32 rqtn = priv->indir_rqt.rqtn; - - memcpy(priv->params.indirection_rqt, indir, - sizeof(priv->params.indirection_rqt)); - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); - } - if (hfunc != ETH_RSS_HASH_NO_CHANGE && - hfunc != priv->params.rss_hfunc) { - priv->params.rss_hfunc = hfunc; + hfunc != priv->channels.params.rss_hfunc) { + priv->channels.params.rss_hfunc = hfunc; hash_changed = true; } + if (indir) { + memcpy(priv->channels.params.indirection_rqt, indir, + sizeof(priv->channels.params.indirection_rqt)); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + u32 rqtn = priv->indir_rqt.rqtn; + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .hfunc = priv->channels.params.rss_hfunc, + .channels = &priv->channels, + }, + }, + }; + + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + } + } + if (key) { - memcpy(priv->params.toeplitz_hash_key, key, - sizeof(priv->params.toeplitz_hash_key)); + memcpy(priv->channels.params.toeplitz_hash_key, key, + sizeof(priv->channels.params.toeplitz_hash_key)); hash_changed = hash_changed || - priv->params.rss_hfunc == ETH_RSS_HASH_TOP; + priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; } if (hash_changed) @@ -1069,7 +1102,7 @@ static int mlx5e_get_rxnfc(struct net_device 
*netdev, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = priv->params.num_channels; + info->data = priv->channels.params.num_channels; break; case ETHTOOL_GRXCLSRLCNT: info->rule_cnt = priv->fs.ethtool.tot_num_rules; @@ -1097,7 +1130,7 @@ static int mlx5e_get_tunable(struct net_device *dev, switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: - *(u32 *)data = priv->params.tx_max_inline; + *(u32 *)data = priv->channels.params.tx_max_inline; break; default: err = -EINVAL; @@ -1113,9 +1146,11 @@ static int mlx5e_set_tunable(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - bool was_opened; - u32 val; + struct mlx5e_channels new_channels = {}; int err = 0; + u32 val; + + mutex_lock(&priv->state_lock); switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: @@ -1125,24 +1160,26 @@ static int mlx5e_set_tunable(struct net_device *dev, break; } - mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + new_channels.params.tx_max_inline = val; - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); - - priv->params.tx_max_inline = val; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + break; + } - if (was_opened) - err = mlx5e_open_locked(dev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + break; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); - mutex_unlock(&priv->state_lock); break; default: err = -EINVAL; break; } + mutex_unlock(&priv->state_lock); return err; } @@ -1442,15 +1479,15 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; bool rx_mode_changed; u8 rx_cq_period_mode; int err = 0; - bool reset; rx_cq_period_mode = enable ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode; + rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode; if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) @@ -1459,16 +1496,51 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) if (!rx_mode_changed) return 0; - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (reset) - mlx5e_close_locked(netdev); + new_channels.params = priv->channels.params; + mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode); - mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } - if (reset) - err = mlx5e_open_locked(netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; - return err; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) +{ + bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); + struct mlx5e_channels new_channels = {}; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return new_val ? 
-EOPNOTSUPP : 0; + + if (curr_val == new_val) + return 0; + + new_channels.params = priv->channels.params; + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + + mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, + new_channels.params.rq_wq_type); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; } static int set_pflag_rx_cqe_compress(struct net_device *netdev, @@ -1486,8 +1558,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, } mlx5e_modify_rx_cqe_compression_locked(priv, enable); - priv->params.rx_cqe_compress_def = enable; - mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); + priv->channels.params.rx_cqe_compress_def = enable; return 0; } @@ -1499,7 +1570,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); bool enable = !!(wanted_flags & flag); - u32 changes = wanted_flags ^ priv->params.pflags; + u32 changes = wanted_flags ^ priv->channels.params.pflags; int err; if (!(changes & flag)) @@ -1512,7 +1583,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev, return err; } - MLX5E_SET_PFLAG(priv, flag, enable); + MLX5E_SET_PFLAG(&priv->channels.params, flag, enable); return 0; } @@ -1541,7 +1612,7 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return priv->params.pflags; + return priv->channels.params.pflags; } static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f2762e45c8ae..576d6787b484 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -159,14 +159,10 
@@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -659,11 +655,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; @@ -800,7 +792,7 @@ err: return err; } -static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -808,14 +800,19 @@ static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0); + ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.underlay_qpn = underlay_qpn; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -848,13 +845,9 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { - 
struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; u8 *mc_dmac; @@ -985,12 +978,16 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) { struct mlx5e_l2_table *l2_table = &priv->fs.l2; struct mlx5e_flow_table *ft = &l2_table->ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0); + ft_attr.max_fte = MLX5E_L2_TABLE_SIZE; + ft_attr.level = MLX5E_L2_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1088,11 +1085,16 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0); + + ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; + ft_attr.level = MLX5E_VLAN_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1145,7 +1147,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv); + err = mlx5e_create_ttc_table(priv, 0); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 26fc77e80f7b..85bf4a389295 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -390,7 +390,7 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -EINVAL; - if (fs->ring_cookie >= priv->params.num_channels && + if (fs->ring_cookie >= priv->channels.params.num_channels && fs->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 15cc7b469d2e..a61b71b6fff3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,28 +31,24 @@ */ #include <net/tc_act/tc_gact.h> -#include <linux/crash_dump.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include "eswitch.h" #include "en.h" #include "en_tc.h" -#include "eswitch.h" +#include "en_rep.h" #include "vxlan.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; - bool am_enabled; }; struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; - u16 max_inline; - u8 min_inline_mode; - enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -79,49 +75,47 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 rq_type) { - priv->params.rq_wq_type = rq_type; - priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; - switch (priv->params.rq_wq_type) { + params->rq_wq_type = rq_type; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = is_kdump_kernel() ? + params->log_rq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) : - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev); - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + params->mpwqe_log_stride_sz = + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? + MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); + params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + params->mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = is_kdump_kernel() ? + params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; /* Extra room needed for build_skb */ - priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + + params->lro_wqe_sz -= MLX5_RX_HEADROOM + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); - mlx5_core_info(priv->mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)); + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(params->log_rq_size), + BIT(params->mpwqe_log_stride_sz), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && - !priv->xdp_prog ? 
+ u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && + !params->xdp_prog ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_set_rq_type_params(priv, rq_type); + mlx5e_set_rq_type_params(mdev, params, rq_type); } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -181,8 +175,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) int i, j; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->params.num_channels; i++) { - rq_stats = &priv->channel[i]->rq.stats; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = &c->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; @@ -204,8 +200,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; - for (j = 0; j < priv->params.num_tc; j++) { - sq_stats = &priv->channel[i]->sq[j].stats; + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; @@ -402,8 +398,10 @@ static inline int mlx5e_get_wqe_mtt_sz(void) MLX5_UMR_MTT_ALIGNMENT); } -static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, u16 ix) +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) { struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; @@ -493,11 +491,10 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); } -static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv, +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u64 npages, u8 page_shift, struct mlx5_core_mkey *umr_mkey) { - struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; @@ -531,21 +528,20 @@ static int 
mlx5e_create_umr_mkey(struct mlx5e_priv *priv, return err; } -static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq) +static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - struct mlx5e_priv *priv = rq->priv; - u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size)); + u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->wq)); - return mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey); + return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); } -static int mlx5e_create_rq(struct mlx5e_channel *c, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +static int mlx5e_alloc_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; + struct mlx5_core_dev *mdev = c->mdev; + void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; u32 frag_sz; @@ -554,9 +550,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, int err; int i; - param->wq.db_numa_node = cpu_to_node(c->cpu); + rqp->wq.db_numa_node = cpu_to_node(c->cpu); - err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq, + err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) return err; @@ -565,15 +561,15 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->wq_type = priv->params.rq_wq_type; + rq->wq_type = params->rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; - rq->tstamp = &priv->tstamp; + rq->tstamp = c->tstamp; rq->channel = c; rq->ix = c->ix; - rq->priv = c->priv; + rq->mdev = mdev; - rq->xdp_prog = priv->xdp_prog ? bpf_prog_inc(priv->xdp_prog) : NULL; + rq->xdp_prog = params->xdp_prog ?
bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { err = PTR_ERR(rq->xdp_prog); rq->xdp_prog = NULL; @@ -588,24 +584,26 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->rx_headroom = MLX5_RX_HEADROOM; } - switch (priv->params.rq_wq_type) { + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - if (mlx5e_is_vf_vport_rep(priv)) { - err = -EINVAL; - goto err_rq_wq_destroy; - } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; + if (!rq->handle_rx_cqe) { + err = -EINVAL; + netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); + rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; byte_count = rq->buff.wqe_sz; - err = mlx5e_create_rq_umr_mkey(rq); + err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) goto err_rq_wq_destroy; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); @@ -621,18 +619,20 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, err = -ENOMEM; goto err_rq_wq_destroy; } - - if (mlx5e_is_vf_vport_rep(priv)) - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; - else - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; - rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->buff.wqe_sz = (priv->params.lro_en) ? - priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; + if (!rq->handle_rx_cqe) { + kfree(rq->dma_info); + err = -EINVAL; + netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->buff.wqe_sz = params->lro_en ? 
+ params->lro_wqe_sz : + MLX5E_SW2HW_MTU(c->netdev->mtu); byte_count = rq->buff.wqe_sz; /* calc the required page order */ @@ -656,8 +656,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); - rq->am.mode = priv->params.rx_cq_period_mode; - + rq->am.mode = params->rx_cq_period_mode; rq->page_cache.head = 0; rq->page_cache.tail = 0; @@ -674,7 +673,7 @@ err_rq_wq_destroy: return err; } -static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +static void mlx5e_free_rq(struct mlx5e_rq *rq) { int i; @@ -684,7 +683,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); - mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey); + mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ kfree(rq->dma_info); @@ -699,10 +698,10 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_wq_destroy(&rq->wq_ctrl); } -static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +static int mlx5e_create_rq(struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) { - struct mlx5e_priv *priv = rq->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *in; void *rqc; @@ -723,7 +722,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); - MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -742,8 +740,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = c->mdev; void *in; void *rqc; @@ -767,7 +764,7 @@ static int 
mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, return err; } -static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -787,6 +784,35 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); + MLX5_SET(rqc, rqc, scatter_fcs, enable); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5_core_dev *mdev = c->mdev; + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); MLX5_SET(rqc, rqc, vsd, vsd); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); @@ -798,25 +824,28 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) return err; } -static void mlx5e_disable_rq(struct mlx5e_rq *rq) +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); + mlx5_core_destroy_rq(rq->mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { unsigned long exp_time = jiffies + msecs_to_jiffies(20000); struct mlx5e_channel *c = rq->channel; - struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5_wq_ll_get_size(wq)); while (time_before(jiffies, exp_time)) { - if (wq->cur_sz >= priv->params.min_rx_wqes) + if (wq->cur_sz >= min_wqes) 
return 0; msleep(20); } + netdev_warn(c->netdev, "Failed to get min RX wqes on RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", + rq->rqn, wq->cur_sz, min_wqes); return -ETIMEDOUT; } @@ -842,83 +871,128 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { - struct mlx5e_sq *sq = &c->icosq; - u16 pi = sq->pc & sq->wq.sz_m1; int err; - err = mlx5e_create_rq(c, param, rq); + err = mlx5e_alloc_rq(c, params, param, rq); if (err) return err; - err = mlx5e_enable_rq(rq, param); + err = mlx5e_create_rq(rq, param); if (err) - goto err_destroy_rq; + goto err_free_rq; - set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) - goto err_disable_rq; + goto err_destroy_rq; - if (param->am_enabled) + if (params->rx_am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ - return 0; -err_disable_rq: - clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); +err_free_rq: + mlx5e_free_rq(rq); return err; } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +static void mlx5e_activate_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = &rq->channel->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; + struct mlx5e_tx_wqe *nopwqe; + + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + +static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - cancel_work_sync(&rq->am.work); +} - 
mlx5e_disable_rq(rq); - mlx5e_free_rx_descs(rq); +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + cancel_work_sync(&rq->am.work); mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_rq(rq); } -static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) +static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) { - kfree(sq->db.xdp.di); - kfree(sq->db.xdp.wqe_info); + kfree(sq->db.di); } -static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + sq->db.di = kzalloc_node(sizeof(*sq->db.di) * wq_sz, GFP_KERNEL, numa); - sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, - GFP_KERNEL, numa); - if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { - mlx5e_free_sq_xdp_db(sq); + if (!sq->db.di) { + mlx5e_free_xdpsq_db(sq); return -ENOMEM; } return 0; } -static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) +static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + int err; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); + if (err) + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) +{ + mlx5e_free_xdpsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) { 
kfree(sq->db.ico_wqe); } -static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) { u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); @@ -930,155 +1004,128 @@ static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) return 0; } -static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) { - kfree(sq->db.txq.wqe_info); - kfree(sq->db.txq.dma_fifo); - kfree(sq->db.txq.skb); -} + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + int err; -static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) -{ - int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; - sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), - GFP_KERNEL, numa); - sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), - GFP_KERNEL, numa); - sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), - GFP_KERNEL, numa); - if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { - mlx5e_free_sq_txq_db(sq); - return -ENOMEM; - } + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); + if (err) + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - sq->dma_fifo_mask = df_sz - 1; + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + sq->edge = (sq->wq.sz_m1 + 1) - MLX5E_ICOSQ_MAX_WQEBBS; return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_icosq(struct mlx5e_icosq *sq) { - switch (sq->type) { - case MLX5E_SQ_TXQ: - mlx5e_free_sq_txq_db(sq); - break; - case MLX5E_SQ_ICO: - 
mlx5e_free_sq_ico_db(sq); - break; - case MLX5E_SQ_XDP: - mlx5e_free_sq_xdp_db(sq); - break; - } + mlx5e_free_icosq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) { - switch (sq->type) { - case MLX5E_SQ_TXQ: - return mlx5e_alloc_sq_txq_db(sq, numa); - case MLX5E_SQ_ICO: - return mlx5e_alloc_sq_ico_db(sq, numa); - case MLX5E_SQ_XDP: - return mlx5e_alloc_sq_xdp_db(sq, numa); - } - - return 0; + kfree(sq->db.wqe_info); + kfree(sq->db.dma_fifo); } -static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) { - switch (sq_type) { - case MLX5E_SQ_ICO: - return MLX5E_ICOSQ_MAX_WQEBBS; - case MLX5E_SQ_XDP: - return MLX5E_XDP_TX_WQEBBS; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.dma_fifo), + GFP_KERNEL, numa); + sq->db.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.dma_fifo || !sq->db.wqe_info) { + mlx5e_free_txqsq_db(sq); + return -ENOMEM; } - return MLX5_SEND_WQE_MAX_WQEBBS; + + sq->dma_fifo_mask = df_sz - 1; + + return 0; } -static int mlx5e_create_sq(struct mlx5e_channel *c, - int tc, - struct mlx5e_sq_param *param, - struct mlx5e_sq *sq) +static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - void *sqc = param->sqc; - void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; int err; - sq->type = param->type; sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; + sq->tstamp = c->tstamp; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->tc = tc; - - err = mlx5_alloc_bfreg(mdev, &sq->bfreg, 
MLX5_CAP_GEN(mdev, bf), false); - if (err) - return err; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->max_inline = params->tx_max_inline; + sq->min_inline_mode = params->tx_min_inline_mode; - sq->uar_map = sq->bfreg.map; param->wq.db_numa_node = cpu_to_node(c->cpu); - - err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, - &sq->wq_ctrl); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) - goto err_unmap_free_uar; - - sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - if (sq->bfreg.wc) - set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); - - sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; - sq->max_inline = param->max_inline; - sq->min_inline_mode = param->min_inline_mode; + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; - if (sq->type == MLX5E_SQ_TXQ) { - int txq_ix; - - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); - priv->txq_to_sq_map[txq_ix] = sq; - } - - sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); - sq->bf_budget = MLX5E_SQ_BF_BUDGET; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; return 0; err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); -err_unmap_free_uar: - mlx5_free_bfreg(mdev, &sq->bfreg); - return err; } -static void mlx5e_destroy_sq(struct mlx5e_sq *sq) +static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) { - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - - mlx5e_free_sq_db(sq); + mlx5e_free_txqsq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); - mlx5_free_bfreg(priv->mdev, &sq->bfreg); } -static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) -{ - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; +struct mlx5e_create_sq_param { + struct 
mlx5_wq_ctrl *wq_ctrl; + u32 cqn; + u32 tisn; + u8 tis_lst_sz; + u8 min_inline_mode; +}; +static int mlx5e_create_sq(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{ void *in; void *sqc; void *wq; @@ -1086,7 +1133,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + - sizeof(u64) * sq->wq_ctrl.buf.npages; + sizeof(u64) * csp->wq_ctrl->buf.npages; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; @@ -1095,40 +1142,40 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); - - MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? - 0 : priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); + MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); + MLX5_SET(sqc, sqc, cqn, csp->cqn); if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); - MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 
0 : 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, sq->bfreg.index); - MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); - mlx5_fill_page_array(&sq->wq_ctrl.buf, - (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); - err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + err = mlx5_core_create_sq(mdev, in, inlen, sqn); kvfree(in); return err; } -static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, - int next_state, bool update_rl, int rl_index) -{ - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + bool rl_update; + int rl_index; +}; +static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) +{ void *in; void *sqc; int inlen; @@ -1141,68 +1188,94 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); - MLX5_SET(modify_sq_in, in, sq_state, curr_state); - MLX5_SET(sqc, sqc, state, next_state); - if (update_rl && next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); + MLX5_SET(sqc, sqc, state, p->next_state); + if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } - err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); + err = mlx5_core_modify_sq(mdev, sqn, in, inlen); kvfree(in); return err; } 
-static void mlx5e_disable_sq(struct mlx5e_sq *sq) +static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) { - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - mlx5_core_destroy_sq(mdev, sq->sqn); - if (sq->rate_limit) - mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5_core_destroy_sq(mdev, sqn); } -static int mlx5e_open_sq(struct mlx5e_channel *c, - int tc, - struct mlx5e_sq_param *param, - struct mlx5e_sq *sq) +static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) { + struct mlx5e_modify_sq_param msp = {0}; int err; - err = mlx5e_create_sq(c, tc, param, sq); + err = mlx5e_create_sq(mdev, param, csp, sqn); if (err) return err; - err = mlx5e_enable_sq(sq, param); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) - goto err_destroy_sq; + mlx5e_destroy_sq(mdev, *sqn); - set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, - false, 0); + return err; +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate); + +static int mlx5e_open_txqsq(struct mlx5e_channel *c, + u32 tisn, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + u32 tx_rate; + int err; + + err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq); if (err) - goto err_disable_sq; + return err; - if (sq->txq) { - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); - } + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_txqsq; + + tx_rate = c->priv->tx_rates[sq->txq_ix]; + if 
(tx_rate) + mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); return 0; -err_disable_sq: +err_free_txqsq: clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - mlx5e_disable_sq(sq); -err_destroy_sq: - mlx5e_destroy_sq(sq); + mlx5e_free_txqsq(sq); return err; } +static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +{ + sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); +} + static inline void netif_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); @@ -1210,43 +1283,153 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) __netif_tx_unlock_bh(txq); } -static void mlx5e_close_sq(struct mlx5e_sq *sq) +static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) { + struct mlx5e_channel *c = sq->channel; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); /* prevent netif_tx_wake_queue */ - napi_synchronize(&sq->channel->napi); + napi_synchronize(&c->napi); - if (sq->txq) { - netif_tx_disable_queue(sq->txq); + netif_tx_disable_queue(sq->txq); - /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) { - sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; - mlx5e_send_nop(sq, true); - } + /* last doorbell out, godspeed .. 
*/ + if (mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1)) { + struct mlx5e_tx_wqe *nop; + + sq->db.wqe_info[(sq->pc & sq->wq.sz_m1)].skb = NULL; + nop = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nop->ctrl); } +} + +static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5_core_dev *mdev = c->mdev; - mlx5e_disable_sq(sq); - mlx5e_free_sq_descs(sq); - mlx5e_destroy_sq(sq); + mlx5e_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) + mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); } -static int mlx5e_create_cq(struct mlx5e_channel *c, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_icosq(c, param, sq); + if (err) + return err; + + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = params->tx_min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_icosq; + + return 0; + +err_free_icosq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_icosq(sq); + + return err; +} + +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_icosq(sq); +} + +static int mlx5e_open_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq) +{ + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + struct mlx5e_create_sq_param csp = {}; + unsigned int inline_hdr_sz = 0; + int err; + int i; + + err = mlx5e_alloc_xdpsq(c, params, param, sq); + if (err) + 
return err; + + csp.tis_lst_sz = 1; + csp.tisn = c->priv->tisn[0]; /* tc = 0 */ + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_xdpsq; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } + + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + } + + return 0; + +err_free_xdpsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_xdpsq(sq); + + return err; +} + +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq(sq); +} + +static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; unsigned int irqn; int err; u32 i; - param->wq.buf_numa_node = cpu_to_node(c->cpu); - param->wq.db_numa_node = cpu_to_node(c->cpu); - param->eq_ix = c->ix; - err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1254,8 +1437,6 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - cq->napi = &c->napi; - mcq->cqe_sz = 64; 
mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; @@ -1272,21 +1453,38 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, cqe->op_own = 0xf1; } - cq->channel = c; - cq->priv = priv; + cq->mdev = mdev; return 0; } -static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +static int mlx5e_alloc_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = c->priv->mdev; + int err; + + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5e_alloc_cq_common(mdev, param, cq); + + cq->napi = &c->napi; + cq->channel = c; + + return err; +} + +static void mlx5e_free_cq(struct mlx5e_cq *cq) { mlx5_cqwq_destroy(&cq->wq_ctrl); } -static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { - struct mlx5e_priv *priv = cq->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; void *in; @@ -1330,47 +1528,41 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) return 0; } -static void mlx5e_disable_cq(struct mlx5e_cq *cq) +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { - struct mlx5e_priv *priv = cq->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } static int mlx5e_open_cq(struct mlx5e_channel *c, + struct mlx5e_cq_moder moder, struct mlx5e_cq_param *param, - struct mlx5e_cq *cq, - struct mlx5e_cq_moder moderation) + struct mlx5e_cq *cq) { + struct mlx5_core_dev *mdev = c->mdev; int err; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - err = mlx5e_create_cq(c, param, cq); + err = mlx5e_alloc_cq(c, param, cq); if (err) return err; - err = mlx5e_enable_cq(cq, param); + err = mlx5e_create_cq(cq, param); if (err) 
- goto err_destroy_cq; + goto err_free_cq; if (MLX5_CAP_GEN(mdev, cq_moderation)) - mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation.usec, - moderation.pkts); + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); return 0; -err_destroy_cq: - mlx5e_destroy_cq(cq); +err_free_cq: + mlx5e_free_cq(cq); return err; } static void mlx5e_close_cq(struct mlx5e_cq *cq) { - mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); + mlx5e_free_cq(cq); } static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) @@ -1379,15 +1571,15 @@ static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { - struct mlx5e_priv *priv = c->priv; int err; int tc; for (tc = 0; tc < c->num_tc; tc++) { - err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, - priv->params.tx_cq_moderation); + err = mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->sq[tc].cq); if (err) goto err_close_tx_cqs; } @@ -1410,13 +1602,17 @@ static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) } static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { int err; int tc; - for (tc = 0; tc < c->num_tc; tc++) { - err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); + for (tc = 0; tc < params->num_tc; tc++) { + int txq_ix = c->ix + tc * params->num_channels; + + err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix, + params, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } @@ -1425,7 +1621,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, err_close_sqs: for (tc--; tc >= 0; tc--) - mlx5e_close_sq(&c->sq[tc]); + mlx5e_close_txqsq(&c->sq[tc]); return err; } @@ -1435,23 +1631,15 @@ static void mlx5e_close_sqs(struct mlx5e_channel *c) int tc; for (tc = 0; tc < c->num_tc; tc++) - mlx5e_close_sq(&c->sq[tc]); -} - -static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) -{ - int i; - - 
for (i = 0; i < priv->profile->max_tc; i++) - priv->channeltc_to_txq_map[ix][i] = - ix + i * priv->params.num_channels; + mlx5e_close_txqsq(&c->sq[tc]); } static int mlx5e_set_sq_maxrate(struct net_device *dev, - struct mlx5e_sq *sq, u32 rate) + struct mlx5e_txqsq *sq, u32 rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_modify_sq_param msp = {0}; u16 rl_index = 0; int err; @@ -1474,8 +1662,11 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, } } - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RDY, true, rl_index); + msp.curr_state = MLX5_SQC_STATE_RDY; + msp.next_state = MLX5_SQC_STATE_RDY; + msp.rl_index = rl_index; + msp.rl_update = true; + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); if (err) { netdev_err(dev, "Failed configuring rate %u: %d\n", rate, err); @@ -1493,7 +1684,7 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_sq *sq = priv->txq_to_sq_map[index]; + struct mlx5e_txqsq *sq = priv->txq2sq[index]; int err = 0; if (!mlx5_rl_is_supported(mdev)) { @@ -1520,114 +1711,87 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) -{ - return is_kdump_kernel() ? 
- MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); -} - static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { - struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; + struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - struct mlx5e_cq_moder rx_cq_profile; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; - struct mlx5e_sq *sq; int err; - int i; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; c->ix = ix; c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = priv->params.num_tc; - c->xdp = !!priv->xdp_prog; - - if (priv->params.rx_am_enabled) - rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); - else - rx_cq_profile = priv->params.rx_cq_moderation; - - mlx5e_build_channeltc_to_txq_map(priv, ix); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) goto err_napi_del; - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_tx_cqs(c, params, cparam); if (err) goto err_close_icosq_cq; - err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - rx_cq_profile); + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq); if (err) goto err_close_tx_cqs; /* XDP SQ CQ params are same as normal TXQ sq CQ params */ - err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, - priv->params.tx_cq_moderation) : 0; + err = c->xdp ? 
mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; if (err) goto err_close_rx_cq; napi_enable(&c->napi); - err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sqs(c, params, cparam); if (err) goto err_close_icosq; - for (i = 0; i < priv->params.num_tc; i++) { - u32 txq_ix = priv->channeltc_to_txq_map[ix][i]; - - if (priv->tx_rates[txq_ix]) { - sq = priv->txq_to_sq_map[txq_ix]; - mlx5e_set_sq_maxrate(priv->netdev, sq, - priv->tx_rates[txq_ix]); - } - } - - err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0; + err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq) : 0; if (err) goto err_close_sqs; - err = mlx5e_open_rq(c, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); if (err) goto err_close_xdp_sq; - netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; err_close_xdp_sq: if (c->xdp) - mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_xdpsq(&c->rq.xdpsq); err_close_sqs: mlx5e_close_sqs(c); err_close_icosq: - mlx5e_close_sq(&c->icosq); + mlx5e_close_icosq(&c->icosq); err_disable_napi: napi_disable(&c->napi); if (c->xdp) - mlx5e_close_cq(&c->xdp_sq.cq); + mlx5e_close_cq(&c->rq.xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1645,16 +1809,35 @@ err_napi_del: return err; } +static void mlx5e_activate_channel(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_rq(&c->rq); + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); +} + +static void mlx5e_deactivate_channel(struct mlx5e_channel *c) +{ + int tc; + + mlx5e_deactivate_rq(&c->rq); + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->sq[tc]); +} + static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); if (c->xdp) - 
mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_xdpsq(&c->rq.xdpsq); mlx5e_close_sqs(c); - mlx5e_close_sq(&c->icosq); + mlx5e_close_icosq(&c->icosq); napi_disable(&c->napi); if (c->xdp) - mlx5e_close_cq(&c->xdp_sq.cq); + mlx5e_close_cq(&c->rq.xdpsq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1664,17 +1847,16 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - switch (priv->params.rq_wq_type) { + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - priv->params.mpwqe_log_num_strides - 9); - MLX5_SET(wq, wq, log_wqe_stride_size, - priv->params.mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ @@ -1683,14 +1865,14 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_size); MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; - - param->am_enabled = priv->params.rx_am_enabled; } static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) @@ -1715,17 +1897,14 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, } static void 
mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - - param->max_inline = priv->params.tx_max_inline; - param->min_inline_mode = priv->params.tx_min_inline_mode; - param->type = MLX5E_SQ_TXQ; + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1737,37 +1916,36 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_cq_param *param) { void *cqc = param->cqc; u8 log_cq_size; - switch (priv->params.rq_wq_type) { + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = priv->params.log_rq_size + - priv->params.mpwqe_log_num_strides; + log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - log_cq_size = priv->params.log_rq_size; + log_cq_size = params->log_rq_size; } MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_comp_en, 1); } mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = priv->params.rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_cq_param *param) { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); mlx5e_build_common_cq_param(priv, param); @@ -1775,8 +1953,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_ico_cq_param(struct mlx5e_priv 
*priv, - struct mlx5e_cq_param *param, - u8 log_wq_size) + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -1788,8 +1966,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param, - u8 log_wq_size) + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -1798,162 +1976,119 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - - param->type = MLX5E_SQ_ICO; } static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - - param->max_inline = priv->params.tx_max_inline; - param->min_inline_mode = priv->params.tx_min_inline_mode; - param->type = MLX5E_SQ_XDP; + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); } -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - mlx5e_build_rq_param(priv, &cparam->rq); - mlx5e_build_sq_param(priv, &cparam->sq); - mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); - mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); - mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); - mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); + mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_sq_param(priv, params, &cparam->sq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + 
mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } -static int mlx5e_open_channels(struct mlx5e_priv *priv) +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) { struct mlx5e_channel_param *cparam; - int nch = priv->params.num_channels; int err = -ENOMEM; int i; - int j; - - priv->channel = kcalloc(nch, sizeof(struct mlx5e_channel *), - GFP_KERNEL); - priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, - sizeof(struct mlx5e_sq *), GFP_KERNEL); + chs->num = chs->params.num_channels; + chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + if (!chs->c || !cparam) + goto err_free; - if (!priv->channel || !priv->txq_to_sq_map || !cparam) - goto err_free_txq_to_sq_map; - - mlx5e_build_channel_param(priv, cparam); - - for (i = 0; i < nch; i++) { - err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]); - if (err) - goto err_close_channels; - } - - for (j = 0; j < nch; j++) { - err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); + mlx5e_build_channel_param(priv, &chs->params, cparam); + for (i = 0; i < chs->num; i++) { + err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); if (err) goto err_close_channels; } - /* FIXME: This is a W/A for tx timeout watch dog false alarm when - * polling for inactive tx queues. 
- */ - netif_tx_start_all_queues(priv->netdev); - kfree(cparam); return 0; err_close_channels: for (i--; i >= 0; i--) - mlx5e_close_channel(priv->channel[i]); + mlx5e_close_channel(chs->c[i]); -err_free_txq_to_sq_map: - kfree(priv->txq_to_sq_map); - kfree(priv->channel); +err_free: + kfree(chs->c); kfree(cparam); - + chs->num = 0; return err; } -static void mlx5e_close_channels(struct mlx5e_priv *priv) +static void mlx5e_activate_channels(struct mlx5e_channels *chs) { int i; - /* FIXME: This is a W/A only for tx timeout watch dog false alarm when - * polling for inactive tx queues. - */ - netif_tx_stop_all_queues(priv->netdev); - netif_tx_disable(priv->netdev); - - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_close_channel(priv->channel[i]); - - kfree(priv->txq_to_sq_map); - kfree(priv->channel); + for (i = 0; i < chs->num; i++) + mlx5e_activate_channel(chs->c[i]); } -static int mlx5e_rx_hash_fn(int hfunc) +static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) { - return (hfunc == ETH_RSS_HASH_TOP) ? - MLX5_RX_HASH_FN_TOEPLITZ : - MLX5_RX_HASH_FN_INVERTED_XOR8; -} - -static int mlx5e_bits_invert(unsigned long a, int size) -{ - int inv = 0; + int err = 0; int i; - for (i = 0; i < size; i++) - inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + for (i = 0; i < chs->num; i++) { + err = mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq); + if (err) + break; + } - return inv; + return err; } -static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) +static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) { int i; - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { - int ix = i; - u32 rqn; - - if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); - - ix = priv->params.indirection_rqt[ix]; - rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? 
- priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn; - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } + for (i = 0; i < chs->num; i++) + mlx5e_deactivate_channel(chs->c[i]); } -static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, - int ix) +void mlx5e_close_channels(struct mlx5e_channels *chs) { - u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn; + int i; - MLX5_SET(rqtc, rqtc, rq_num[0], rqn); + for (i = 0; i < chs->num; i++) + mlx5e_close_channel(chs->c[i]); + + kfree(chs->c); + chs->num = 0; } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, - int ix, struct mlx5e_rqt *rqt) +static int +mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; int inlen; int err; u32 *in; + int i; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); @@ -1965,10 +2100,8 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - if (sz > 1) /* RSS */ - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - else - mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn); err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); if (!err) @@ -1984,11 +2117,15 @@ void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) { struct mlx5e_rqt *rqt = &priv->indir_rqt; + int err; - return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt); + if (err) + mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); + return err; } int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) @@ -1999,7 +2136,7 @@ int 
mlx5e_create_direct_rqts(struct mlx5e_priv *priv) for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); + err = mlx5e_create_rqt(priv, 1 /*size */, rqt); if (err) goto err_destroy_rqts; } @@ -2007,13 +2144,64 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) return 0; err_destroy_rqts: + mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); return err; } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_rx_hash_fn(int hfunc) +{ + return (hfunc == ETH_RSS_HASH_TOP) ? + MLX5_RX_HASH_FN_TOEPLITZ : + MLX5_RX_HASH_FN_INVERTED_XOR8; +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 
1 : 0) << i; + + return inv; +} + +static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, + struct mlx5e_redirect_rqt_param rrp, void *rqtc) +{ + int i; + + for (i = 0; i < sz; i++) { + u32 rqn; + + if (rrp.is_rss) { + int ix = i; + + if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + + ix = priv->channels.params.indirection_rqt[ix]; + rqn = rrp.rss.channels->c[ix]->rq.rqn; + } else { + rqn = rrp.rqn; + } + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -2029,41 +2217,86 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - if (sz > 1) /* RSS */ - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - else - mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - + mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc); err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); kvfree(in); - return err; } -static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) +static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, + struct mlx5e_redirect_rqt_param rrp) +{ + if (!rrp.is_rss) + return rrp.rqn; + + if (ix >= rrp.rss.channels->num) + return priv->drop_rq.rqn; + + return rrp.rss.channels->c[ix]->rq.rqn; +} + +static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, + struct mlx5e_redirect_rqt_param rrp) { u32 rqtn; int ix; if (priv->indir_rqt.enabled) { + /* RSS RQ table */ rqtn = priv->indir_rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); } - for (ix = 0; ix < priv->params.num_channels; ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = 
mlx5e_get_direct_rqn(priv, ix, rrp) + }, + }; + + /* Direct RQ Tables */ if (!priv->direct_tir[ix].rqt.enabled) continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, ix); + mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } } -static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) +static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .channels = chs, + .hfunc = chs->params.rss_hfunc, + } + }, + }; + + mlx5e_redirect_rqts(priv, rrp); +} + +static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) +{ + struct mlx5e_redirect_rqt_param drop_rrp = { + .is_rss = false, + { + .rqn = priv->drop_rq.rqn, + }, + }; + + mlx5e_redirect_rqts(priv, drop_rrp); +} + +static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) { - if (!priv->params.lro_en) + if (!params->lro_en) return; #define ROUGH_MAX_L2_L3_HDR_SZ 256 @@ -2072,13 +2305,13 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (priv->params.lro_wqe_sz - - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); + (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, - enum mlx5e_traffic_types tt) +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); @@ -2094,16 +2327,15 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) - MLX5_SET(tirc, tirc, rx_hash_fn, - 
mlx5e_rx_hash_fn(priv->params.rss_hfunc)); - if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); + if (params->rss_hfunc == ETH_RSS_HASH_TOP) { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, priv->params.toeplitz_hash_key, len); + memcpy(rss_key, params->toeplitz_hash_key, len); } switch (tt) { @@ -2208,7 +2440,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) MLX5_SET(modify_tir_in, in, bitmask.lro, 1); tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, @@ -2258,9 +2490,9 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) *mtu = MLX5E_HW2SW_MTU(hw_mtu); } -static int mlx5e_set_dev_port_mtu(struct net_device *netdev) +static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct net_device *netdev = priv->netdev; u16 mtu; int err; @@ -2280,8 +2512,8 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev) static void mlx5e_netdev_set_tcs(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - int nch = priv->params.num_channels; - int ntc = priv->params.num_tc; + int nch = priv->channels.params.num_channels; + int ntc = priv->channels.params.num_tc; int tc; netdev_reset_tc(netdev); @@ -2298,53 +2530,116 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } +static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv) +{ + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int i, tc; + + for (i = 0; i < priv->channels.num; i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + 
priv->channel_tc2txq[i][tc] = i + tc * priv->channels.num; + + for (i = 0; i < priv->channels.num; i++) { + c = priv->channels.c[i]; + for (tc = 0; tc < c->num_tc; tc++) { + sq = &c->sq[tc]; + priv->txq2sq[sq->txq_ix] = sq; + } + } +} + +static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_GEN(mdev, vport_group_manager) && + MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH); +} + +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) +{ + int num_txqs = priv->channels.num * priv->channels.params.num_tc; + struct net_device *netdev = priv->netdev; + + mlx5e_netdev_set_tcs(netdev); + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->channels.num); + + mlx5e_build_channels_tx_maps(priv); + mlx5e_activate_channels(&priv->channels); + netif_tx_start_all_queues(priv->netdev); + + if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + mlx5e_add_sqs_fwd_rules(priv); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + mlx5e_redirect_rqts_to_channels(priv, &priv->channels); +} + +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) +{ + mlx5e_redirect_rqts_to_drop(priv); + + if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when + * polling for inactive tx queues. 
+ */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); + mlx5e_deactivate_channels(&priv->channels); +} + +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + struct net_device *netdev = priv->netdev; + int new_num_txqs; + + new_num_txqs = new_chs->num * new_chs->params.num_tc; + + netif_carrier_off(netdev); + + if (new_num_txqs < netdev->real_num_tx_queues) + netif_set_real_num_tx_queues(netdev, new_num_txqs); + + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + priv->channels = *new_chs; + + /* New channels are ready to roll, modify HW settings if needed */ + if (hw_modify) + hw_modify(priv); + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + + mlx5e_update_carrier(priv); +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - int num_txqs; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_netdev_set_tcs(netdev); - - num_txqs = priv->params.num_channels * priv->params.num_tc; - netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - - err = mlx5e_open_channels(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", - __func__, err); + err = mlx5e_open_channels(priv, &priv->channels); + if (err) goto err_clear_state_opened_flag; - } - - err = mlx5e_refresh_tirs_self_loopback(priv->mdev, false); - if (err) { - netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", - __func__, err); - goto err_close_channels; - } - mlx5e_redirect_rqts(priv); + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); -#ifdef CONFIG_RFS_ACCEL - priv->netdev->rx_cpu_rmap = priv->mdev->rmap; -#endif + if (priv->profile->update_stats) 
queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - err = mlx5e_add_sqs_fwd_rules(priv); - if (err) - goto err_close_channels; - } return 0; -err_close_channels: - mlx5e_close_channels(priv); err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; @@ -2365,7 +2660,6 @@ int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. @@ -2375,13 +2669,10 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5e_remove_sqs_fwd_rules(priv); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); - mlx5e_redirect_rqts(priv); - mlx5e_close_channels(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); return 0; } @@ -2401,11 +2692,10 @@ int mlx5e_close(struct net_device *netdev) return err; } -static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, - struct mlx5e_rq *rq, - struct mlx5e_rq_param *param) +static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) { - struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int err; @@ -2417,111 +2707,85 @@ static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, if (err) return err; - rq->priv = priv; + rq->mdev = mdev; return 0; } -static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, - struct mlx5e_cq *cq, - struct mlx5e_cq_param *param) +static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_core_cq *mcq = &cq->mcq; - int 
eqn_not_used; - unsigned int irqn; - int err; - - err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, - &cq->wq_ctrl); - if (err) - return err; - - mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - - mcq->cqe_sz = 64; - mcq->set_ci_db = cq->wq_ctrl.db.db; - mcq->arm_db = cq->wq_ctrl.db.db + 1; - *mcq->set_ci_db = 0; - *mcq->arm_db = 0; - mcq->vector = param->eq_ix; - mcq->comp = mlx5e_completion_event; - mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; - - cq->priv = priv; - - return 0; + return mlx5e_alloc_cq_common(mdev, param, cq); } -static int mlx5e_open_drop_rq(struct mlx5e_priv *priv) +static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *drop_rq) { - struct mlx5e_cq_param cq_param; - struct mlx5e_rq_param rq_param; - struct mlx5e_rq *rq = &priv->drop_rq; - struct mlx5e_cq *cq = &priv->drop_rq.cq; + struct mlx5e_cq_param cq_param = {}; + struct mlx5e_rq_param rq_param = {}; + struct mlx5e_cq *cq = &drop_rq->cq; int err; - memset(&cq_param, 0, sizeof(cq_param)); - memset(&rq_param, 0, sizeof(rq_param)); mlx5e_build_drop_rq_param(&rq_param); - err = mlx5e_create_drop_cq(priv, cq, &cq_param); + err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) return err; - err = mlx5e_enable_cq(cq, &cq_param); + err = mlx5e_create_cq(cq, &cq_param); if (err) - goto err_destroy_cq; + goto err_free_cq; - err = mlx5e_create_drop_rq(priv, rq, &rq_param); + err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); if (err) - goto err_disable_cq; + goto err_destroy_cq; - err = mlx5e_enable_rq(rq, &rq_param); + err = mlx5e_create_rq(drop_rq, &rq_param); if (err) - goto err_destroy_rq; + goto err_free_rq; return 0; -err_destroy_rq: - mlx5e_destroy_rq(&priv->drop_rq); - -err_disable_cq: - mlx5e_disable_cq(&priv->drop_rq.cq); +err_free_rq: + mlx5e_free_rq(drop_rq); err_destroy_cq: - mlx5e_destroy_cq(&priv->drop_rq.cq); + mlx5e_destroy_cq(cq); + +err_free_cq: + mlx5e_free_cq(cq); return err; } -static void mlx5e_close_drop_rq(struct
mlx5e_priv *priv) +static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) { - mlx5e_disable_rq(&priv->drop_rq); - mlx5e_destroy_rq(&priv->drop_rq); - mlx5e_disable_cq(&priv->drop_rq.cq); - mlx5e_destroy_cq(&priv->drop_rq.cq); + mlx5e_destroy_rq(drop_rq); + mlx5e_free_rq(drop_rq); + mlx5e_destroy_cq(&drop_rq->cq); + mlx5e_free_cq(&drop_rq->cq); } -static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn) { - struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, prio, tc << 1); + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); + return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); } -static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) { - mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5_core_destroy_tis(mdev, tisn); } int mlx5e_create_tises(struct mlx5e_priv *priv) @@ -2530,7 +2794,7 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv, tc); + err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -2539,7 +2803,7 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) err_close_tises: for (tc--; tc >= 0; tc--) - mlx5e_destroy_tis(priv, tc); + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); return err; } @@ -2549,34 +2813,34 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv, tc); + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } -static void 
mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, - enum mlx5e_traffic_types tt) +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); } -static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, - u32 rqtn) +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) { struct mlx5e_tir *tir; void *tirc; @@ -2594,7 +2858,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) memset(in, 0, inlen); tir = &priv->indir_tir[tt]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_indir_tir_ctx(priv, tirc, tt); + mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_tirs; @@ -2605,6 +2869,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) return 0; err_destroy_tirs: + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2632,8 +2897,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) memset(in, 0, 
inlen); tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, tirc, - priv->direct_tir[ix].rqt.rqtn); + mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; @@ -2644,6 +2908,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) return 0; err_destroy_ch_tirs: + mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); @@ -2652,7 +2917,7 @@ err_destroy_ch_tirs: return err; } -static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { int i; @@ -2669,16 +2934,27 @@ void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); } -int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) +static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) { int err = 0; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return 0; + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); + if (err) + return err; + } - for (i = 0; i < priv->params.num_channels; i++) { - err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); + return 0; +} + +static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); if (err) return err; } @@ -2689,7 +2965,7 @@ int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened; + struct mlx5e_channels new_channels = {}; int err = 0; if (tc && tc != MLX5E_MAX_NUM_TC) @@ -2697,17 +2973,21 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) 
mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(priv->netdev); + new_channels.params = priv->channels.params; + new_channels.params.num_tc = tc ? tc : 1; - priv->params.num_tc = tc ? tc : 1; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { /* not opened: only record the new params */ + priv->channels.params = new_channels.params; + goto out; + } - if (was_opened) - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: mutex_unlock(&priv->state_lock); - return err; } @@ -2737,7 +3017,9 @@ mqprio: if (tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx5e_setup_tc(dev, tc->tc); + tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + return mlx5e_setup_tc(dev, tc->mqprio->num_tc); } static void @@ -2822,26 +3104,31 @@ typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - int err; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; mutex_lock(&priv->state_lock); - if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) - mlx5e_close_locked(priv->netdev); + reset = (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST); + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); - priv->params.lro_en = enable; - err = mlx5e_modify_tirs_lro(priv); - if (err) { - netdev_err(netdev, "lro modify failed, %d\n", err); - priv->params.lro_en = !enable; + new_channels.params = priv->channels.params; + new_channels.params.lro_en = enable; + + if (!reset) { + priv->channels.params = new_channels.params; + err = mlx5e_modify_tirs_lro(priv); + goto out; } - if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) - mlx5e_open_locked(priv->netdev); + err =
mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); +out: mutex_unlock(&priv->state_lock); - return err; } @@ -2878,23 +3165,44 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } -static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; mutex_lock(&priv->state_lock); - priv->params.vlan_strip_disable = !enable; - err = mlx5e_modify_rqs_vsd(priv, !enable); + priv->channels.params.scatter_fcs_en = enable; + err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); if (err) - priv->params.vlan_strip_disable = enable; + priv->channels.params.scatter_fcs_en = !enable; mutex_unlock(&priv->state_lock); return err; } +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + + priv->channels.params.vlan_strip_disable = !enable; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_modify_channels_vsd(&priv->channels, !enable); + if (err) + priv->channels.params.vlan_strip_disable = enable; + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + #ifdef CONFIG_RFS_ACCEL static int set_feature_arfs(struct net_device *netdev, bool enable) { @@ -2947,6 +3255,8 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXFCS, + set_feature_rx_fcs); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); #ifdef CONFIG_RFS_ACCEL @@ -2960,28 +3270,38 @@ static int mlx5e_set_features(struct net_device *netdev, static int 
mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened; + struct mlx5e_channels new_channels = {}; + int curr_mtu; int err = 0; bool reset; mutex_lock(&priv->state_lock); - reset = !priv->params.lro_en && - (priv->params.rq_wq_type != + reset = !priv->channels.params.lro_en && + (priv->channels.params.rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened && reset) - mlx5e_close_locked(netdev); + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); + curr_mtu = netdev->mtu; netdev->mtu = new_mtu; - mlx5e_set_dev_port_mtu(netdev); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); + if (!reset) { + mlx5e_set_dev_port_mtu(priv); + goto out; + } - mutex_unlock(&priv->state_lock); + new_channels.params = priv->channels.params; + err = mlx5e_open_channels(priv, &new_channels); + if (err) { + netdev->mtu = curr_mtu; + goto out; + } + + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_set_dev_port_mtu); +out: + mutex_unlock(&priv->state_lock); return err; } @@ -3186,8 +3506,8 @@ static void mlx5e_tx_timeout(struct net_device *dev) netdev_err(dev, "TX timeout detected\n"); - for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { - struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; @@ -3219,7 +3539,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ - reset = (!priv->xdp_prog || !prog); + reset = (!priv->channels.params.xdp_prog || !prog); if (was_opened && reset) mlx5e_close_locked(netdev); @@ -3227,7 +3547,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog 
*prog) /* num_channels is invariant here, so we can take the * batched reference right upfront. */ - prog = bpf_prog_add(prog, priv->params.num_channels); + prog = bpf_prog_add(prog, priv->channels.num); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto unlock; @@ -3237,12 +3557,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* exchange programs, extra prog reference we got from caller * as long as we don't fail from this point onwards. */ - old_prog = xchg(&priv->xdp_prog, prog); + old_prog = xchg(&priv->channels.params.xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); if (reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_priv_params(priv); + mlx5e_set_rq_params(priv->mdev, &priv->channels.params); if (was_opened && reset) mlx5e_open_locked(netdev); @@ -3253,8 +3573,8 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* exchanging programs w/o reset, we update ref counts on behalf * of the channels RQs here. 
*/ - for (i = 0; i < priv->params.num_channels; i++) { - struct mlx5e_channel *c = priv->channel[i]; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); napi_synchronize(&c->napi); @@ -3280,7 +3600,7 @@ static bool mlx5e_xdp_attached(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - return !!priv->xdp_prog; + return !!priv->channels.params.xdp_prog; } static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -3303,10 +3623,12 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) static void mlx5e_netpoll(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *chs = &priv->channels; + int i; - for (i = 0; i < priv->params.num_channels; i++) - napi_schedule(&priv->channel[i]->napi); + for (i = 0; i < chs->num; i++) + napi_schedule(&chs->c[i]->napi); } #endif @@ -3463,6 +3785,12 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) (pci_bw < 40000) && (pci_bw < link_speed)); } +static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) +{ + return !(link_speed && pci_bw && + (pci_bw <= 16000) && (pci_bw < link_speed)); +} + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { params->rx_cq_period_mode = cq_period_mode; @@ -3475,6 +3803,13 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) params->rx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + if (params->rx_am_enabled) + params->rx_cq_moderation = + mlx5e_am_get_def_profile(params->rx_cq_period_mode); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); } u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) @@ -3489,75 +3824,81 @@ u32 mlx5e_choose_lro_timeout(struct 
mlx5_core_dev *mdev, u32 wanted_timeout) return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels) { - struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = 0; u32 link_speed = 0; u32 pci_bw = 0; - u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; + params->num_channels = max_channels; + params->num_tc = 1; - priv->params.lro_timeout = - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); - priv->params.log_sq_size = is_kdump_kernel() ? + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ - priv->params.rx_cqe_compress_def = false; + params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && - MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); - mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - priv->params.rx_cqe_compress_def = - cqe_compress_heuristic(link_speed, pci_bw); - } - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, - priv->params.rx_cqe_compress_def); - - mlx5e_set_rq_priv_params(priv); - if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - priv->params.lro_en = true; - - priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); - - priv->params.tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - priv->params.tx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); - if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE && + MLX5_CAP_GEN(mdev, vport_group_manager)) + params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + + /* RQ */ + mlx5e_set_rq_params(mdev, params); + + /* HW LRO */ + /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + params->lro_en = hw_lro_heuristic(link_speed, pci_bw); + params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ + cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + + params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + + /* TX inline */ + params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE && !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2; + params->tx_min_inline_mode = MLX5_INLINE_MODE_L2; - priv->params.num_tc = 1; - priv->params.rss_hfunc = ETH_RSS_HASH_XOR; + /* RSS */ + params->rss_hfunc = ETH_RSS_HASH_XOR; + netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, max_channels); +} - netdev_rss_key_fill(priv->params.toeplitz_hash_key, - sizeof(priv->params.toeplitz_hash_key)); +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; - /* Initialize pflags */ - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, - priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); mutex_init(&priv->state_lock); @@ -3642,13 +3983,19 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (fcs_supported) netdev->hw_features |= NETIF_F_RXALL; + if (MLX5_CAP_ETH(mdev, scatter_fcs)) + netdev->hw_features |= 
NETIF_F_RXFCS; + netdev->features = netdev->hw_features; - if (!priv->params.lro_en) + if (!priv->channels.params.lro_en) netdev->features &= ~NETIF_F_LRO; if (fcs_enabled) netdev->features &= ~NETIF_F_RXALL; + if (!priv->channels.params.scatter_fcs_en) + netdev->features &= ~NETIF_F_RXFCS; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && @@ -3708,39 +4055,30 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_vxlan_cleanup(priv); - if (priv->xdp_prog) - bpf_prog_put(priv->xdp_prog); + if (priv->channels.params.xdp_prog) + bpf_prog_put(priv->channels.params.xdp_prog); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; - int i; - err = mlx5e_create_indirect_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + err = mlx5e_create_indirect_rqt(priv); + if (err) return err; - } err = mlx5e_create_direct_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + if (err) goto err_destroy_indirect_rqts; - } err = mlx5e_create_indirect_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + if (err) goto err_destroy_direct_rqts; - } err = mlx5e_create_direct_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + if (err) goto err_destroy_indirect_tirs; - } err = mlx5e_create_flow_steering(priv); if (err) { @@ -3761,8 +4099,7 @@ err_destroy_direct_tirs: err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - for (i = 0; i < priv->profile->max_nch(mdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); return err; @@ -3770,14 +4107,11 @@ err_destroy_indirect_rqts: static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { - int i; - 
mlx5e_tc_cleanup(priv); mlx5e_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); - for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); mlx5e_destroy_rqt(priv, &priv->indir_rqt); } @@ -3801,21 +4135,22 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep rep; + u16 max_mtu; + + mlx5e_init_l2_addr(priv); + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); + mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); - rep.load = mlx5e_nic_rep_load; - rep.unload = mlx5e_nic_rep_unload; - rep.vport = FDB_UPLINK_VPORT; - rep.netdev = netdev; - mlx5_eswitch_register_vport_rep(esw, 0, &rep); - } + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -3828,16 +4163,29 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) } queue_work(priv->wq, &priv->set_rx_mode_work); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); queue_work(priv->wq, &priv->set_rx_mode_work); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5_eswitch_unregister_vport_rep(esw, 0); + mlx5e_unregister_vport_reps(priv); + 
mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); } @@ -3853,9 +4201,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .disable = mlx5e_nic_disable, .update_stats = mlx5e_update_stats, .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, }; +/* mlx5e generic netdev management API (move to en_common.c) */ + struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, void *ppriv) @@ -3872,6 +4224,10 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return NULL; } +#ifdef CONFIG_RFS_ACCEL + netdev->rx_cpu_rmap = mdev->rmap; +#endif + profile->init(mdev, netdev, profile, ppriv); netif_carrier_off(netdev); @@ -3891,14 +4247,12 @@ err_cleanup_nic: return NULL; } -int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +int mlx5e_attach_netdev(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; const struct mlx5e_profile *profile; - struct mlx5e_priv *priv; - u16 max_mtu; int err; - priv = netdev_priv(netdev); profile = priv->profile; clear_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -3906,7 +4260,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) if (err) goto out; - err = mlx5e_open_drop_rq(priv); + err = mlx5e_open_drop_rq(mdev, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); goto err_cleanup_tx; @@ -3918,28 +4272,13 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) mlx5e_create_q_counter(priv); - mlx5e_init_l2_addr(priv); - - /* MTU range: 68 - hw-specific max */ - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); - - mlx5e_set_dev_port_mtu(netdev); - if (profile->enable) profile->enable(priv); - rtnl_lock(); - if (netif_running(netdev)) - 
mlx5e_open(netdev); - netif_device_attach(netdev); - rtnl_unlock(); - return 0; err_close_drop_rq: - mlx5e_close_drop_rq(priv); + mlx5e_close_drop_rq(&priv->drop_rq); err_cleanup_tx: profile->cleanup_tx(priv); @@ -3948,66 +4287,34 @@ out: return err; } -static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - u8 mac[ETH_ALEN]; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - mlx5_query_nic_vport_mac_address(mdev, 0, mac); - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep rep; - - rep.load = mlx5e_vport_rep_load; - rep.unload = mlx5e_vport_rep_unload; - rep.vport = vport; - ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, vport, &rep); - } -} - -static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); -} - -void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +void mlx5e_detach_netdev(struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = netdev_priv(netdev); const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - rtnl_lock(); - if (netif_running(netdev)) - mlx5e_close(netdev); - netif_device_detach(netdev); - rtnl_unlock(); - if (profile->disable) profile->disable(priv); flush_workqueue(priv->wq); mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); - mlx5e_close_drop_rq(priv); + mlx5e_close_drop_rq(&priv->drop_rq); profile->cleanup_tx(priv); cancel_delayed_work_sync(&priv->update_stats_work); } +void mlx5e_destroy_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + struct net_device *netdev = 
priv->netdev; + + destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); + free_netdev(netdev); +} + /* mlx5e_attach and mlx5e_detach scope should be only creating/destroying * hardware contexts and to connect it to the current netdev. */ @@ -4024,13 +4331,12 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) if (err) return err; - err = mlx5e_attach_netdev(mdev, netdev); + err = mlx5e_attach_netdev(priv); if (err) { mlx5e_destroy_mdev_resources(mdev); return err; } - mlx5e_register_vport_rep(mdev); return 0; } @@ -4042,8 +4348,7 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) if (!netif_device_present(netdev)) return; - mlx5e_unregister_vport_rep(mdev); - mlx5e_detach_netdev(mdev, netdev); + mlx5e_detach_netdev(priv); mlx5e_destroy_mdev_resources(mdev); } @@ -4051,7 +4356,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); - void *ppriv = NULL; + struct mlx5e_rep_priv *rpriv = NULL; void *priv; int vport; int err; @@ -4061,10 +4366,17 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - ppriv = &esw->offloads.vport_reps[0]; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) { + mlx5_core_warn(mdev, + "Not creating net device, Failed to alloc rep priv data\n"); + return NULL; + } + rpriv->rep = &esw->offloads.vport_reps[0]; + } - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); goto err_unregister_reps; @@ -4090,33 +4402,25 @@ err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: - mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_netdev(priv); err_unregister_reps: for (vport = 1; vport < total_vfs; vport++) 
mlx5_eswitch_unregister_vport_rep(esw, vport); + kfree(rpriv); return NULL; } -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) -{ - const struct mlx5e_profile *profile = priv->profile; - struct net_device *netdev = priv->netdev; - - destroy_workqueue(priv->wq); - if (profile->cleanup) - profile->cleanup(priv); - free_netdev(netdev); -} - static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; + void *ppriv = priv->ppriv; unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); - mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); } static void *mlx5e_get_netdev(void *vpriv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f621373bd7a5..79462c0368a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,10 +34,14 @@ #include <linux/mlx5/fs.h> #include <net/switchdev.h> #include <net/pkt_cls.h> +#include <net/netevent.h> +#include <net/arp.h> #include "eswitch.h" #include "en.h" +#include "en_rep.h" #include "en_tc.h" +#include "fs_core.h" static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -75,7 +79,8 @@ static void mlx5e_rep_get_strings(struct net_device *dev, static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct rtnl_link_stats64 *vport_stats; struct ifla_vf_stats vf_stats; int err; @@ -102,14 +107,16 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) int i, j; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->params.num_channels; i++) { - rq_stats = &priv->channel[i]->rq.stats; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = 
&c->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; - for (j = 0; j < priv->params.num_tc; j++) { - sq_stats = &priv->channel[i]->sq[j].stats; + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; @@ -163,7 +170,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw->mode == SRIOV_NONE) @@ -182,66 +190,426 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) } int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) - { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_channel *c; - int n, tc, err, num_sqs = 0; + int n, tc, num_sqs = 0; + int err = -ENOMEM; u16 *sqs; - sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(u16), GFP_KERNEL); if (!sqs) - return -ENOMEM; + goto out; - for (n = 0; n < priv->params.num_channels; n++) { - c = priv->channel[n]; + for (n = 0; n < priv->channels.num; n++) { + c = priv->channels.c[n]; for (tc = 0; tc < c->num_tc; tc++) sqs[num_sqs++] = c->sq[tc].sqn; } err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); - kfree(sqs); + +out: + if (err) + netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err); return err; } -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) { - struct net_device *netdev = rep->netdev; 
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms, + DELAY_PROBE_TIME); +#else + unsigned long ipv6_interval = ~0UL; +#endif + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, + DELAY_PROBE_TIME); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - return mlx5e_add_sqs_fwd_rules(priv); - return 0; + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); } -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - mlx5_eswitch_sqs2vport_stop(esw, rep); + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); } -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { - struct net_device *netdev = rep->netdev; + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe; - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + 
mlx5e_rep_queue_neigh_stats_work(priv); - /* clean (and re-init) existing uplink offloaded TC rules */ - mlx5e_tc_cleanup(priv); - mlx5e_tc_init(priv); + list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + refcount_inc(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) + kfree(nhe); +} + +static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + + ASSERT_RTNL(); + + if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_tc_encap_flows_del(priv, e); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + + mlx5e_tc_encap_flows_add(priv, e); + } +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + bool encap_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. 
+ */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + priv = netdev_priv(e->out_dev); + + if (encap_connected != neigh_connected || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* We are in atomic context and can't take RTNL mutex, so use + * spin_lock_bh to lookup the neigh table. bh is used since + * netevent can be called from a softirq context. 
+ */ + spin_lock_bh(&neigh_update->encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + if (!nhe) { + spin_unlock_bh(&neigh_update->encap_lock); + return NOTIFY_DONE; + } + + /* This assignment is valid as long as the the neigh reference + * is taken + */ + nhe->n = n; + + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + mlx5e_rep_neigh_entry_hold(nhe); + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } + spin_unlock_bh(&neigh_update->encap_lock); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use spin_lock_bh to walk the neigh list and look for + * the relevant device. bh is used since netevent can be + * called from a softirq context. 
+ */ + spin_lock_bh(&neigh_update->encap_lock); + list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + spin_unlock_bh(&neigh_update->encap_lock); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + spin_lock_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int 
mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + spin_lock_bh(&rpriv->neigh_update.encap_lock); + + list_del(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + spin_unlock_bh(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under RTNL lock or under the + * representor's encap_lock in case RTNL mutex can't be held. + */ +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); +} + +static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} + +static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + /* The neigh hash entry must be removed from the hash table 
regardless + * of the reference count value, so it won't be found by the next + * neigh notification call. The neigh hash entry reference count is + * incremented only during creation and neigh notification calls and + * protects from freeing the nhe struct. + */ + mlx5e_rep_neigh_entry_remove(priv, nhe); + mlx5e_rep_neigh_entry_release(nhe); +} + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + int err; + + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + if (err) + return err; + } + list_add(&e->encap_list, &nhe->encap_list); + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + + list_del(&e->encap_list); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + + if (list_empty(&nhe->encap_list)) + mlx5e_rep_neigh_entry_destroy(priv, nhe); } static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -259,7 +627,8 @@ static int mlx5e_rep_open(struct net_device *dev) static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); @@ -271,7 +640,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + 
struct mlx5_eswitch_rep *rep = rpriv->rep; int ret; ret = snprintf(buf, len, "%d", rep->vport - 1); @@ -314,18 +684,25 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; - if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS) + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return false; + + rep = rpriv->rep; + if (esw->mode == SRIOV_OFFLOADS && + rep && rep->vport == FDB_UPLINK_VPORT) return true; return false; } -bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; if (rep && rep->vport != FDB_UPLINK_VPORT) return true; @@ -397,42 +774,23 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_offload_stats = mlx5e_get_offload_stats, }; -static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - struct mlx5e_priv *priv = netdev_priv(netdev); u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; - priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); - - priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); - - priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.num_tc = 1; - - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; - - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; + params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - mutex_init(&priv->state_lock); + params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); + params->num_tc = 1; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; } static void mlx5e_build_rep_netdev(struct net_device *netdev) @@ -458,30 +816,39 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, void *ppriv) { - mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->channels.params.num_channels = profile->max_nch(mdev); + mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); } static int 
mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_flow_handle *flow_rule; int err; - int i; + + mlx5e_init_l2_addr(priv); err = mlx5e_create_direct_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + if (err) return err; - } err = mlx5e_create_direct_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + if (err) goto err_destroy_direct_rqts; - } flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, @@ -503,21 +870,19 @@ err_del_flow_rule: err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); return err; } static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = priv->ppriv; - int i; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; mlx5e_tc_cleanup(priv); mlx5_del_flow_rules(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); } static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) @@ -546,56 +911,181 @@ static struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .update_stats = mlx5e_rep_update_stats, .max_nch = mlx5e_get_rep_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, .max_tc = 1, }; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +/* e-Switch vport representors */ + +static int +mlx5e_nic_rep_load(struct 
mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + int err; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_remove_sqs; + + return 0; + +err_remove_sqs: + mlx5e_remove_sqs_fwd_rules(priv); + return err; +} + +static void +mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); + + mlx5e_rep_neigh_cleanup(rpriv); +} + +static int +mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; struct net_device *netdev; int err; - netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); + kfree(rpriv); return -EINVAL; } rep->netdev = netdev; + rpriv->rep = rep; - err = mlx5e_attach_netdev(esw->dev, netdev); + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { pr_warn("Failed to attach representor netdev for vport %d\n", rep->vport); goto err_destroy_netdev; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) { + pr_warn("Failed to initialized neighbours handling for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_detach_netdev; + goto err_neigh_cleanup; } return 0; 
+err_neigh_cleanup: + mlx5e_rep_neigh_cleanup(rpriv); + err_detach_netdev: - mlx5e_detach_netdev(esw->dev, netdev); + mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_netdev: - mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev)); - + mlx5e_destroy_netdev(netdev_priv(netdev)); + kfree(rpriv); return err; } -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct net_device *netdev = rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + void *ppriv = priv->ppriv; + + unregister_netdev(rep->netdev); + + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_detach_netdev(priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); /* mlx5e_rep_priv */ +} + +static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); + } +} + +static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); + rep.load = mlx5e_nic_rep_load; + 
rep.unload = mlx5e_nic_rep_unload; + rep.vport = FDB_UPLINK_VPORT; + rep.netdev = priv->netdev; + mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/ + + mlx5e_rep_register_vf_vports(priv); /* VFs vports */ +} + +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - unregister_netdev(netdev); - mlx5e_detach_netdev(esw->dev, netdev); - mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev)); + mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ + mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 000000000000..a0a1a7a1d6c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include <net/ip_tunnels.h> +#include <linux/rhashtable.h> +#include "eswitch.h" +#include "en.h" + +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + spinlock_t encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; +}; + +struct mlx5e_neigh { + struct net_device *dev; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* valid only when the neigh reference is taken during + * neigh_update_work workqueue callback. + */ + struct neighbour *n; + struct work_struct neigh_update_work; + + /* neigh hash entry can be deleted only when the refcount is zero. 
+ * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded trafic pass over one of the + * neigh hash entry flows. Use it to periodically update the neigh + * 'used' value and avoid neigh deleting by the kernel. + */ + unsigned long reported_lastuse; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_encap_entry { + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + struct mlx5e_neigh m_neigh; + /* a node of the eswitch encap hash table which keeping all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct ip_tunnel_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; + u8 flags; + char *encap_header; + int encap_size; +}; + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv); +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); + +int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); +bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bafcb349a50c..7b1566f0ae58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -39,6 +39,8 @@ #include "en.h" #include "en_tc.h" #include "eswitch.h" +#include "en_rep.h" +#include "ipoib.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -156,28 +158,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; } -void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) -{ - bool was_opened; - - if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) - return; - - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - return; - - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(priv->netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val); - mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); - - if (was_opened) - mlx5e_open_locked(priv->netdev); - -} - #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, @@ -331,7 +311,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); @@ -341,7 +321,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, false); + mlx5e_post_nop(wq, sq->sqn, &sq->pc); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); @@ -353,7 +333,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq 
*rq, u16 ix) sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, @@ -637,37 +617,36 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); } -static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_tx_wqe *wqe; - u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + u16 pi = (sq->pc - 1) & wq->sz_m1; /* last pi */ wqe = mlx5_wq_cyc_get_wqe(wq, pi); - wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); } static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, const struct xdp_buff *xdp) { - struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5e_xdpsq *sq = &rq->xdpsq; struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = sq->pc & wq->sz_m1; + u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; - u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT; ptrdiff_t data_offset = xdp->data - xdp->data_hard_start; dma_addr_t dma_addr = di->addr + data_offset; unsigned int dma_len = xdp->data_end - xdp->data; + prefetchw(wqe); + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { rq->stats.xdp_drop++; @@ -675,48 +654,42 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, return false; } - if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { - if (sq->db.xdp.doorbell) { + if 
(unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { + if (sq->db.doorbell) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); - sq->db.xdp.doorbell = false; + sq->db.doorbell = false; } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); return false; } - dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, - PCI_DMA_TODEVICE); + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); - memset(wqe, 0, sizeof(*wqe)); + cseg->fm_ce_se = 0; dseg = (struct mlx5_wqe_data_seg *)eseg + 1; + /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; - - ds_cnt += MLX5E_XDP_IHS_DS_COUNT; dseg++; } /* write the dma part */ dseg->addr = cpu_to_be64(dma_addr); dseg->byte_count = cpu_to_be32(dma_len); - dseg->lkey = sq->mkey_be; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->db.xdp.di[pi] = *di; - wi->opcode = MLX5_OPCODE_SEND; - wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; - sq->pc += MLX5E_XDP_TX_WQEBBS; + sq->db.di[pi] = *di; + sq->pc++; - sq->db.xdp.doorbell = true; + sq->db.doorbell = true; rq->stats.xdp_tx++; return true; } @@ -837,7 +810,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = rq->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -932,7 +906,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; } - prefetch(skb->data); + prefetchw(skb->data); cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); 
mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); @@ -950,7 +924,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) @@ -977,9 +951,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } - if (xdp_sq->db.xdp.doorbell) { - mlx5e_xmit_xdp_doorbell(xdp_sq); - xdp_sq->db.xdp.doorbell = false; + if (xdpsq->db.doorbell) { + mlx5e_xmit_xdp_doorbell(xdpsq); + xdpsq->db.doorbell = false; } mlx5_cqwq_update_db_record(&cq->wq); @@ -989,3 +963,152 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) return work_done; } + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_xdpsq *sq; + struct mlx5e_rq *rq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_xdpsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + rq = container_of(sq, struct mlx5e_rq, xdpsq); + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.di[ci]; + + sqcc++; + /* Recycle RX page */ + mlx5e_page_release(rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_rq *rq = container_of(sq, 
struct mlx5e_rq, xdpsq); + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.di[ci]; + sq->cc++; + + mlx5e_page_release(rq, di, false); + } +} + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#define MLX5_IB_GRH_DGID_OFFSET 24 +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_GID_SIZE 16 + +static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct net_device *netdev = rq->netdev; + u8 *dgid; + u8 g; + + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; + if ((!g) || dgid[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + + /* TODO: IB/ipoib: Allow mcast packets from other VFs + * 68996a6e760e5c74654723eeb57bf65628ae87f4 + */ + + skb_pull(skb, MLX5_IB_GRH_BYTES); + + skb->protocol = *((__be16 *)(skb->data)); + + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + skb_reset_mac_header(skb); + skb_pull(skb, MLX5_IPOIB_ENCAP_LEN); + + skb->dev = netdev; + + rq->stats.csum_complete++; + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; +} + +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + 
+wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +#endif /* CONFIG_MLX5_CORE_IPOIB */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index cbfac06b7ffd..02dd3a95ed8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -293,7 +293,7 @@ void mlx5e_rx_am_work(struct work_struct *work) struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am); struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix]; - mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq, + mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq, cur_profile.usec, cur_profile.pkts); am->state = MLX5E_AM_START_MEASURE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5621dcfda4f1..5225f2226a67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -236,12 +236,9 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, { int err = 0; - err = mlx5e_refresh_tirs_self_loopback(priv->mdev, true); - if (err) { - netdev_err(priv->netdev, - "\tFailed to enable UC loopback err(%d)\n", err); + err = mlx5e_refresh_tirs(priv, true); + if (err) return err; - } lbtp->loopback_ok = false; init_completion(&lbtp->comp); @@ -258,7 +255,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, struct mlx5e_lbt_priv *lbtp) { dev_remove_pack(&lbtp->pt); - mlx5e_refresh_tirs_self_loopback(priv->mdev, false); + mlx5e_refresh_tirs(priv, false); } #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5436866798f4..11c27e4fadf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ 
-42,14 +42,25 @@ #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_pedit.h> #include <net/vxlan.h> +#include <net/arp.h> #include "en.h" +#include "en_rep.h" #include "en_tc.h" #include "eswitch.h" #include "vxlan.h" +struct mlx5_nic_flow_attr { + u32 action; + u32 flow_tag; + u32 mod_hdr_id; +}; + enum { MLX5E_TC_FLOW_ESWITCH = BIT(0), + MLX5E_TC_FLOW_NIC = BIT(1), + MLX5E_TC_FLOW_OFFLOADED = BIT(2), }; struct mlx5e_tc_flow { @@ -58,7 +69,16 @@ struct mlx5e_tc_flow { u8 flags; struct mlx5_flow_handle *rule; struct list_head encap; /* flows sharing the same encap */ - struct mlx5_esw_flow_attr *attr; + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +struct mlx5e_tc_flow_parse_attr { + struct mlx5_flow_spec spec; + int num_mod_hdr_actions; + void *mod_hdr_actions; }; enum { @@ -71,24 +91,26 @@ enum { static struct mlx5_flow_handle * mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = { - .action = action, - .flow_tag = flow_tag, + .action = attr->action, + .flow_tag = attr->flow_tag, .encap_id = 0, }; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; bool table_created = false; + int err; - if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.vlan.ft.t; - } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); @@ -97,6 
+119,19 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, dest.counter = counter; } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr->num_mod_hdr_actions, + parse_attr->mod_hdr_actions, + &attr->mod_hdr_id); + flow_act.modify_id = attr->mod_hdr_id; + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_create_mod_hdr_id; + } + } + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, @@ -114,8 +149,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, table_created = true; } - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1); + parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, &dest, 1); if (IS_ERR(rule)) goto err_add_rule; @@ -128,6 +164,10 @@ err_add_rule: priv->fs.tc.t = NULL; } err_create_ft: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +err_create_mod_hdr_id: mlx5_fc_destroy(dev, counter); return rule; @@ -138,47 +178,195 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, { struct mlx5_fc *counter = NULL; - if (!IS_ERR(flow->rule)) { - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); - } + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + + if (flow->nic_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + flow->nic_attr->mod_hdr_id); } +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle 
* mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_flow_handle *rule; int err; err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) - return ERR_PTR(err); + if (err) { + rule = ERR_PTR(err); + goto err_add_vlan; + } - return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); -} + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB, + parse_attr->num_mod_hdr_actions, + parse_attr->mod_hdr_actions, + &attr->mod_hdr_id); + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_mod_hdr; + } + } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + + return rule; + +err_add_rule: + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +err_mod_hdr: + mlx5_eswitch_del_vlan_action(esw, attr); +err_add_vlan: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); + return rule; +} static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; - mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr); + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + } - mlx5_eswitch_del_vlan_action(esw, flow->attr); + mlx5_eswitch_del_vlan_action(esw, flow->esw_attr); - if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if 
(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { mlx5e_detach_encap(priv, flow); + kvfree(flow->esw_attr->parse_attr); + } + + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + int err; + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + &e->encap_id); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", + err); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, &e->flows, encap) { + flow->esw_attr->encap_id = e->encap_id; + flow->rule = mlx5e_tc_add_fdb_flow(priv, + flow->esw_attr->parse_attr, + flow); + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_encap_dealloc(priv->mdev, e->encap_id); + } +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + u64 bytes, packets, lastuse = 0; + struct mlx5e_tc_flow *flow; + struct mlx5e_encap_entry *e; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + + if (m_neigh->family 
== AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + continue; + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + } + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); + if (!n) { + WARN(1, "The neighbour already freed\n"); + return; + } + + neigh_event_send(n, NULL); + neigh_release(n); + } } static void mlx5e_detach_encap(struct mlx5e_priv *priv, @@ -188,22 +376,20 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, list_del(&flow->encap); if (list_empty(next)) { - struct mlx5_encap_entry *e; + struct mlx5e_encap_entry *e; + + e = list_entry(next, struct mlx5e_encap_entry, flows); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); - e = list_entry(next, struct mlx5_encap_entry, flows); - if (e->n) { + if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - neigh_release(e->n); - } + hlist_del_rcu(&e->encap_hlist); + kfree(e->encap_header); kfree(e); } } -/* we get here also when setting rule to the FW failed, etc. It means that the - * flow rule itself might not exist, but some offloading related to the actions - * should be cleaned. 
- */ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -631,16 +817,18 @@ static int parse_cls_flower(struct mlx5e_priv *priv, { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; u8 min_inline; int err; err = __parse_cls_flower(priv, spec, f, &min_inline); - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && - rep->vport != FDB_UPLINK_VPORT) { - if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < min_inline) { + if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + rep = rpriv->rep; + if (rep->vport != FDB_UPLINK_VPORT && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline)) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); @@ -651,29 +839,313 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return err; } +struct pedit_headers { + struct ethhdr eth; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +static int pedit_header_offsets[] = { + [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers *masks, + struct pedit_headers *vals) +{ + u32 *curr_pmask, *curr_pval; + + if (hdr_type >= __PEDIT_HDR_TYPE_MAX) + goto out_err; + + curr_pmask = (u32 *)(pedit_header(masks, 
hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + + if (*curr_pmask & mask) /* disallow acting twice on the same location */ + goto out_err; + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +struct mlx5_fields { + u8 field; + u8 size; + u32 offset; +}; + +static struct mlx5_fields fields[] = { + {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])}, + {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_dest[4])}, + {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])}, + {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])}, + {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)}, + + {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)}, + {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)}, + + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])}, + + 
{MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)}, + {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)}, + {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5}, + + {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)}, + {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)}, +}; + +/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, it says how many HW actions were + * actually parsed. + */ +static int offload_pedit_fields(struct pedit_headers *masks, + struct pedit_headers *vals, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + int i, action_size, nactions, max_actions, first, last; + void *s_masks_p, *a_masks_p, *vals_p; + u32 s_mask, a_mask, val; + struct mlx5_fields *f; + u8 cmd, field_bsize; + unsigned long mask; + void *action; + + set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; + add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; + add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + action = parse_attr->mod_hdr_actions; + max_actions = parse_attr->num_mod_hdr_actions; + nactions = 0; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + /* avoid seeing bits set from previous iterations */ + s_mask = a_mask = mask = val = 0; + + s_masks_p = (void *)set_masks + f->offset; + a_masks_p = (void *)add_masks + f->offset; + + memcpy(&s_mask, s_masks_p, f->size); + memcpy(&a_mask, a_masks_p, f->size); + + if (!s_mask && !a_mask) /* nothing to offload here */ + continue; + + if (s_mask && a_mask) { + printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); + return -EOPNOTSUPP; + } + + if (nactions == max_actions) { + 
printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); + return -EOPNOTSUPP; + } + + if (s_mask) { + cmd = MLX5_ACTION_TYPE_SET; + mask = s_mask; + vals_p = (void *)set_vals + f->offset; + /* clear to denote we consumed this field */ + memset(s_masks_p, 0, f->size); + } else { + cmd = MLX5_ACTION_TYPE_ADD; + mask = a_mask; + vals_p = (void *)add_vals + f->offset; + /* clear to denote we consumed this field */ + memset(a_masks_p, 0, f->size); + } + + memcpy(&val, vals_p, f->size); + + field_bsize = f->size * BITS_PER_BYTE; + first = find_first_bit(&mask, field_bsize); + last = find_last_bit(&mask, field_bsize); + if (first > 0 || last != (field_bsize - 1)) { + printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", + mask); + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, action, action_type, cmd); + MLX5_SET(set_action_in, action, field, f->field); + + if (cmd == MLX5_ACTION_TYPE_SET) { + MLX5_SET(set_action_in, action, offset, 0); + /* length is num of bits to be written, zero means length of 32 */ + MLX5_SET(set_action_in, action, length, field_bsize); + } + + if (field_bsize == 32) + MLX5_SET(set_action_in, action, data, ntohl(val)); + else if (field_bsize == 16) + MLX5_SET(set_action_in, action, data, ntohs(val)); + else if (field_bsize == 8) + MLX5_SET(set_action_in, action, data, val); + + action += action_size; + nactions++; + } + + parse_attr->num_mod_hdr_actions = nactions; + return 0; +} + +static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + int nkeys, action_size, max_actions; + + nkeys = tcf_pedit_nkeys(a); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + 
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions); + + /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ + max_actions = min(max_actions, nkeys * 16); + + parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); + if (!parse_attr->mod_hdr_actions) + return -ENOMEM; + + parse_attr->num_mod_hdr_actions = max_actions; + return 0; +} + +static const struct pedit_headers zero_masks = {}; + +static int parse_tc_pedit_action(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; + int nkeys, i, err = -EOPNOTSUPP; + u32 mask, val, offset; + u8 cmd, htype; + + nkeys = tcf_pedit_nkeys(a); + + memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + err = -EOPNOTSUPP; /* can't be all optimistic */ + + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { + printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n"); + goto out_err; + } + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { + printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd); + goto out_err; + } + + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + if (err) + goto out_err; + } + + err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + if (err) + goto out_err; + + err = offload_pedit_fields(masks, vals, parse_attr); + if (err < 0) + goto out_dealloc_parsed_actions; + + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &masks[cmd]; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + printk(KERN_WARNING "mlx5: attempt to offload an unsupported field 
(cmd %d)\n", + cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + err = -EOPNOTSUPP; + goto out_dealloc_parsed_actions; + } + } + + return 0; + +out_dealloc_parsed_actions: + kfree(parse_attr->mod_hdr_actions); +out_err: + return err; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; const struct tc_action *a; LIST_HEAD(actions); + int err; if (tc_no_actions(exts)) return -EINVAL; - *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - *action = 0; + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + attr->action = 0; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ - if (*action) + if (attr->action) return -EINVAL; if (is_tcf_gact_shot(a)) { - *action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) - *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } @@ -686,8 +1158,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - *flow_tag = mark; - *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flow_tag = mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } @@ -853,16 +1325,17 @@ static void gen_vxlan_header_ipv6(struct net_device *out_dev, static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device 
**out_dev) + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; int ttl, err; + u8 nud_state; if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -887,25 +1360,36 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, fl4.daddr = tun_key->u.ipv4.dst; fl4.saddr = tun_key->u.ipv4.src; - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - e->n = n; - e->out_dev = *out_dev; - - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv4(*out_dev, encap_header, + gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, ttl, fl4.daddr, fl4.saddr, tun_key->tp_dst, @@ -913,31 +1397,49 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv4_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device **out_dev) - + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; int err, ttl = 0; + u8 nud_state; if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -963,25 +1465,36 @@ static int 
mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, fl6.daddr = tun_key->u.ipv6.dst; fl6.saddr = tun_key->u.ipv6.src; - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - - e->n = n; - e->out_dev = *out_dev; - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv6(*out_dev, encap_header, + gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, ttl, &fl6.daddr, &fl6.saddr, tun_key->tp_dst, @@ -989,31 +1502,51 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv6_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + 
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, - struct mlx5_esw_flow_attr *attr) + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); - struct mlx5e_priv *up_priv = netdev_priv(up_dev); unsigned short family = ip_tunnel_info_af(tun_info); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_entry *e; - struct net_device *out_dev; - int tunnel_type, err = -EOPNOTSUPP; + struct mlx5e_encap_entry *e; + int tunnel_type, err = 0; uintptr_t hash_key; bool found = false; @@ -1048,10 +1581,8 @@ vxlan_encap_offload_err: } } - if (found) { - attr->encap = e; - return 0; - } + if (found) + goto attach_flow; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) @@ -1062,16 +1593,21 @@ vxlan_encap_offload_err: INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); - if (err) + if (err && err != -EAGAIN) goto out_err; - attr->encap = e; hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); +attach_flow: + list_add(&flow->encap, &e->flows); + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + attr->encap_id = e->encap_id; + return err; out_err: @@ -1080,20 +1616,22 @@ out_err: } static int 
parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; const struct tc_action *a; LIST_HEAD(actions); bool encap = false; - int err; + int err = 0; if (tc_no_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); - attr->in_rep = priv->ppriv; + attr->in_rep = rpriv->rep; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { @@ -1103,9 +1641,19 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, + parse_attr); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + continue; + } + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev; + struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1115,18 +1663,20 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); - attr->out_rep = out_priv->ppriv; + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; } else if (encap) { err = mlx5e_attach_encap(priv, info, - out_dev, attr); - if (err) + out_dev, &encap_dev, flow); + if (err && err != -EAGAIN) return err; - list_add(&flow->encap, &attr->encap->flows); attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(attr->encap->out_dev); - attr->out_rep = out_priv->ppriv; + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = 
rpriv->rep; + attr->parse_attr = parse_attr; } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -1166,28 +1716,30 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - return 0; + return err; } int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_table *tc = &priv->fs.tc; - int err, attr_size = 0; - u32 flow_tag, action; struct mlx5e_tc_flow *flow; - struct mlx5_flow_spec *spec; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int attr_size, err = 0; u8 flow_flags = 0; if (esw && esw->mode == SRIOV_OFFLOADS) { flow_flags = MLX5E_TC_FLOW_ESWITCH; attr_size = sizeof(struct mlx5_esw_flow_attr); + } else { + flow_flags = MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); } flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec || !flow) { + parse_attr = mlx5_vzalloc(sizeof(*parse_attr)); + if (!parse_attr || !flow) { err = -ENOMEM; goto err_free; } @@ -1195,42 +1747,54 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, flow->cookie = f->cookie; flow->flags = flow_flags; - err = parse_cls_flower(priv, flow, spec, f); + err = parse_cls_flower(priv, flow, &parse_attr->spec, f); if (err < 0) goto err_free; if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); - err = parse_tc_fdb_actions(priv, f->exts, flow); + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); + goto err_handle_encap_flow; + flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { - err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); + err = 
parse_tc_nic_actions(priv, f->exts, parse_attr, flow); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow); } if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_del_rule; + goto err_free; } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) goto err_del_rule; - goto out; + if (flow->flags & MLX5E_TC_FLOW_ESWITCH && + !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + kvfree(parse_attr); + return err; err_del_rule: mlx5e_tc_del_flow(priv, flow); +err_handle_encap_flow: + if (err == -EAGAIN) { + err = rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if (err) + mlx5e_tc_del_flow(priv, flow); + else + return 0; + } + err_free: + kvfree(parse_attr); kfree(flow); -out: - kvfree(spec); return err; } @@ -1249,7 +1813,6 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, mlx5e_tc_del_flow(priv, flow); - kfree(flow); return 0; @@ -1272,6 +1835,9 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!flow) return -EINVAL; + if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) + return 0; + counter = mlx5_flow_rule_counter(flow->rule); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 34bf903fc886..ecbe30d808ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -46,6 +46,15 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, int mlx5e_stats_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +struct mlx5e_neigh_hash_entry; +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + 
static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return atomic_read(&priv->fs.tc.ht.nelems); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 57f5e2d7ebd1..ab3bb026ff9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -33,34 +33,12 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> #include "en.h" +#include "ipoib.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) -void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - - sq->pc++; - sq->stats.nop++; - - if (notify_hw) { - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); - } -} - static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) { @@ -76,25 +54,25 @@ static inline void mlx5e_tx_dma_unmap(struct device *pdev, } } -static inline void mlx5e_dma_push(struct mlx5e_sq *sq, +static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, enum mlx5e_dma_map_type map_type) { u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; - sq->db.txq.dma_fifo[i].addr = addr; - sq->db.txq.dma_fifo[i].size = size; - sq->db.txq.dma_fifo[i].type = map_type; + sq->db.dma_fifo[i].addr = addr; + sq->db.dma_fifo[i].size = size; + sq->db.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) +static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) { - return &sq->db.txq.dma_fifo[i & 
sq->dma_fifo_mask]; + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; } -static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -111,6 +89,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); + u16 num_channels; int up = 0; if (!netdev_get_num_tc(dev)) @@ -122,11 +101,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc */ - if (channel_ix >= priv->params.num_channels) - channel_ix = reciprocal_scale(channel_ix, - priv->params.num_channels); + num_channels = priv->channels.params.num_channels; + if (channel_ix >= num_channels) + channel_ix = reciprocal_scale(channel_ix, num_channels); - return priv->channeltc_to_txq_map[channel_ix][up]; + return priv->channel_tc2txq[channel_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) @@ -175,25 +154,6 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, } } -static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, - struct sk_buff *skb, bool bf) -{ - /* Some NIC TX decisions, e.g loopback, are based on the packet - * headers and occur before the data gather. 
- * Therefore these headers must be copied into the WQE - */ - if (bf) { - u16 ihs = skb_headlen(skb); - - if (skb_vlan_tag_present(skb)) - ihs += VLAN_HLEN; - - if (ihs <= sq->max_inline) - return skb_headlen(skb); - } - return mlx5e_calc_min_inline(sq->min_inline_mode, skb); -} - static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, unsigned int *skb_len, unsigned int len) @@ -218,31 +178,9 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs, mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz); } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) +static inline void +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; - - unsigned char *skb_data = skb->data; - unsigned int skb_len = skb->len; - u8 opcode = MLX5_OPCODE_SEND; - dma_addr_t dma_addr = 0; - unsigned int num_bytes; - bool bf = false; - u16 headlen; - u16 ds_cnt; - u16 ihs; - int i; - - memset(wqe, 0, sizeof(*wqe)); - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -254,74 +192,51 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } } else sq->stats.csum_none++; +} - if (sq->cc != sq->prev_cc) { - sq->prev_cc = sq->cc; - sq->bf_budget = (sq->cc == sq->pc) ? 
MLX5E_SQ_BF_BUDGET : 0; - } - - if (skb_is_gso(skb)) { - eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - opcode = MLX5_OPCODE_LSO; +static inline u16 +mlx5e_txwqe_build_eseg_gso(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, unsigned int *num_bytes) +{ + u16 ihs; - if (skb->encapsulation) { - ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); - sq->stats.tso_inner_packets++; - sq->stats.tso_inner_bytes += skb->len - ihs; - } else { - ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); - sq->stats.tso_packets++; - sq->stats.tso_bytes += skb->len - ihs; - } + eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - sq->stats.packets += skb_shinfo(skb)->gso_segs; - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + if (skb->encapsulation) { + ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + sq->stats.tso_inner_packets++; + sq->stats.tso_inner_bytes += skb->len - ihs; } else { - bf = sq->bf_budget && - !skb->xmit_more && - !skb_shinfo(skb)->nr_frags; - ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); - sq->stats.packets++; - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } - - sq->stats.bytes += num_bytes; - wi->num_bytes = num_bytes; - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (ihs) { - if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); - ihs += VLAN_HLEN; - } else { - memcpy(eseg->inline_hdr.start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); - } - eseg->inline_hdr.sz = cpu_to_be16(ihs); - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); - } else if (skb_vlan_tag_present(skb)) { - eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); - eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + sq->stats.tso_packets++; + sq->stats.tso_bytes += skb->len - ihs; } - dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; 
+ *num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + return ihs; +} - wi->num_dma = 0; +static inline int +mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, + unsigned char *skb_data, u16 headlen, + struct mlx5_wqe_data_seg *dseg) +{ + dma_addr_t dma_addr = 0; + u8 num_dma = 0; + int i; - headlen = skb_len - skb->data_len; if (headlen) { dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - goto dma_unmap_wqe_err; + return -ENOMEM; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); - wi->num_dma++; - + num_dma++; dseg++; } @@ -330,59 +245,120 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, - DMA_TO_DEVICE); + DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - goto dma_unmap_wqe_err; + return -ENOMEM; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - wi->num_dma++; - + num_dma++; dseg++; } - ds_cnt += wi->num_dma; - - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + return num_dma; +} - sq->db.txq.skb[pi] = skb; +static inline void +mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + u8 opcode, u16 ds_cnt, u32 num_bytes, u8 num_dma, + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi; + wi->num_bytes = num_bytes; + wi->num_dma = num_dma; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - sq->pc += wi->num_wqebbs; + wi->skb = skb; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - 
netdev_tx_sent_queue(sq->txq, wi->num_bytes); + netdev_tx_sent_queue(sq->txq, num_bytes); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { + sq->pc += wi->num_wqebbs; + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); sq->stats.stopped++; } - sq->stats.xmit_more += skb->xmit_more; - if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { - int bf_sz = 0; + if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); - if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state)) - bf_sz = wi->num_wqebbs << 3; + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.wqe_info[pi].skb = NULL; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + sq->stats.nop++; + } +} - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); +static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; + u8 opcode = MLX5_OPCODE_SEND; + unsigned int num_bytes; + int num_dma; + u16 headlen; + u16 ds_cnt; + u16 ihs; + + memset(wqe, 0, sizeof(*wqe)); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + sq->stats.packets += skb_shinfo(skb)->gso_segs; + } else { + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + sq->stats.packets++; } + sq->stats.bytes += 
num_bytes; + sq->stats.xmit_more += skb->xmit_more; - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.txq.skb[pi] = NULL; - mlx5e_send_nop(sq, false); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); + ihs += VLAN_HLEN; + } else { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + } + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); } - if (bf) - sq->bf_budget--; + headlen = skb_len - skb->data_len; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) + goto dma_unmap_wqe_err; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); return NETDEV_TX_OK; @@ -398,21 +374,21 @@ dma_unmap_wqe_err: netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sq *sq = priv->txq_to_sq_map[skb_get_queue_mapping(skb)]; + struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; return mlx5e_sq_xmit(sq, skb); } bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { - struct mlx5e_sq *sq; + struct mlx5e_txqsq *sq; u32 dma_fifo_cc; u32 nbytes; u16 npkts; u16 sqcc; int i; - sq = container_of(cq, struct mlx5e_sq, cq); + sq = container_of(cq, struct mlx5e_txqsq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; @@ -450,8 +426,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->db.txq.skb[ci]; - wi 
= &sq->db.txq.wqe_info[ci]; + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -492,7 +468,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) { + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) { netif_tx_wake_queue(sq->txq); sq->stats.wake++; } @@ -500,7 +476,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -509,8 +485,8 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->db.txq.skb[ci]; - wi = &sq->db.txq.wqe_info[ci]; + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; if (!skb) { /* nop */ sq->cc++; @@ -529,36 +505,89 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) } } -static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +#ifdef CONFIG_MLX5_CORE_IPOIB + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + +struct mlx5i_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_datagram_seg datagram; + struct mlx5_wqe_eth_pad pad; + struct mlx5_wqe_eth_seg eth; +}; + +static inline void +mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, + struct mlx5_wqe_datagram_seg *dseg) { - struct mlx5e_sq_wqe_info *wi; - struct mlx5e_dma_info *di; - u16 ci; + memcpy(&dseg->av, av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); +} - while (sq->cc != sq->pc) { - ci = sq->cc & sq->wq.sz_m1; - di = &sq->db.xdp.di[ci]; - wi = &sq->db.xdp.wqe_info[ci]; +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey) +{ 
+ struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5i_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; - if (wi->opcode == MLX5_OPCODE_NOP) { - sq->cc++; - continue; - } + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_datagram_seg *datagram = &wqe->datagram; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - sq->cc += wi->num_wqebbs; + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; + u8 opcode = MLX5_OPCODE_SEND; + unsigned int num_bytes; + int num_dma; + u16 headlen; + u16 ds_cnt; + u16 ihs; - mlx5e_page_release(&sq->channel->rq, di, false); + memset(wqe, 0, sizeof(*wqe)); + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + } else { + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } -} -void mlx5e_free_sq_descs(struct mlx5e_sq *sq) -{ - switch (sq->type) { - case MLX5E_SQ_TXQ: - mlx5e_free_txq_sq_descs(sq); - break; - case MLX5E_SQ_XDP: - mlx5e_free_xdp_sq_descs(sq); - break; + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); } + + headlen = skb_len - skb->data_len; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) + goto dma_unmap_wqe_err; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); + + return NETDEV_TX_OK; + +dma_unmap_wqe_err: + sq->stats.dropped++; + mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); + + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; } + 
+#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index e5c12a732aa1..5ca6714e3e02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -37,124 +37,69 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) struct mlx5_cqwq *wq = &cq->wq; u32 ci = mlx5_cqwq_get_ci(wq); struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); - int cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; - int sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; if (cqe_ownership_bit != sw_ownership_val) return NULL; /* ensure cqe content is read after cqe ownership bit */ - rmb(); + dma_rmb(); return cqe; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5_cqe64 *cqe, + u16 *sqcc) { - struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq); - struct mlx5_wq_cyc *wq; - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_rq *rq = &sq->channel->rq; + + prefetch(rq); + mlx5_cqwq_pop(&cq->wq); + *sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); return; + } - cqe = mlx5e_get_cqe(cq); - if (likely(!cqe)) + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); return; + } - wq = &sq->wq; - - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - do { - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct 
mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - - mlx5_cqwq_pop(&cq->wq); - sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - break; - } - - switch (icowi->opcode) { - case MLX5_OPCODE_NOP: - break; - case MLX5_OPCODE_UMR: - mlx5e_post_rx_mpwqe(&sq->channel->rq); - break; - default: - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); - } - - } while ((cqe = mlx5e_get_cqe(cq))); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); } -static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { - struct mlx5e_sq *sq; + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; u16 sqcc; - int i; - - sq = container_of(cq, struct mlx5e_sq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return false; + return; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; - u16 wqe_counter; - bool last_wqe; - - cqe = mlx5e_get_cqe(cq); - if (!cqe) - break; - - mlx5_cqwq_pop(&cq->wq); - - wqe_counter = be16_to_cpu(cqe->wqe_counter); - - do { - struct mlx5e_sq_wqe_info *wi; - struct mlx5e_dma_info *di; - u16 ci; - - last_wqe = (sqcc == wqe_counter); - - ci = sqcc & sq->wq.sz_m1; - di = &sq->db.xdp.di[ci]; - wi = &sq->db.xdp.wqe_info[ci]; - - if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { - sqcc++; - continue; - } - - sqcc += wi->num_wqebbs; - /* Recycle RX page */ - mlx5e_page_release(&sq->channel->rq, di, true); - } while 
(!last_wqe); - } + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); mlx5_cqwq_update_db_record(&cq->wq); @@ -162,7 +107,6 @@ static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) wmb(); sq->cc = sqcc; - return (i == MLX5E_TX_CQ_POLL_BUDGET); } int mlx5e_napi_poll(struct napi_struct *napi, int budget) @@ -178,12 +122,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - if (c->xdp) - busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); - mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); @@ -224,8 +168,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); struct mlx5e_channel *c = cq->channel; - struct mlx5e_priv *priv = c->priv; - struct net_device *netdev = priv->netdev; + struct net_device *netdev = c->netdev; netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", __func__, mcq->cqn, event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index fcd5bc7e31db..2e34d95ea776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -53,13 +53,6 @@ struct esw_uc_addr { u32 vport; }; -/* E-Switch MC FDB table hash node */ -struct esw_mc_addr { /* SRIOV only */ - struct l2addr_node node; - struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ - u32 refcnt; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; @@ -337,6 +330,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + 
struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; @@ -362,7 +356,9 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0); + + ft_attr.max_fte = table_size; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); @@ -814,7 +810,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, bool promisc, bool mc_promisc) { - struct esw_mc_addr *allmulti_addr = esw->mc_promisc; + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) @@ -1685,7 +1681,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", esw->enabled_vports, esw->mode); - mc_promisc = esw->mc_promisc; + mc_promisc = &esw->mc_promisc; nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) @@ -1729,7 +1725,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); - struct esw_mc_addr *mc_promisc; struct mlx5_eswitch *esw; int vport_num; int err; @@ -1758,13 +1753,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->l2_table.size = l2_table_size; - mc_promisc = kzalloc(sizeof(*mc_promisc), GFP_KERNEL); - if (!mc_promisc) { - err = -ENOMEM; - goto abort; - } - esw->mc_promisc = mc_promisc; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1803,6 +1791,11 @@ int 
mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; dev->priv.eswitch = esw; return 0; @@ -1827,7 +1820,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); - kfree(esw->mc_promisc); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ad329b1680b4..b746f62c8c79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,7 +36,6 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <net/devlink.h> -#include <net/ip_tunnels.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -210,6 +209,14 @@ struct mlx5_esw_offload { DECLARE_HASHTABLE(encap_tbl, 8); u8 inline_mode; u64 num_flows; + u8 encap; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; }; struct mlx5_eswitch { @@ -225,7 +232,7 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr *mc_promisc; + struct esw_mc_addr mc_promisc; struct { bool enabled; @@ -285,20 +292,8 @@ enum { SET_VLAN_INSERT = BIT(1) }; -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 - -struct mlx5_encap_entry { - struct hlist_node encap_hlist; - struct list_head flows; - u32 encap_id; - struct neighbour *n; - struct ip_tunnel_info tun_info; - unsigned char 
h_dest[ETH_ALEN]; /* destination eth addr */ - - struct net_device *out_dev; - int tunnel_type; -}; +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x4000 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000 struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; @@ -307,7 +302,9 @@ struct mlx5_esw_flow_attr { int action; u16 vlan; bool vlan_handled; - struct mlx5_encap_entry *encap; + u32 encap_id; + u32 mod_hdr_id; + struct mlx5e_tc_flow_parse_attr *parse_attr; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, @@ -321,6 +318,8 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, int vport_index, struct mlx5_eswitch_rep *rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d111cebca9f1..f991f669047e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -68,8 +68,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) - return ERR_CAST(counter); + if (IS_ERR(counter)) { + rule = ERR_CAST(counter); + goto err_counter_alloc; + } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[i].counter = counter; i++; @@ -86,17 +88,25 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) spec->match_criteria_enable |= 
MLX5_MATCH_INNER_HEADERS; - if (attr->encap) - flow_act.encap_id = attr->encap->encap_id; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_id = attr->mod_hdr_id; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + flow_act.encap_id = attr->encap_id; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) - mlx5_fc_destroy(esw->dev, counter); + goto err_add_rule; else esw->offloads.num_flows++; return rule; + +err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_counter_alloc: + return rule; } void @@ -106,12 +116,10 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, { struct mlx5_fc *counter = NULL; - if (!IS_ERR(rule)) { - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; - } + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rules(rule); + mlx5_fc_destroy(esw->dev, counter); + esw->offloads.num_flows--; } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -418,30 +426,21 @@ out: return err; } -#define MAX_PF_SQ 256 #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) { - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int table_size, ix, esw_size, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - struct mlx5_flow_group *g; - u32 *flow_group_in; - void *match_criteria; + int esw_size, err = 0; u32 flags = 0; - flow_group_in = mlx5_vzalloc(inlen); - if (!flow_group_in) - return -ENOMEM; - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); err = -EOPNOTSUPP; - goto ns_err; + goto out; } esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max 
counters(%d)*groups(%d))\n", @@ -451,8 +450,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) flags |= MLX5_FLOW_TABLE_TUNNEL_EN; fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, @@ -462,12 +460,55 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto fast_fdb_err; + goto out; } esw->fdb_table.fdb = fdb; +out: + return err; +} + +static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} + +#define MAX_PF_SQ 256 + +static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int table_size, ix, err = 0; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + + esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto ns_err; + } + + err = esw_create_offloads_fast_fdb_table(esw); + if (err) + goto fast_fdb_err; + table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0); + + ft_attr.max_fte = table_size; + ft_attr.prio = FDB_SLOW_PATH; + + fdb = 
mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); @@ -532,25 +573,26 @@ ns_err: return err; } -static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; - esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); - mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw_destroy_offloads_fast_fdb_table(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) { - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_namespace *ns; int err = 0; ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); @@ -559,7 +601,9 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); + ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + + ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { err = PTR_ERR(ft_offloads); esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); @@ -700,7 +744,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_dev_list_unlock(); - err = esw_create_offloads_fdb_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) goto create_fdb_err; @@ -737,7 +781,7 @@ create_fg_err: esw_destroy_offloads_table(esw); create_ft_err: - 
esw_destroy_offloads_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); create_fdb_err: /* enable back PF RoCE */ @@ -783,7 +827,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -1012,6 +1056,66 @@ out: return 0; } +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) + return -EOPNOTSUPP; + + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_LEGACY) { + esw->offloads.encap = encap; + return 0; + } + + if (esw->offloads.encap == encap) + return 0; + + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set encapsulation when flows are configured\n"); + return -EOPNOTSUPP; + } + + esw_destroy_offloads_fast_fdb_table(esw); + + esw->offloads.encap = encap; + err = esw_create_offloads_fast_fdb_table(esw); + if (err) { + esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); + esw->offloads.encap = !encap; + (void) esw_create_offloads_fast_fdb_table(esw); + } + return err; +} + +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + *encap = esw->offloads.encap; + return 0; +} + void 
mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, int vport_index, struct mlx5_eswitch_rep *__rep) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index b64a781c7e85..19e3d2fc2099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -45,6 +45,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + ft->underlay_qpn == 0) + return 0; + MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); @@ -54,6 +58,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + ft->underlay_qpn != 0) + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -249,6 +257,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); @@ -515,3 +524,69 @@ void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; + int 
max_actions, actions_size, inlen, err; + void *actions_in; + u8 table_type; + u32 *in; + + switch (namespace) { + case MLX5_FLOW_NAMESPACE_FDB: + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); + table_type = FS_FT_FDB; + break; + case MLX5_FLOW_NAMESPACE_KERNEL: + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_RX; + break; + default: + return -EOPNOTSUPP; + } + + if (num_actions > max_actions) { + mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n", + num_actions, max_actions); + return -EOPNOTSUPP; + } + + actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions; + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions); + + actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(actions_in, modify_actions, actions_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + kfree(in); + return err; +} + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index ded27bb9a3b6..b8a176503d38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -476,6 +476,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; + fte->modify_id = flow_act->modify_id; return fte; } @@ -777,18 +778,16 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, } static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, enum fs_flow_table_op_mod op_mod, - u16 vport, int prio, - int max_fte, u32 level, - u32 flags) + u16 vport) { + struct mlx5_flow_root_namespace *root = find_root(&ns->node); struct mlx5_flow_table *next_ft = NULL; + struct fs_prio *fs_prio = NULL; struct mlx5_flow_table *ft; - int err; int log_table_sz; - struct mlx5_flow_root_namespace *root = - find_root(&ns->node); - struct fs_prio *fs_prio = NULL; + int err; if (!root) { pr_err("mlx5: flow steering failed to find root of namespace\n"); @@ -796,29 +795,31 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa } mutex_lock(&root->chain_lock); - fs_prio = find_prio(ns, prio); + fs_prio = find_prio(ns, ft_attr->prio); if (!fs_prio) { err = -EINVAL; goto unlock_root; } - if (level >= fs_prio->num_levels) { + if (ft_attr->level >= fs_prio->num_levels) { err = -ENOSPC; goto unlock_root; } /* The level is related to the * priority level range. */ - level += fs_prio->start_level; - ft = alloc_flow_table(level, + ft_attr->level += fs_prio->start_level; + ft = alloc_flow_table(ft_attr->level, vport, - max_fte ? roundup_pow_of_two(max_fte) : 0, + ft_attr->max_fte ? 
roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, - op_mod, flags); + op_mod, ft_attr->flags); if (!ft) { err = -ENOMEM; goto unlock_root; } + ft->underlay_qpn = ft_attr->underlay_qpn; + tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); @@ -848,44 +849,56 @@ unlock_root: } struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, int max_fte, - u32 level, - u32 flags) + struct mlx5_flow_table_attr *ft_attr) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, - max_fte, level, flags); + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level, u16 vport) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, - max_fte, level, 0); + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.max_fte = max_fte; + ft_attr.level = level; + ft_attr.prio = prio; + + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0); } -struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( - struct mlx5_flow_namespace *ns, - int prio, u32 level) +struct mlx5_flow_table* +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + int prio, u32 level) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, - level, 0); + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.level = level; + ft_attr.prio = prio; + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); -struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags) +struct mlx5_flow_table* +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, 
+ int max_num_groups, + u32 level, + u32 flags) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags); + ft_attr.max_fte = num_flow_table_entries; + ft_attr.prio = prio; + ft_attr.level = level; + ft_attr.flags = flags; + + ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return ft; @@ -1827,12 +1840,18 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) static int create_anchor_flow_table(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (WARN_ON(!ns)) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); + + ft_attr.max_fte = ANCHOR_SIZE; + ft_attr.level = ANCHOR_LEVEL; + ft_attr.prio = ANCHOR_PRIO; + + ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); @@ -1886,9 +1905,6 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - cleanup_root_ns(steering->root_ns); cleanup_root_ns(steering->esw_egress_root_ns); cleanup_root_ns(steering->esw_ingress_root_ns); @@ -1991,9 +2007,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) struct mlx5_flow_steering *steering; int err = 0; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return 0; - err = mlx5_init_fc_stats(dev); if (err) return err; @@ -2004,7 +2017,10 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; - if (MLX5_CAP_GEN(dev, nic_flow_table) && + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + 
(MLX5_CAP_GEN(dev, nic_flow_table))) || + ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) && MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(steering); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 8e668c63f69e..81eafc7b9dd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -118,6 +118,7 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + u32 underlay_qpn; }; struct mlx5_fc_cache { @@ -152,6 +153,7 @@ struct fs_fte { u32 index; u32 action; u32 encap_id; + u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; }; @@ -197,6 +199,11 @@ struct mlx5_flow_root_namespace { int mlx5_init_fc_stats(struct mlx5_core_dev *dev); void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 7431f633de31..6507d8acc54d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -165,7 +165,8 @@ static void mlx5_fc_stats_work(struct work_struct *work) list_splice_tail_init(&fc_stats->addlist, &tmplist); if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) - queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); + queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); spin_unlock(&fc_stats->addlist_lock); @@ -200,7 +201,7 @@ static void 
mlx5_fc_stats_work(struct work_struct *work) node = mlx5_fc_stats_query(dev, counter, last->id); } - fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; + fc_stats->next_query = now + fc_stats->sampling_interval; } struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) @@ -265,6 +266,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) if (!fc_stats->wq) return -ENOMEM; + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); return 0; @@ -317,3 +319,21 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, counter->lastbytes = c.bytes; counter->lastpackets = c.packets; } + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index d0bbefa08af7..1bc14d0fded8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -137,7 +137,8 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c new file mode 100644 index 000000000000..3c84e36af018 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "ipoib.h"
+
+#define IB_DEFAULT_Q_KEY 0xb1b
+
+static int mlx5i_open(struct net_device *netdev);
+static int mlx5i_close(struct net_device *netdev);
+static int mlx5i_dev_init(struct net_device *dev);
+static void mlx5i_dev_cleanup(struct net_device *dev);
+
+/* netdev callbacks installed on the IPoIB netdevice by mlx5i_init() */
+static const struct net_device_ops mlx5i_netdev_ops = {
+	.ndo_open                = mlx5i_open,
+	.ndo_stop                = mlx5i_close,
+	.ndo_init                = mlx5i_dev_init,
+	.ndo_uninit              = mlx5i_dev_cleanup,
+};
+
+/* IPoIB mlx5 netdev profile */
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs.
+ *
+ * Fills the back-pointers of the mlx5e_priv embedded in the netdev, builds the
+ * default channel parameters, advertises the offload features and installs
+ * mlx5i_netdev_ops.
+ */
+static void mlx5i_init(struct mlx5_core_dev *mdev,
+		       struct net_device *netdev,
+		       const struct mlx5e_profile *profile,
+		       void *ppriv)
+{
+	struct mlx5e_priv *priv  = mlx5i_epriv(netdev);
+
+	priv->mdev        = mdev;
+	priv->netdev      = netdev;
+	priv->profile     = profile;
+	priv->ppriv       = ppriv;
+
+	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
+
+	mutex_init(&priv->state_lock);
+
+	netdev->hw_features    |= NETIF_F_SG;
+	netdev->hw_features    |= NETIF_F_IP_CSUM;
+	netdev->hw_features    |= NETIF_F_IPV6_CSUM;
+	netdev->hw_features    |= NETIF_F_GRO;
+	netdev->hw_features    |= NETIF_F_TSO;
+	netdev->hw_features    |= NETIF_F_TSO6;
+	netdev->hw_features    |= NETIF_F_RXCSUM;
+	netdev->hw_features    |= NETIF_F_RXHASH;
+
+	netdev->netdev_ops = &mlx5i_netdev_ops;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_cleanup(struct mlx5e_priv *priv)
+{
+	/* Do nothing .. */
+}
+
+#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
+
+/* Create the underlay UD QP and walk it RST -> INIT -> RTR -> RTS.
+ *
+ * Returns 0 on success or a negative errno. On any failure after the QP was
+ * created, the QP is destroyed again, so the caller never owns a QP when an
+ * error is returned.
+ */
+static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+{
+	struct mlx5_qp_context *context = NULL;
+	u32 *in = NULL;
+	void *addr_path;
+	int ret = 0;
+	int inlen;
+	void *qpc;
+
+	inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
+		 MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
+
+	addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+	MLX5_SET(ads, addr_path, port, 1);
+	MLX5_SET(ads, addr_path, grh, 1);
+
+	ret = mlx5_core_create_qp(mdev, qp, in, inlen);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret);
+		goto out;
+	}
+
+	/* QP states */
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context) {
+		ret = -ENOMEM;
+		goto err_destroy_qp;
+	}
+
+	context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+	context->pri_path.port = 1;
+	context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
+		goto err_destroy_qp;
+	}
+	/* The remaining transitions are issued with an all-zero context */
+	memset(context, 0, sizeof(*context));
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
+		goto err_destroy_qp;
+	}
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
+		goto err_destroy_qp;
+	}
+
+	kfree(context);
+	kvfree(in);
+	return 0;
+
+err_destroy_qp:
+	/* Do not leak the HW QP when bringing it up failed half way */
+	mlx5_core_destroy_qp(mdev, qp);
+out:
+	kfree(context);
+	kvfree(in);
+	return ret;
+}
+
+/* Destroy the QP created by mlx5i_create_underlay_qp() */
+static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+{
+	mlx5_core_destroy_qp(mdev, qp);
+}
+
+/* Profile init_tx callback: create the underlay QP and a single TIS (tc 0)
+ * bound to that QP's number.
+ *
+ * Returns 0 on success or a negative errno; nothing is left allocated on
+ * failure.
+ */
+static int mlx5i_init_tx(struct mlx5e_priv *priv)
+{
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	int err;
+
+	err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+	if (err) {
+		mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
+		return err;
+	}
+
+	err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+	if (err) {
+		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
+		/* Undo the QP creation above, otherwise the QP is leaked */
+		mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+		return err;
+	}
+
+	return 0;
+}
+
+/* Profile cleanup_tx callback: release what mlx5i_init_tx() created, in
+ * reverse order.
+ */
+static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
+{
+	struct mlx5i_priv *ipriv = priv->ppriv;
+
+	mlx5e_destroy_tis(priv->mdev, priv->tisn[0]);
+	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+/* Create the RX flow steering tables (aRFS and TTC) in the kernel namespace.
+ *
+ * A failure to create the aRFS tables is not fatal: it is logged and
+ * NETIF_F_NTUPLE is cleared from hw_features. A failure to create the TTC
+ * table is returned to the caller.
+ */
+static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
+{
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	int err;
+
+	priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+					       MLX5_FLOW_NAMESPACE_KERNEL);
+
+	if (!priv->fs.ns)
+		return -EINVAL;
+
+	err = mlx5e_arfs_create_tables(priv);
+	if (err) {
+		netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
+			   err);
+		priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+	}
+
+	err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn);
+	if (err) {
+		netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
+			   err);
+		goto err_destroy_arfs_tables;
+	}
+
+	return 0;
+
+err_destroy_arfs_tables:
+	mlx5e_arfs_destroy_tables(priv);
+
+	return err;
+}
+
+/* Tear down the tables created by mlx5i_create_flow_steering() */
+static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
+{
+	mlx5e_destroy_ttc_table(priv);
+	mlx5e_arfs_destroy_tables(priv);
+}
+
+/* Profile init_rx callback: create RQTs, TIRs and flow steering, unwinding
+ * everything already created when a later step fails.
+ */
+static int mlx5i_init_rx(struct mlx5e_priv *priv)
+{
+	int err;
+
+	err = mlx5e_create_indirect_rqt(priv);
+	if (err)
+		return err;
+
+	err = mlx5e_create_direct_rqts(priv);
+	if (err)
+		goto err_destroy_indirect_rqts;
+
+	err = mlx5e_create_indirect_tirs(priv);
+	if (err)
+		goto err_destroy_direct_rqts;
+
+	err = mlx5e_create_direct_tirs(priv);
+	if (err)
+		goto err_destroy_indirect_tirs;
+
+	err = mlx5i_create_flow_steering(priv);
+	if (err)
+		goto err_destroy_direct_tirs;
+
+	return 0;
+
+err_destroy_direct_tirs:
+	mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+	mlx5e_destroy_indirect_tirs(priv);
+err_destroy_direct_rqts:
+	mlx5e_destroy_direct_rqts(priv);
+err_destroy_indirect_rqts:
+	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+	return err;
+}
+
+/* Profile cleanup_rx callback: reverse of mlx5i_init_rx() */
+static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
+{
+	mlx5i_destroy_flow_steering(priv);
+	mlx5e_destroy_direct_tirs(priv);
+	mlx5e_destroy_indirect_tirs(priv);
+	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+}
+
+static const struct mlx5e_profile mlx5i_nic_profile = {
+	.init		   = mlx5i_init,
+	.cleanup	   = mlx5i_cleanup,
+	.init_tx	   = mlx5i_init_tx,
+	.cleanup_tx	   = mlx5i_cleanup_tx,
+	.init_rx	   = mlx5i_init_rx,
+	.cleanup_rx	   = mlx5i_cleanup_rx,
+	.enable		   = NULL, /* mlx5i_enable */
+	.disable	   = NULL, /* mlx5i_disable */
+	.update_stats	   = NULL, /* mlx5i_update_stats */
+	.max_nch	   = mlx5e_get_max_num_channels,
+	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
+	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+	.max_tc		   = MLX5I_MAX_NUM_TC,
+};
+
+/* mlx5i netdev NDos */
+
+/* ndo_init: publish the underlay QP number in bytes 1..3 of the device
+ * address (most significant byte first).
+ */
+static int mlx5i_dev_init(struct net_device *dev)
+{
+	struct mlx5e_priv    *priv   = mlx5i_epriv(dev);
+	struct mlx5i_priv    *ipriv  = priv->ppriv;
+
+	/* Set dev address using underlay QP */
+	dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff;
+	dev->dev_addr[2] = (ipriv->qp.qpn >>  8) & 0xff;
+	dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
+
+	return 0;
+}
+
+/* ndo_uninit: move the underlay QP back to RESET */
+static void mlx5i_dev_cleanup(struct net_device *dev)
+{
+	struct mlx5e_priv    *priv   = mlx5i_epriv(dev);
+	struct mlx5_core_dev *mdev   = priv->mdev;
+	struct mlx5i_priv    *ipriv  = priv->ppriv;
+	struct mlx5_qp_context context;
+	int err;
+
+	/* Never hand uninitialized stack memory to the command layer.
+	 * NOTE(review): whether 2RST actually reads the context is not visible
+	 * from this file; zeroing it is safe either way.
+	 */
+	memset(&context, 0, sizeof(context));
+
+	/* detach qp from flow-steering by reset it */
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp);
+	if (err)
+		mlx5_core_warn(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+}
+
+/* ndo_open: open and activate the channels under the state lock.
+ * MLX5E_STATE_OPENED is set before the channels are opened and cleared again
+ * if opening them fails.
+ */
+static int mlx5i_open(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	int err;
+
+	mutex_lock(&priv->state_lock);
+
+	set_bit(MLX5E_STATE_OPENED, &priv->state);
+
+	err = mlx5e_open_channels(priv, &priv->channels);
+	if (err)
+		goto err_clear_state_opened_flag;
+
+	mlx5e_refresh_tirs(priv, false);
+	mlx5e_activate_priv_channels(priv);
+	mutex_unlock(&priv->state_lock);
+	return 0;
+
+err_clear_state_opened_flag:
+	clear_bit(MLX5E_STATE_OPENED, &priv->state);
+	mutex_unlock(&priv->state_lock);
+	return err;
+}
+
+/* ndo_stop: deactivate and close the channels; a no-op when the device is not
+ * marked OPENED. Always returns 0.
+ */
+static int mlx5i_close(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+	/* May already be CLOSED in case a previous configuration operation
+	 * (e.g RX/TX queue size change) that involves close&open failed.
+	 */
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+
+	clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+	netif_carrier_off(priv->netdev);
+	mlx5e_deactivate_priv_channels(priv);
+	mlx5e_close_channels(&priv->channels);
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return 0;
+}
+
+#ifdef notusedyet
+/* IPoIB RDMA netdev callbacks */
+
+/* Attach the underlay QP to the multicast group identified by @gid */
+static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
+			      union ib_gid *gid, u16 lid, int set_qkey)
+{
+	struct mlx5e_priv    *epriv = mlx5i_epriv(netdev);
+	struct mlx5_core_dev *mdev  = epriv->mdev;
+	struct mlx5i_priv    *ipriv = epriv->ppriv;
+	int err;
+
+	mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+	err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn);
+	if (err)
+		mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
+			       ipriv->qp.qpn, gid->raw);
+
+	return err;
+}
+
+/* Detach the underlay QP from the multicast group identified by @gid */
+static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
+			      union ib_gid *gid, u16 lid)
+{
+	struct mlx5e_priv    *epriv = mlx5i_epriv(netdev);
+	struct mlx5_core_dev *mdev  = epriv->mdev;
+	struct mlx5i_priv    *ipriv = epriv->ppriv;
+	int err;
+
+	mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+
+	err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
+	if (err)
+		mlx5_core_dbg(mdev, "failed dettaching QPN 0x%x, MGID %pI6\n",
+			      ipriv->qp.qpn, gid->raw);
+
+	return err;
+}
+
+/* Transmit @skb on the SQ selected by the skb's queue mapping */
+static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
+		      struct ib_ah *address, u32 dqpn, u32 dqkey)
+{
+	struct mlx5e_priv *epriv = mlx5i_epriv(dev);
+	struct mlx5e_txqsq *sq   = epriv->txq2sq[skb_get_queue_mapping(skb)];
+	struct mlx5_ib_ah *mah   = to_mah(address);
+
+	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey);
+}
+#endif
+
+/* Check that the device is an IB port with IPoIB enhanced offloads.
+ * Returns 0 when both hold, a negative errno otherwise.
+ */
+static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+		return -EOPNOTSUPP;
+
+	if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+		mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+/* Allocate and attach an accelerated IPoIB netdevice on top of @mdev.
+ *
+ * Returns the new netdev on success, ERR_PTR(-EOPNOTSUPP) when the required
+ * HCA capabilities are missing, and NULL on every other failure.
+ * NOTE(review): callers therefore have to handle both NULL and ERR_PTR()
+ * results; unifying this is left for when a caller exists.
+ * NOTE(review): the symbol is exported below although it is declared static
+ * here - confirm the intended linkage.
+ */
+static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+						 struct ib_device *ibdev,
+						 const char *name,
+						 void (*setup)(struct net_device *))
+{
+	const struct mlx5e_profile *profile = &mlx5i_nic_profile;
+	int nch = profile->max_nch(mdev);
+	struct net_device *netdev;
+	struct mlx5i_priv *ipriv;
+	struct mlx5e_priv *epriv;
+	int err;
+
+	if (mlx5i_check_required_hca_cap(mdev)) {
+		mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	/* This function should only be called once per mdev */
+	err = mlx5e_create_mdev_resources(mdev);
+	if (err)
+		return NULL;
+
+	/* The private area holds struct mlx5i_priv followed by struct mlx5e_priv */
+	netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
+				  name, NET_NAME_UNKNOWN,
+				  setup,
+				  nch * MLX5E_MAX_NUM_TC,
+				  nch);
+	if (!netdev) {
+		mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
+		goto free_mdev_resources;
+	}
+
+	ipriv = netdev_priv(netdev);
+	epriv = mlx5i_epriv(netdev);
+
+	epriv->wq = create_singlethread_workqueue("mlx5i");
+	if (!epriv->wq)
+		goto err_free_netdev;
+
+	profile->init(mdev, netdev, profile, ipriv);
+
+	/* NOTE(review): any failure status of mlx5e_attach_netdev() is not
+	 * checked here - confirm whether it can fail.
+	 */
+	mlx5e_attach_netdev(epriv);
+	netif_carrier_off(netdev);
+
+	/* TODO: set rdma_netdev func pointers
+	 * rn = &ipriv->rn;
+	 * rn->hca  = ibdev;
+	 * rn->send = mlx5i_xmit;
+	 * rn->attach_mcast = mlx5i_attach_mcast;
+	 * rn->detach_mcast = mlx5i_detach_mcast;
+	 */
+	return netdev;
+
+err_free_netdev:
+	free_netdev(netdev);
+free_mdev_resources:
+	mlx5e_destroy_mdev_resources(mdev);
+
+	return NULL;
+}
+EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
+
+/* Detach and free a netdevice obtained from mlx5_rdma_netdev_alloc(), then
+ * release the per-mdev resources that the allocation created.
+ */
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+	struct mlx5e_priv          *priv    = mlx5i_epriv(netdev);
+	const struct mlx5e_profile *profile = priv->profile;
+	/* priv lives inside the netdev private area, so fetch everything that
+	 * is needed after free_netdev() before the netdev goes away.
+	 */
+	struct mlx5_core_dev       *mdev    = priv->mdev;
+
+	mlx5e_detach_netdev(priv);
+	profile->cleanup(priv);
+	destroy_workqueue(priv->wq);
+	free_netdev(netdev);
+
+	mlx5e_destroy_mdev_resources(mdev);
+}
+EXPORT_SYMBOL(mlx5_rdma_netdev_free);
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
new file mode 100644
index 000000000000..bae0a5cbc8ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_IPOB_H__ +#define __MLX5E_IPOB_H__ + +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5I_MAX_NUM_TC 1 + +/* IPoIB rdma netdev private data: the underlay QP, then a zero-length member marking where the mlx5e_priv area starts */ +struct mlx5i_priv { + struct mlx5_core_qp qp; + char *mlx5e_priv[0]; +}; + +/* Extract mlx5e_priv from IPoIB netdev: yields the address of the trailing mlx5e_priv member of the netdev's mlx5i_priv, as void * */ +#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) + +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey); +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +#endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0ad66324247f..0c123d571b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1280,6 +1280,8 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, #endif }; @@ -1514,8 +1516,10 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ - { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ - { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5, PCIe 4.0 VF */ + 
{ PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ + { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ + { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b3dabe6e8836..fbc6e9e9e305 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -141,6 +141,11 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev, u32 *encap_id); void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); + bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); |