diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice')
29 files changed, 2371 insertions, 1225 deletions
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index fb2de521731a..d88b7f3fd1f9 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -459,6 +459,7 @@ static void ice_devlink_reinit_down(struct ice_pf *pf) rtnl_lock(); ice_vsi_decfg(ice_get_main_vsi(pf)); rtnl_unlock(); + ice_deinit_pf(pf); ice_deinit_dev(pf); } @@ -609,11 +610,13 @@ exit_release_res: * @devlink: pointer to the devlink instance * @id: the parameter ID to set * @ctx: context to store the parameter value + * @extack: netlink extended ACK structure * * Return: zero on success and negative value on failure. */ static int ice_devlink_tx_sched_layers_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); int err; @@ -1231,11 +1234,13 @@ static void ice_set_min_max_msix(struct ice_pf *pf) static int ice_devlink_reinit_up(struct ice_pf *pf) { struct ice_vsi *vsi = ice_get_main_vsi(pf); + struct device *dev = ice_pf_to_dev(pf); + bool need_dev_deinit = false; int err; err = ice_init_hw(&pf->hw); if (err) { - dev_err(ice_pf_to_dev(pf), "ice_init_hw failed: %d\n", err); + dev_err(dev, "ice_init_hw failed: %d\n", err); return err; } @@ -1246,13 +1251,19 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) if (err) goto unroll_hw_init; + err = ice_init_pf(pf); + if (err) { + dev_err(dev, "ice_init_pf failed: %d\n", err); + goto unroll_dev_init; + } + vsi->flags = ICE_VSI_FLAG_INIT; rtnl_lock(); err = ice_vsi_cfg(vsi); rtnl_unlock(); if (err) - goto err_vsi_cfg; + goto unroll_pf_init; /* No need to take devl_lock, it's already taken by devlink API */ err = ice_load(pf); @@ -1265,10 +1276,14 @@ err_load: rtnl_lock(); ice_vsi_decfg(vsi); rtnl_unlock(); -err_vsi_cfg: - ice_deinit_dev(pf); +unroll_pf_init: + ice_deinit_pf(pf); +unroll_dev_init: + need_dev_deinit = true; unroll_hw_init: ice_deinit_hw(&pf->hw); + if (need_dev_deinit) + ice_deinit_dev(pf); return err; } @@ -1336,7 +1351,8 @@ static const struct devlink_ops ice_sf_devlink_ops; static int ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); struct iidc_rdma_core_dev_info *cdev; @@ -1402,7 +1418,8 @@ ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id, static int ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); struct iidc_rdma_core_dev_info *cdev; @@ -1509,11 +1526,13 @@ static int ice_devlink_local_fwd_str_to_mode(const char *mode_str) * @devlink: Pointer to the devlink instance. * @id: The parameter ID to set. * @ctx: Context to store the parameter value. + * @extack: netlink extended ACK structure * * Return: Zero. */ static int ice_devlink_local_fwd_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); struct ice_port_info *pi; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 22b8323ff0d0..147aaee192a7 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -351,6 +351,7 @@ struct ice_vsi { u16 num_q_vectors; /* tell if only dynamic irq allocation is allowed */ bool irq_dyn_alloc; + bool hsplit:1; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ @@ -374,6 +375,8 @@ struct ice_vsi { spinlock_t arfs_lock; /* protects aRFS hash table and filter state */ atomic_t *arfs_last_fltr_id; + u16 max_frame; + struct ice_aqc_vsi_props info; /* VSI properties */ struct ice_vsi_vlan_info vlan_info; /* vlan config to be restored */ @@ -509,7 +512,6 @@ enum ice_pf_flags { ICE_FLAG_MOD_POWER_UNSUPPORTED, ICE_FLAG_PHY_FW_LOAD_FAILED, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ - ICE_FLAG_LEGACY_RX, ICE_FLAG_VF_TRUE_PROMISC_ENA, ICE_FLAG_MDD_AUTO_RESET_VF, ICE_FLAG_VF_VLAN_PRUNING, @@ -1029,11 +1031,15 @@ int ice_open(struct net_device *netdev); int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf); +void ice_start_service_task(struct ice_pf *pf); int ice_load(struct ice_pf *pf); void ice_unload(struct ice_pf *pf); void ice_adv_lnk_speed_maps_init(void); +void ice_init_dev_hw(struct ice_pf *pf); int ice_init_dev(struct ice_pf *pf); void ice_deinit_dev(struct ice_pf *pf); +int ice_init_pf(struct ice_pf *pf); +void ice_deinit_pf(struct ice_pf *pf); int ice_change_mtu(struct net_device *netdev, int new_mtu); void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue); int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 2d35a278c555..eadb1e3d12b3 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include <net/xdp_sock_drv.h> +#include <linux/net/intel/libie/rx.h> #include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" @@ -462,19 +463,6 @@ u16 ice_calc_ts_ring_count(struct ice_tx_ring *tx_ring) } /** - * ice_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring) -{ - if (ice_ring_uses_build_skb(rx_ring)) - return ICE_SKB_PAD; - return 0; -} - -/** * ice_setup_rx_ctx - Configure a receive ring context * @ring: The Rx ring to configure * @@ -536,8 +524,29 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) else rlan_ctx.l2tsel = 1; - rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; - rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + if (ring->hdr_pp) { + rlan_ctx.hbuf = ring->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S; + rlan_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT; + + /* + * If the frame is TCP/UDP/SCTP, it will be split by the + * payload. + * If not, but it's an IPv4/IPv6 frame, it will be split by + * the IP header. + * If not IP, it will be split by the Ethernet header. + * + * In any case, the header buffer will never be left empty. + */ + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 | + ICE_RLAN_RX_HSPLIT_0_SPLIT_IP | + ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP | + ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP; + } else { + rlan_ctx.hbuf = 0; + rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + } + rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; /* This controls whether VLAN is stripped from inner headers @@ -549,7 +558,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Max packet size for this queue - must not be set to a larger value * than 5 x DBUF */ - rlan_ctx.rxmax = min_t(u32, ring->max_frame, + rlan_ctx.rxmax = min_t(u32, vsi->max_frame, ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len); /* Rx queue threshold in units of 64 */ @@ -586,14 +595,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) if (vsi->type == ICE_VSI_VF) return 0; - /* configure Rx buffer alignment */ - if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - ice_clear_ring_build_skb_ena(ring); - else - ice_set_ring_build_skb_ena(ring); - - ring->rx_offset = ice_rx_offset(ring); - /* init queue specific tail register */ ring->tail = hw->hw_addr + QRX_TAIL(pf_q); writel(0, ring->tail); @@ -601,36 +602,51 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) return 0; } -static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) +static int ice_rxq_pp_create(struct ice_rx_ring *rq) { - void *ctx_ptr = &ring->pkt_ctx; - struct xsk_cb_desc desc = {}; - - XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff); - desc.src = &ctx_ptr; - desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) - - sizeof(struct xdp_buff); - desc.bytes = sizeof(ctx_ptr); - xsk_pool_fill_cb(ring->xsk_pool, &desc); -} + struct libeth_fq fq = { + .count = rq->count, + .nid = NUMA_NO_NODE, + .hsplit = rq->vsi->hsplit, + .xdp = ice_is_xdp_ena_vsi(rq->vsi), + .buf_len = LIBIE_MAX_RX_BUF_LEN, + }; + int err; -/** - * ice_get_frame_sz - calculate xdp_buff::frame_sz - * @rx_ring: the ring being configured - * - * Return frame size based on underlying PAGE_SIZE - */ -static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) -{ - unsigned int frame_sz; + err = libeth_rx_fq_create(&fq, &rq->q_vector->napi); + if (err) + return err; + + rq->pp = fq.pp; + rq->rx_fqes = fq.fqes; + rq->truesize = fq.truesize; + rq->rx_buf_len = fq.buf_len; -#if (PAGE_SIZE >= 8192) - frame_sz = rx_ring->rx_buf_len; -#else - frame_sz = ice_rx_pg_size(rx_ring) / 2; -#endif + if (!fq.hsplit) + return 0; + + fq = (struct libeth_fq){ + .count = rq->count, + .type = LIBETH_FQE_HDR, + .nid = NUMA_NO_NODE, + .xdp = ice_is_xdp_ena_vsi(rq->vsi), + }; - return frame_sz; + err = libeth_rx_fq_create(&fq, &rq->q_vector->napi); + if (err) + goto destroy; + + rq->hdr_pp = fq.pp; + rq->hdr_fqes = fq.fqes; + rq->hdr_truesize = fq.truesize; + rq->rx_hdr_len = fq.buf_len; + + return 0; + +destroy: + ice_rxq_pp_destroy(rq); + + return err; } /** @@ -642,7 +658,8 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); - u32 num_bufs = ICE_RX_DESC_UNUSED(ring); + u32 num_bufs = ICE_DESC_UNUSED(ring); + u32 rx_buf_len; int err; if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { @@ -656,15 +673,19 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } ice_rx_xsk_pool(ring); + err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool); + if (err) + return err; + if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); - ring->rx_buf_len = + rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + rx_buf_len); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -673,36 +694,33 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (err) return err; xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); - ice_xsk_pool_fill_cb(ring); dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { + err = ice_rxq_pp_create(ring); + if (err) + return err; + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, ring->rx_buf_len); if (err) - return err; + goto err_destroy_fq; } - - err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (err) - return err; + xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, + ring->pp); } } - xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; - ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); if (err) { dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n", ring->q_index, err); - return err; + goto err_destroy_fq; } if (ring->xsk_pool) { @@ -730,9 +748,17 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (ring->vsi->type == ICE_VSI_CTRL) ice_init_ctrl_rx_descs(ring, num_bufs); else - ice_alloc_rx_bufs(ring, num_bufs); + err = ice_alloc_rx_bufs(ring, num_bufs); + + if (err) + goto err_destroy_fq; return 0; + +err_destroy_fq: + ice_rxq_pp_destroy(ring); + + return err; } int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) @@ -753,18 +779,10 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) */ static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi, struct ice_rx_ring *ring) { - if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { - ring->max_frame = ICE_MAX_FRAME_LEGACY_RX; - ring->rx_buf_len = ICE_RXBUF_1664; -#if (PAGE_SIZE < 8192) - } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && - (vsi->netdev->mtu <= ETH_DATA_LEN)) { - ring->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN; - ring->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN; -#endif + if (!vsi->netdev) { + vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX; } else { - ring->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; - ring->rx_buf_len = ICE_RXBUF_3072; + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; } } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2532b6f82e97..046bc9c65c51 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1161,6 +1161,9 @@ int ice_init_hw(struct ice_hw *hw) status = ice_init_hw_tbls(hw); if (status) goto err_unroll_fltr_mgmt_struct; + + ice_init_dev_hw(hw->back); + mutex_init(&hw->tnl_lock); ice_init_chk_recipe_reuse_support(hw); @@ -3389,6 +3392,7 @@ bool ice_is_100m_speed_supported(struct ice_hw *hw) case ICE_DEV_ID_E822L_SGMII: case ICE_DEV_ID_E823L_1GBE: case ICE_DEV_ID_E823C_SGMII: + case ICE_DEV_ID_E825C_SGMII: return true; default: return false; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index dc131779d426..969d4f8f9c02 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -10,6 +10,7 @@ #include "ice_lib.h" #include "ice_dcb_lib.h" #include <net/dcbnl.h> +#include <net/libeth/rx.h> struct ice_stats { char stat_string[ETH_GSTRING_LEN]; @@ -340,7 +341,6 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_FLAG_VF_TRUE_PROMISC_ENA), ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF), ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING), - ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@ -794,8 +794,7 @@ static int ice_get_extended_regs(struct net_device *netdev, void *p) static void ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_hw *hw = &pf->hw; u32 *regs_buf = (u32 *)p; unsigned int i; @@ -810,8 +809,7 @@ ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) static u32 ice_get_msglevel(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); #ifndef CONFIG_DYNAMIC_DEBUG if (pf->hw.debug_mask) @@ -824,8 +822,7 @@ static u32 ice_get_msglevel(struct net_device *netdev) static void ice_set_msglevel(struct net_device *netdev, u32 data) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); #ifndef CONFIG_DYNAMIC_DEBUG if (ICE_DBG_USER & data) @@ -840,16 +837,14 @@ static void ice_set_msglevel(struct net_device *netdev, u32 data) static void ice_get_link_ext_stats(struct net_device *netdev, struct ethtool_link_ext_stats *stats) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); stats->link_down_events = pf->link_down_events; } static int ice_get_eeprom_len(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); return (int)pf->hw.flash.flash_size; } @@ -858,9 +853,7 @@ static int ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *bytes) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_hw *hw = &pf->hw; struct device *dev; int ret; @@ -959,8 +952,7 @@ static u64 ice_link_test(struct net_device *netdev) */ static u64 ice_eeprom_test(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); netdev_info(netdev, "EEPROM test\n"); return !!(ice_nvm_validate_checksum(&pf->hw)); @@ -1239,8 +1231,9 @@ static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size) */ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) { - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_buf; int valid_frames, i; + struct page *page; u8 *received_buf; valid_frames = 0; @@ -1255,8 +1248,10 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; - rx_buf = &rx_ring->rx_buf[i]; - received_buf = page_address(rx_buf->page) + rx_buf->page_offset; + rx_buf = &rx_ring->rx_fqes[i]; + page = __netmem_to_page(rx_buf->netmem); + received_buf = page_address(page) + rx_buf->offset + + page->pp->p.offset; if (ice_lbtest_check_frame(received_buf)) valid_frames++; @@ -1274,9 +1269,8 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) */ static u64 ice_loopback_test(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *orig_vsi = np->vsi, *test_vsi; - struct ice_pf *pf = orig_vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *test_vsi; u8 *tx_frame __free(kfree) = NULL; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; @@ -1365,8 +1359,7 @@ lbtest_vsi_close: */ static u64 ice_intr_test(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); u16 swic_old = pf->sw_int_count; netdev_info(netdev, "interrupt test\n"); @@ -1394,9 +1387,8 @@ static void ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { - struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = ice_netdev_to_pf(netdev); bool if_running = netif_running(netdev); - struct ice_pf *pf = np->vsi->back; struct device *dev; dev = ice_pf_to_dev(pf); @@ -1720,9 +1712,7 @@ static int ice_nway_reset(struct net_device *netdev) */ static u32 ice_get_priv_flags(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); u32 i, ret_flags = 0; for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { @@ -1869,10 +1859,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) ice_nway_reset(netdev); } } - if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { - /* down and up VSI so that changes of Rx cfg are reflected. */ - ice_down_up(vsi); - } /* don't allow modification of this flag when a single VF is in * promiscuous mode because it's not supported */ @@ -3098,6 +3084,20 @@ static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) } /** + * ice_get_rx_ring_count - get RX ring count + * @netdev: network interface device structure + * + * Return: number of RX rings. + */ +static u32 ice_get_rx_ring_count(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + return vsi->rss_size; +} + +/** * ice_get_rxnfc - command to get Rx flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command @@ -3117,10 +3117,6 @@ ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, hw = &vsi->back->hw; switch (cmd->cmd) { - case ETHTOOL_GRXRINGS: - cmd->data = vsi->rss_size; - ret = 0; - break; case ETHTOOL_GRXCLSRLCNT: cmd->rule_cnt = hw->fdir_active_fltr; /* report total rule count */ @@ -3165,6 +3161,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_jumbo_max_pending = 0; ring->rx_mini_pending = 0; ring->rx_jumbo_pending = 0; + + kernel_ring->tcp_data_split = vsi->hsplit ? + ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static int @@ -3181,6 +3181,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, int i, timeout = 50, err = 0; struct ice_hw *hw = &pf->hw; u16 new_rx_cnt, new_tx_cnt; + bool hsplit; if (ring->tx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) || ring->tx_pending < ICE_MIN_NUM_DESC || @@ -3206,9 +3207,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", new_rx_cnt); + hsplit = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED; + /* if nothing to do return success */ if (new_tx_cnt == vsi->tx_rings[0]->count && - new_rx_cnt == vsi->rx_rings[0]->count) { + new_rx_cnt == vsi->rx_rings[0]->count && + hsplit == vsi->hsplit) { netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; } @@ -3238,6 +3242,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, vsi->xdp_rings[i]->count = new_tx_cnt; vsi->num_tx_desc = (u16)new_tx_cnt; vsi->num_rx_desc = (u16)new_rx_cnt; + vsi->hsplit = hsplit; + netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@ -3321,7 +3327,8 @@ process_rx: rx_rings[i].count = new_rx_cnt; rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; - rx_rings[i].rx_buf = NULL; + rx_rings[i].xdp_buf = NULL; + /* this is to allow wr32 to have something to write to * during early allocation of Rx buffers */ @@ -3330,10 +3337,6 @@ process_rx: err = ice_setup_rx_ring(&rx_rings[i]); if (err) goto rx_unwind; - - /* allocate Rx buffers */ - err = ice_alloc_rx_bufs(&rx_rings[i], - ICE_RX_DESC_UNUSED(&rx_rings[i])); rx_unwind: if (err) { while (i) { @@ -3347,6 +3350,8 @@ rx_unwind: } process_link: + vsi->hsplit = hsplit; + /* Bring interface down, copy in the new ring info, then restore the * interface. if VSI is up, bring it down and then back up */ @@ -4417,9 +4422,7 @@ static int ice_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_hw *hw = &pf->hw; u8 sff8472_comp = 0; u8 sff8472_swap = 0; @@ -4491,12 +4494,10 @@ static int ice_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { - struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = ice_netdev_to_pf(netdev); #define SFF_READ_BLOCK_SIZE 8 u8 value[SFF_READ_BLOCK_SIZE] = { 0 }; u8 addr = ICE_I2C_EEPROM_DEV_ADDR; - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; bool is_sfp = false; unsigned int i, j; @@ -4661,6 +4662,98 @@ static void ice_get_fec_stats(struct net_device *netdev, pi->lport, err); } +static void ice_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_hw_port_stats *ps = &pf->stats; + + mac_stats->FramesTransmittedOK = ps->eth.tx_unicast + + ps->eth.tx_multicast + + ps->eth.tx_broadcast; + mac_stats->FramesReceivedOK = ps->eth.rx_unicast + + ps->eth.rx_multicast + + ps->eth.rx_broadcast; + mac_stats->FrameCheckSequenceErrors = ps->crc_errors; + mac_stats->OctetsTransmittedOK = ps->eth.tx_bytes; + mac_stats->OctetsReceivedOK = ps->eth.rx_bytes; + mac_stats->MulticastFramesXmittedOK = ps->eth.tx_multicast; + mac_stats->BroadcastFramesXmittedOK = ps->eth.tx_broadcast; + mac_stats->MulticastFramesReceivedOK = ps->eth.rx_multicast; + mac_stats->BroadcastFramesReceivedOK = ps->eth.rx_broadcast; + mac_stats->InRangeLengthErrors = ps->rx_len_errors; + mac_stats->FrameTooLongErrors = ps->rx_oversize; +} + +static void ice_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_hw_port_stats *ps = &pf->stats; + + pause_stats->tx_pause_frames = ps->link_xon_tx + ps->link_xoff_tx; + pause_stats->rx_pause_frames = ps->link_xon_rx + ps->link_xoff_rx; +} + +static const struct ethtool_rmon_hist_range ice_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1522 }, + { 1523, 9522 }, + {} +}; + +static void ice_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_hw_port_stats *ps = &pf->stats; + + rmon->undersize_pkts = ps->rx_undersize; + rmon->oversize_pkts = ps->rx_oversize; + rmon->fragments = ps->rx_fragments; + rmon->jabbers = ps->rx_jabber; + + rmon->hist[0] = ps->rx_size_64; + rmon->hist[1] = ps->rx_size_127; + rmon->hist[2] = ps->rx_size_255; + rmon->hist[3] = ps->rx_size_511; + rmon->hist[4] = ps->rx_size_1023; + rmon->hist[5] = ps->rx_size_1522; + rmon->hist[6] = ps->rx_size_big; + + rmon->hist_tx[0] = ps->tx_size_64; + rmon->hist_tx[1] = ps->tx_size_127; + rmon->hist_tx[2] = ps->tx_size_255; + rmon->hist_tx[3] = ps->tx_size_511; + rmon->hist_tx[4] = ps->tx_size_1023; + rmon->hist_tx[5] = ps->tx_size_1522; + rmon->hist_tx[6] = ps->tx_size_big; + + *ranges = ice_rmon_ranges; +} + +/* ice_get_ts_stats - provide timestamping stats + * @netdev: the netdevice pointer from ethtool + * @ts_stats: the ethtool data structure to fill in + */ +static void ice_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_ptp *ptp = &pf->ptp; + + ts_stats->pkts = ptp->tx_hwtstamp_good; + ts_stats->err = ptp->tx_hwtstamp_skipped + + ptp->tx_hwtstamp_flushed + + ptp->tx_hwtstamp_discarded; + ts_stats->lost = ptp->tx_hwtstamp_timeouts; +} + #define ICE_ETHTOOL_PFR (ETH_RESET_IRQ | ETH_RESET_DMA | \ ETH_RESET_FILTER | ETH_RESET_OFFLOAD) @@ -4682,8 +4775,7 @@ static void ice_get_fec_stats(struct net_device *netdev, */ static int ice_ethtool_reset(struct net_device *dev, u32 *flags) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); enum ice_reset_req reset; switch (*flags) { @@ -4741,9 +4833,14 @@ static const struct ethtool_ops ice_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_RX_USECS_HIGH, .supported_input_xfrm = RXH_XFRM_SYM_XOR, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, .get_fec_stats = ice_get_fec_stats, + .get_eth_mac_stats = ice_get_eth_mac_stats, + .get_pause_stats = ice_get_pause_stats, + .get_rmon_stats = ice_get_rmon_stats, + .get_ts_stats = ice_get_ts_stats, .get_drvinfo = ice_get_drvinfo, .get_regs_len = ice_get_regs_len, .get_regs = ice_get_regs, @@ -4766,6 +4863,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_sset_count = ice_get_sset_count, .get_rxnfc = ice_get_rxnfc, .set_rxnfc = ice_set_rxnfc, + .get_rx_ring_count = ice_get_rx_ring_count, .get_ringparam = ice_get_ringparam, .set_ringparam = ice_set_ringparam, .nway_reset = ice_nway_reset, diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 26b357c0ae15..b29fbdec9442 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -1121,7 +1121,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, * ice_fdir_has_frag - does flow type have 2 ptypes * @flow: flow ptype * - * returns true is there is a fragment packet for this ptype + * Return: true if there is a fragment packet for this ptype */ bool ice_fdir_has_frag(enum ice_fltr_ptype flow) { diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 013c93b6605e..c0dbec369366 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -574,9 +574,7 @@ ice_destroy_tunnel_end: int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); enum ice_tunnel_type tnl_type; int status; u16 index; @@ -598,9 +596,7 @@ int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); enum ice_tunnel_type tnl_type; int status; @@ -3582,6 +3578,19 @@ ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig, } /** + * ice_set_tcam_flags - set TCAM flag don't care mask + * @mask: mask for flags + * @dc_mask: pointer to the don't care mask + */ +static void ice_set_tcam_flags(u16 mask, u8 dc_mask[ICE_TCAM_KEY_VAL_SZ]) +{ + u16 inverted_mask = ~mask; + + /* flags are lowest u16 */ + put_unaligned_le16(inverted_mask, dc_mask); +} + +/** * ice_rem_chg_tcam_ent - remove a specific TCAM entry from change list * @hw: pointer to the HW struct * @idx: the index of the TCAM entry to remove @@ -3651,6 +3660,9 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, if (!p) return -ENOMEM; + /* set don't care masks for TCAM flags */ + ice_set_tcam_flags(tcam->attr.mask, dc_msk); + status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id, tcam->ptg, vsig, 0, tcam->attr.flags, vl_msk, dc_msk, nm_msk); @@ -3677,6 +3689,34 @@ err_ice_prof_tcam_ena_dis: } /** + * ice_ptg_attr_in_use - determine if PTG and attribute pair is in use + * @ptg_attr: pointer to the PTG and attribute pair to check + * @ptgs_used: bitmap that denotes which PTGs are in use + * @attr_used: array of PTG and attributes pairs already used + * @attr_cnt: count of entries in the attr_used array + * + * Return: true if the PTG and attribute pair is in use, false otherwise. + */ +static bool +ice_ptg_attr_in_use(struct ice_tcam_inf *ptg_attr, unsigned long *ptgs_used, + struct ice_tcam_inf *attr_used[], u16 attr_cnt) +{ + u16 i; + + if (!test_bit(ptg_attr->ptg, ptgs_used)) + return false; + + /* the PTG is used, so now look for correct attributes */ + for (i = 0; i < attr_cnt; i++) + if (attr_used[i]->ptg == ptg_attr->ptg && + attr_used[i]->attr.flags == ptg_attr->attr.flags && + attr_used[i]->attr.mask == ptg_attr->attr.mask) + return true; + + return false; +} + +/** * ice_adj_prof_priorities - adjust profile based on priorities * @hw: pointer to the HW struct * @blk: hardware block @@ -3688,10 +3728,16 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, struct list_head *chg) { DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); + struct ice_tcam_inf **attr_used; struct ice_vsig_prof *t; - int status; + u16 attr_used_cnt = 0; + int status = 0; u16 idx; + attr_used = kcalloc(ICE_MAX_PTG_ATTRS, sizeof(*attr_used), GFP_KERNEL); + if (!attr_used) + return -ENOMEM; + bitmap_zero(ptgs_used, ICE_XLT1_CNT); idx = vsig & ICE_VSIG_IDX_M; @@ -3709,11 +3755,15 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 i; for (i = 0; i < t->tcam_count; i++) { + bool used; + /* Scan the priorities from newest to oldest. * Make sure that the newest profiles take priority. */ - if (test_bit(t->tcam[i].ptg, ptgs_used) && - t->tcam[i].in_use) { + used = ice_ptg_attr_in_use(&t->tcam[i], ptgs_used, + attr_used, attr_used_cnt); + + if (used && t->tcam[i].in_use) { /* need to mark this PTG as never match, as it * was already in use and therefore duplicate * (and lower priority) @@ -3723,9 +3773,8 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, &t->tcam[i], chg); if (status) - return status; - } else if (!test_bit(t->tcam[i].ptg, ptgs_used) && - !t->tcam[i].in_use) { + goto free_attr_used; + } else if (!used && !t->tcam[i].in_use) { /* need to enable this PTG, as it in not in use * and not enabled (highest priority) */ @@ -3734,15 +3783,21 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, &t->tcam[i], chg); if (status) - return status; + goto free_attr_used; } /* keep track of used ptgs */ - __set_bit(t->tcam[i].ptg, ptgs_used); + set_bit(t->tcam[i].ptg, ptgs_used); + if (attr_used_cnt < ICE_MAX_PTG_ATTRS) + attr_used[attr_used_cnt++] = &t->tcam[i]; + else + ice_debug(hw, ICE_DBG_INIT, "Warn: ICE_MAX_PTG_ATTRS exceeded\n"); } } - return 0; +free_attr_used: + kfree(attr_used); + return status; } /** @@ -3825,11 +3880,15 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, p->vsig = vsig; p->tcam_idx = t->tcam[i].tcam_idx; + /* set don't care masks for TCAM flags */ + ice_set_tcam_flags(t->tcam[i].attr.mask, dc_msk); + /* write the TCAM entry */ status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx, t->tcam[i].prof_id, - t->tcam[i].ptg, vsig, 0, 0, - vl_msk, dc_msk, nm_msk); + t->tcam[i].ptg, vsig, 0, + t->tcam[i].attr.flags, vl_msk, + dc_msk, nm_msk); if (status) { devm_kfree(ice_hw_to_dev(hw), p); goto err_ice_add_prof_id_vsig; @@ -4143,9 +4202,6 @@ ice_flow_assoc_fdir_prof(struct ice_hw *hw, enum ice_block blk, u16 vsi_num; int status; - if (blk != ICE_BLK_FD) - return -EINVAL; - vsi_num = ice_get_hw_vsi_num(hw, dest_vsi); status = ice_add_prof_id_flow(hw, blk, vsi_num, hdl); if (status) { @@ -4154,6 +4210,9 @@ ice_flow_assoc_fdir_prof(struct ice_hw *hw, enum ice_block blk, return status; } + if (blk != ICE_BLK_FD) + return 0; + vsi_num = ice_get_hw_vsi_num(hw, fdir_vsi); status = ice_add_prof_id_flow(hw, blk, vsi_num, hdl); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 817beca591e0..80c9e7c749c2 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -187,6 +187,7 @@ struct ice_prof_map { }; #define ICE_INVALID_TCAM 0xFFFF +#define ICE_MAX_PTG_ATTRS 1024 struct ice_tcam_inf { u16 tcam_idx; diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index 6d5c939dc8a5..c9b6d0a84bd1 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -5,6 +5,38 @@ #include "ice_flow.h" #include <net/gre.h> +/* Size of known protocol header fields */ +#define ICE_FLOW_FLD_SZ_ETH_TYPE 2 +#define ICE_FLOW_FLD_SZ_VLAN 2 +#define ICE_FLOW_FLD_SZ_IPV4_ADDR 4 +#define ICE_FLOW_FLD_SZ_IPV6_ADDR 16 +#define ICE_FLOW_FLD_SZ_IPV6_PRE32_ADDR 4 +#define ICE_FLOW_FLD_SZ_IPV6_PRE48_ADDR 6 +#define ICE_FLOW_FLD_SZ_IPV6_PRE64_ADDR 8 +#define ICE_FLOW_FLD_SZ_IPV4_ID 2 +#define ICE_FLOW_FLD_SZ_IPV6_ID 4 +#define ICE_FLOW_FLD_SZ_IP_CHKSUM 2 +#define ICE_FLOW_FLD_SZ_TCP_CHKSUM 2 +#define ICE_FLOW_FLD_SZ_UDP_CHKSUM 2 +#define ICE_FLOW_FLD_SZ_SCTP_CHKSUM 4 +#define ICE_FLOW_FLD_SZ_IP_DSCP 1 +#define ICE_FLOW_FLD_SZ_IP_TTL 1 +#define ICE_FLOW_FLD_SZ_IP_PROT 1 +#define ICE_FLOW_FLD_SZ_PORT 2 +#define ICE_FLOW_FLD_SZ_TCP_FLAGS 1 +#define ICE_FLOW_FLD_SZ_ICMP_TYPE 1 +#define ICE_FLOW_FLD_SZ_ICMP_CODE 1 +#define ICE_FLOW_FLD_SZ_ARP_OPER 2 +#define ICE_FLOW_FLD_SZ_GRE_KEYID 4 +#define ICE_FLOW_FLD_SZ_GTP_TEID 4 +#define ICE_FLOW_FLD_SZ_GTP_QFI 2 +#define ICE_FLOW_FLD_SZ_PFCP_SEID 8 +#define ICE_FLOW_FLD_SZ_ESP_SPI 4 +#define ICE_FLOW_FLD_SZ_AH_SPI 4 +#define ICE_FLOW_FLD_SZ_NAT_T_ESP_SPI 4 +#define ICE_FLOW_FLD_SZ_L2TPV2_SESS_ID 2 +#define ICE_FLOW_FLD_SZ_L2TPV2_LEN_SESS_ID 2 + /* Describe properties of a protocol header field */ struct ice_flow_field_info { enum ice_flow_seg_hdr hdr; @@ -20,6 +52,7 @@ struct ice_flow_field_info { .mask = 0, \ } +/* QFI: 6-bit field in GTP-U PDU Session Container (3GPP TS 38.415) */ #define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \ .hdr = _hdr, \ .off = (_offset_bytes) * BITS_PER_BYTE, \ @@ -61,7 +94,33 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { /* ICE_FLOW_FIELD_IDX_IPV6_SA */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)), /* ICE_FLOW_FIELD_IDX_IPV6_DA */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, ICE_FLOW_FLD_SZ_IPV6_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV4_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 10, ICE_FLOW_FLD_SZ_IP_CHKSUM), + /* ICE_FLOW_FIELD_IDX_IPV4_FRAG */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV_FRAG, 4, + ICE_FLOW_FLD_SZ_IPV4_ID), + /* ICE_FLOW_FIELD_IDX_IPV6_FRAG */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV_FRAG, 4, + ICE_FLOW_FLD_SZ_IPV6_ID), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE32_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, + ICE_FLOW_FLD_SZ_IPV6_PRE32_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE32_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, + ICE_FLOW_FLD_SZ_IPV6_PRE32_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE48_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, + ICE_FLOW_FLD_SZ_IPV6_PRE48_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE48_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, + ICE_FLOW_FLD_SZ_IPV6_PRE48_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, + ICE_FLOW_FLD_SZ_IPV6_PRE64_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, + ICE_FLOW_FLD_SZ_IPV6_PRE64_ADDR), /* Transport */ /* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)), @@ -76,7 +135,14 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)), /* ICE_FLOW_FIELD_IDX_TCP_FLAGS */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, ICE_FLOW_FLD_SZ_TCP_FLAGS), + /* ICE_FLOW_FIELD_IDX_TCP_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 16, ICE_FLOW_FLD_SZ_TCP_CHKSUM), + /* ICE_FLOW_FIELD_IDX_UDP_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 6, ICE_FLOW_FLD_SZ_UDP_CHKSUM), + /* ICE_FLOW_FIELD_IDX_SCTP_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 8, + ICE_FLOW_FLD_SZ_SCTP_CHKSUM), /* ARP */ /* ICE_FLOW_FIELD_IDX_ARP_SIP */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)), @@ -108,9 +174,17 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16), 0x3f00), /* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, + ICE_FLOW_FLD_SZ_GTP_TEID), + /* ICE_FLOW_FIELD_IDX_GTPU_UP_QFI */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_UP, 22, + ICE_FLOW_FLD_SZ_GTP_QFI, 0x3f00), /* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, + ICE_FLOW_FLD_SZ_GTP_TEID), + /* ICE_FLOW_FIELD_IDX_GTPU_DWN_QFI */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_DWN, 22, + ICE_FLOW_FLD_SZ_GTP_QFI, 0x3f00), /* PPPoE */ /* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)), @@ -128,7 +202,16 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)), /* NAT_T_ESP */ /* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, + ICE_FLOW_FLD_SZ_NAT_T_ESP_SPI), + /* L2TPV2 */ + /* ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV2, 12, + ICE_FLOW_FLD_SZ_L2TPV2_SESS_ID), + /* L2TPV2_LEN */ + /* ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV2, 14, + ICE_FLOW_FLD_SZ_L2TPV2_LEN_SESS_ID), }; /* Bitmaps indicating relevant packet types for a particular protocol header @@ -137,9 +220,9 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { */ static const u32 ice_ptypes_mac_ofos[] = { 0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB, - 0x0000077E, 0x00000000, 0x00000000, 0x00000000, - 0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000077E, 0x000003FF, 0x00000000, 0x00000000, + 0x00400000, 0x03FFF000, 0xFFFFFFE0, 0x00000707, + 0xFFFFF000, 0x000003FF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -162,10 +245,10 @@ static const u32 ice_ptypes_macvlan_il[] = { * include IPv4 other PTYPEs */ static const u32 ice_ptypes_ipv4_ofos[] = { - 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x1D800000, 0xBFBF7800, 0x000001DF, 0x00000000, 0x00000000, 0x00000155, 0x00000000, 0x00000000, - 0x00000000, 0x000FC000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x000FC000, 0x000002A0, 0x00000000, + 0x00015000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -176,10 +259,10 @@ static const u32 ice_ptypes_ipv4_ofos[] = { * IPv4 other PTYPEs */ static const u32 ice_ptypes_ipv4_ofos_all[] = { - 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x1D800000, 0x27BF7800, 0x00000000, 0x00000000, 0x00000000, 0x00000155, 0x00000000, 0x00000000, - 0x00000000, 0x000FC000, 0x83E0F800, 0x00000101, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x000FC000, 0x83E0FAA0, 0x00000101, + 0x3FFD5000, 0x00000000, 0x02FBEFBC, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -191,7 +274,7 @@ static const u32 ice_ptypes_ipv4_il[] = { 0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B, 0x0000000E, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x001FF800, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xC0FC0000, 0x0000000F, 0xBC0BC0BC, 0x00000BC0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -202,10 +285,10 @@ static const u32 ice_ptypes_ipv4_il[] = { * include IPv6 other PTYPEs */ static const u32 ice_ptypes_ipv6_ofos[] = { - 0x00000000, 0x00000000, 0x77000000, 0x10002000, + 0x00000000, 0x00000000, 0x76000000, 0x10002000, 0x00000000, 0x000002AA, 0x00000000, 0x00000000, - 0x00000000, 0x03F00000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03F00000, 0x00000540, 0x00000000, + 0x0002A000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -216,10 +299,10 @@ static const u32 ice_ptypes_ipv6_ofos[] = { * IPv6 other PTYPEs */ static const u32 ice_ptypes_ipv6_ofos_all[] = { - 0x00000000, 0x00000000, 0x77000000, 0x10002000, - 0x00000000, 0x000002AA, 0x00000000, 0x00000000, - 0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x76000000, 0xFEFDE000, + 0x0000077E, 0x000002AA, 0x00000000, 0x00000000, + 0x00000000, 0x03F00000, 0x7C1F0540, 0x00000206, + 0xC002A000, 0x000003FF, 0xBC000000, 0x0002FBEF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -231,7 +314,7 @@ static const u32 ice_ptypes_ipv6_il[] = { 0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000, 0x00000770, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x7FE00000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x3F000000, 0x000003F0, 0x02F02F00, 0x0002F02F, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -304,8 +387,8 @@ static const u32 ice_ptypes_ipv6_il_no_l4[] = { static const u32 ice_ptypes_udp_il[] = { 0x81000000, 0x20204040, 0x04000010, 0x80810102, 0x00000040, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00410000, 0x90842000, 0x00000007, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00410000, 0x908427E0, 0x00000007, + 0x0413F000, 0x00000041, 0x10410410, 0x00004104, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -317,7 +400,7 @@ static const u32 ice_ptypes_tcp_il[] = { 0x04000000, 0x80810102, 0x10000040, 0x02040408, 0x00000102, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00820000, 0x21084000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x08200000, 0x00000082, 0x20820820, 0x00008208, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -329,7 +412,7 @@ static const u32 ice_ptypes_sctp_il[] = { 0x08000000, 0x01020204, 0x20000081, 0x04080810, 0x00000204, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01040000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x10400000, 0x00000104, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -353,7 +436,7 @@ static const u32 ice_ptypes_icmp_il[] = { 0x00000000, 0x02040408, 0x40000102, 0x08101020, 0x00000408, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x42108000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x20800000, 0x00000208, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -365,7 +448,7 @@ static const u32 ice_ptypes_gre_of[] = { 0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000, 0x0000017E, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xBEFBEFBC, 0x0002FBEF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -374,7 +457,7 @@ static const u32 ice_ptypes_gre_of[] = { /* Packet types for packets with an Innermost/Last MAC header */ static const u32 ice_ptypes_mac_il[] = { - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x20000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -388,7 +471,7 @@ static const u32 ice_ptypes_mac_il[] = { static const u32 ice_ptypes_gtpc[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000180, 0x00000000, + 0x00000000, 0x00000000, 0x000001E0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -2325,6 +2408,130 @@ static void ice_rss_set_symm(struct ice_hw *hw, struct ice_flow_prof *prof) } /** + * ice_rss_cfg_raw_symm - Configure symmetric RSS for a raw parser profile + * @hw: device HW + * @prof: parser profile describing extracted FV (field vector) entries + * @prof_id: RSS profile identifier used to program symmetry registers + * + * The routine scans the parser profile's FV entries and looks for + * direction-sensitive pairs (L3 src/dst, L4 src/dst). When a pair is found, + * it programs XOR-based symmetry so that flows hash identically regardless + * of packet direction. This preserves CPU affinity for the same 5-tuple. + * + * Notes: + * - The size of each logical field (IPv4/IPv6 address, L4 port) is expressed + * in units of ICE_FLOW_FV_EXTRACT_SZ so we can step across fv[] correctly. + * - We guard against out-of-bounds access before looking at fv[i + len]. + */ +static void ice_rss_cfg_raw_symm(struct ice_hw *hw, + const struct ice_parser_profile *prof, + u64 prof_id) +{ + for (size_t i = 0; i < prof->fv_num; i++) { + u8 proto_id = prof->fv[i].proto_id; + u16 src_off = 0, dst_off = 0; + size_t src_idx, dst_idx; + bool is_matched = false; + unsigned int len = 0; + + switch (proto_id) { + /* IPv4 address pairs (outer/inner variants) */ + case ICE_PROT_IPV4_OF_OR_S: + case ICE_PROT_IPV4_IL: + case ICE_PROT_IPV4_IL_IL: + len = ICE_FLOW_FLD_SZ_IPV4_ADDR / + ICE_FLOW_FV_EXTRACT_SZ; + src_off = ICE_FLOW_FIELD_IPV4_SRC_OFFSET; + dst_off = ICE_FLOW_FIELD_IPV4_DST_OFFSET; + break; + + /* IPv6 address pairs (outer/inner variants) */ + case ICE_PROT_IPV6_OF_OR_S: + case ICE_PROT_IPV6_IL: + case ICE_PROT_IPV6_IL_IL: + len = ICE_FLOW_FLD_SZ_IPV6_ADDR / + ICE_FLOW_FV_EXTRACT_SZ; + src_off = ICE_FLOW_FIELD_IPV6_SRC_OFFSET; + dst_off = ICE_FLOW_FIELD_IPV6_DST_OFFSET; + break; + + /* L4 port pairs (TCP/UDP/SCTP) */ + case ICE_PROT_TCP_IL: + case ICE_PROT_UDP_IL_OR_S: + case ICE_PROT_SCTP_IL: + len = ICE_FLOW_FLD_SZ_PORT / ICE_FLOW_FV_EXTRACT_SZ; + src_off = ICE_FLOW_FIELD_SRC_PORT_OFFSET; + dst_off = ICE_FLOW_FIELD_DST_PORT_OFFSET; + break; + + default: + continue; + } + + /* Bounds check before accessing fv[i + len]. */ + if (i + len >= prof->fv_num) + continue; + + /* Verify src/dst pairing for this protocol id. */ + is_matched = prof->fv[i].offset == src_off && + prof->fv[i + len].proto_id == proto_id && + prof->fv[i + len].offset == dst_off; + if (!is_matched) + continue; + + /* Program XOR symmetry for this field pair. */ + src_idx = i; + dst_idx = i + len; + + ice_rss_config_xor(hw, prof_id, src_idx, dst_idx, len); + + /* Skip over the pair we just handled; the loop's ++i advances + * one more element, hence the --i after the jump. + */ + i += (2 * len); + /* not strictly needed; keeps static analyzers happy */ + if (i == 0) + break; + --i; + } +} + +/* Max registers index per packet profile */ +#define ICE_SYMM_REG_INDEX_MAX 6 + +/** + * ice_rss_update_raw_symm - update symmetric hash configuration + * for raw pattern + * @hw: pointer to the hardware structure + * @cfg: configure parameters for raw pattern + * @id: profile tracking ID + * + * Update symmetric hash configuration for raw pattern if required. + * Otherwise only clear to default. + */ +void +ice_rss_update_raw_symm(struct ice_hw *hw, + struct ice_rss_raw_cfg *cfg, u64 id) +{ + struct ice_prof_map *map; + u8 prof_id, m; + + mutex_lock(&hw->blk[ICE_BLK_RSS].es.prof_map_lock); + map = ice_search_prof_id(hw, ICE_BLK_RSS, id); + if (map) + prof_id = map->prof_id; + mutex_unlock(&hw->blk[ICE_BLK_RSS].es.prof_map_lock); + if (!map) + return; + /* clear to default */ + for (m = 0; m < ICE_SYMM_REG_INDEX_MAX; m++) + wr32(hw, GLQF_HSYMM(prof_id, m), 0); + + if (cfg->symm) + ice_rss_cfg_raw_symm(hw, &cfg->prof, prof_id); +} + +/** * ice_add_rss_cfg_sync - add an RSS configuration * @hw: pointer to the hardware structure * @vsi_handle: software VSI handle diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h index 52f906d89eca..6c6cdc8addb1 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -22,6 +22,15 @@ #define ICE_FLOW_HASH_IPV6 \ (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)) +#define ICE_FLOW_HASH_IPV6_PRE32 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE32_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE32_DA)) +#define ICE_FLOW_HASH_IPV6_PRE48 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE48_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE48_DA)) +#define ICE_FLOW_HASH_IPV6_PRE64 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA)) #define ICE_FLOW_HASH_TCP_PORT \ (BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)) @@ -40,6 +49,33 @@ #define ICE_HASH_SCTP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_SCTP_PORT) #define ICE_HASH_SCTP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_SCTP_PORT) +#define ICE_HASH_TCP_IPV6_PRE32 \ + (ICE_FLOW_HASH_IPV6_PRE32 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV6_PRE32 \ + (ICE_FLOW_HASH_IPV6_PRE32 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_SCTP_IPV6_PRE32 \ + (ICE_FLOW_HASH_IPV6_PRE32 | ICE_FLOW_HASH_SCTP_PORT) +#define ICE_HASH_TCP_IPV6_PRE48 \ + (ICE_FLOW_HASH_IPV6_PRE48 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV6_PRE48 \ + (ICE_FLOW_HASH_IPV6_PRE48 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_SCTP_IPV6_PRE48 \ + (ICE_FLOW_HASH_IPV6_PRE48 | ICE_FLOW_HASH_SCTP_PORT) +#define ICE_HASH_TCP_IPV6_PRE64 \ + (ICE_FLOW_HASH_IPV6_PRE64 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV6_PRE64 \ + (ICE_FLOW_HASH_IPV6_PRE64 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_SCTP_IPV6_PRE64 \ + (ICE_FLOW_HASH_IPV6_PRE64 | ICE_FLOW_HASH_SCTP_PORT) + +#define ICE_FLOW_HASH_GTP_TEID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)) + +#define ICE_FLOW_HASH_GTP_IPV4_TEID \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID) +#define ICE_FLOW_HASH_GTP_IPV6_TEID \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID) + #define ICE_FLOW_HASH_GTP_C_TEID \ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)) @@ -128,6 +164,23 @@ #define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI) +#define ICE_FLOW_HASH_L2TPV2_SESS_ID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID)) +#define ICE_FLOW_HASH_L2TPV2_SESS_ID_ETH \ + (ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_L2TPV2_SESS_ID) + +#define ICE_FLOW_HASH_L2TPV2_LEN_SESS_ID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID)) +#define ICE_FLOW_HASH_L2TPV2_LEN_SESS_ID_ETH \ + (ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_L2TPV2_LEN_SESS_ID) + +#define ICE_FLOW_FIELD_IPV4_SRC_OFFSET 12 +#define ICE_FLOW_FIELD_IPV4_DST_OFFSET 16 +#define ICE_FLOW_FIELD_IPV6_SRC_OFFSET 8 +#define ICE_FLOW_FIELD_IPV6_DST_OFFSET 24 +#define ICE_FLOW_FIELD_SRC_PORT_OFFSET 0 +#define ICE_FLOW_FIELD_DST_PORT_OFFSET 2 + /* Protocol header fields within a packet segment. A segment consists of one or * more protocol headers that make up a logical group of protocol headers. Each * logical group of protocol headers encapsulates or is encapsulated using/by @@ -160,10 +213,13 @@ enum ice_flow_seg_hdr { ICE_FLOW_SEG_HDR_AH = 0x00200000, ICE_FLOW_SEG_HDR_NAT_T_ESP = 0x00400000, ICE_FLOW_SEG_HDR_ETH_NON_IP = 0x00800000, + ICE_FLOW_SEG_HDR_GTPU_NON_IP = 0x01000000, + ICE_FLOW_SEG_HDR_L2TPV2 = 0x10000000, /* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and - * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs + * ICE_FLOW_SEG_HDR_IPV6. */ - ICE_FLOW_SEG_HDR_IPV_OTHER = 0x20000000, + ICE_FLOW_SEG_HDR_IPV_FRAG = 0x40000000, + ICE_FLOW_SEG_HDR_IPV_OTHER = 0x80000000, }; /* These segments all have the same PTYPES, but are otherwise distinguished by @@ -200,6 +256,15 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_IPV4_DA, ICE_FLOW_FIELD_IDX_IPV6_SA, ICE_FLOW_FIELD_IDX_IPV6_DA, + ICE_FLOW_FIELD_IDX_IPV4_CHKSUM, + ICE_FLOW_FIELD_IDX_IPV4_ID, + ICE_FLOW_FIELD_IDX_IPV6_ID, + ICE_FLOW_FIELD_IDX_IPV6_PRE32_SA, + ICE_FLOW_FIELD_IDX_IPV6_PRE32_DA, + ICE_FLOW_FIELD_IDX_IPV6_PRE48_SA, + ICE_FLOW_FIELD_IDX_IPV6_PRE48_DA, + ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA, + ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA, /* L4 */ ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, @@ -208,6 +273,9 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_FLAGS, + ICE_FLOW_FIELD_IDX_TCP_CHKSUM, + ICE_FLOW_FIELD_IDX_UDP_CHKSUM, + ICE_FLOW_FIELD_IDX_SCTP_CHKSUM, /* ARP */ ICE_FLOW_FIELD_IDX_ARP_SIP, ICE_FLOW_FIELD_IDX_ARP_DIP, @@ -228,13 +296,13 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, /* GTPU_UP */ ICE_FLOW_FIELD_IDX_GTPU_UP_TEID, + ICE_FLOW_FIELD_IDX_GTPU_UP_QFI, /* GTPU_DWN */ ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID, - /* PPPoE */ + ICE_FLOW_FIELD_IDX_GTPU_DWN_QFI, ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID, /* PFCP */ ICE_FLOW_FIELD_IDX_PFCP_SEID, - /* L2TPv3 */ ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, /* ESP */ ICE_FLOW_FIELD_IDX_ESP_SPI, @@ -242,10 +310,16 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_AH_SPI, /* NAT_T ESP */ ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI, + /* L2TPV2 SESSION ID*/ + ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID, + /* L2TPV2_LEN SESSION ID */ + ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID, /* The total number of enums must not exceed 64 */ ICE_FLOW_FIELD_IDX_MAX }; +static_assert(ICE_FLOW_FIELD_IDX_MAX <= 64, "The total number of enums must not exceed 64"); + #define ICE_FLOW_HASH_FLD_IPV4_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) #define ICE_FLOW_HASH_FLD_IPV6_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) #define ICE_FLOW_HASH_FLD_IPV4_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) @@ -296,6 +370,10 @@ enum ice_rss_cfg_hdr_type { /* take inner headers as inputset for packet with outer ipv6. */ ICE_RSS_INNER_HEADERS_W_OUTER_IPV6, /* take outer headers first then inner headers as inputset */ + /* take inner as inputset for GTPoGRE with outer IPv4 + GRE. */ + ICE_RSS_INNER_HEADERS_W_OUTER_IPV4_GRE, + /* take inner as inputset for GTPoGRE with outer IPv6 + GRE. */ + ICE_RSS_INNER_HEADERS_W_OUTER_IPV6_GRE, ICE_RSS_ANY_HEADERS }; @@ -406,6 +484,12 @@ struct ice_flow_prof { bool symm; /* Symmetric Hash for RSS */ }; +struct ice_rss_raw_cfg { + struct ice_parser_profile prof; + bool raw_ena; + bool symm; +}; + struct ice_rss_cfg { struct list_head l_entry; /* bitmap of VSIs added to the RSS entry */ @@ -444,4 +528,6 @@ int ice_add_rss_cfg(struct ice_hw *hw, struct ice_vsi *vsi, int ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, const struct ice_rss_hash_cfg *cfg); u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs, bool *symm); +void ice_rss_update_raw_symm(struct ice_hw *hw, + struct ice_rss_raw_cfg *cfg, u64 id); #endif /* _ICE_FLOW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index d86db081579f..973a13d3d92a 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -534,7 +534,7 @@ ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component, } if (completion_retval) { - dev_err(dev, "Firmware failed to erase %s (module 0x02%x), aq_err %s\n", + dev_err(dev, "Firmware failed to erase %s (module 0x%02x), aq_err %s\n", component, module, libie_aq_str((enum libie_aq_err)completion_retval)); NL_SET_ERR_MSG_MOD(extack, "Firmware failed to erase flash"); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index aebf8e08a297..d2576d606e10 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2177,8 +2177,7 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr) */ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) { - struct ice_netdev_priv *np = netdev_priv(lag->netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(lag->netdev); ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 10c312d49e05..185672c7e17d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -342,6 +342,9 @@ enum ice_flg64_bits { /* for ice_32byte_rx_flex_desc.pkt_length member */ #define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ +/* ice_32byte_rx_flex_desc::hdr_len_sph_flex_flags1 */ +#define ICE_RX_FLEX_DESC_HDR_LEN_M GENMASK(10, 0) + enum ice_rx_flex_desc_status_error_0_bits { /* Note: These are predefined bit offsets */ ICE_RX_FLEX_DESC_STATUS0_DD_S = 0, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 4479c824561e..15621707fbf8 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1427,7 +1427,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->reg_idx = vsi->rxq_map[i]; ring->vsi = vsi; ring->netdev = vsi->netdev; - ring->dev = dev; ring->count = vsi->num_rx_desc; ring->cached_phctime = pf->ptp.cached_phc_time; @@ -2769,7 +2768,6 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) * @vsi: VSI pointer * * Associate queue[s] with napi for all vectors. - * The caller must hold rtnl_lock. */ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { @@ -2779,6 +2777,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) if (!netdev) return; + ASSERT_RTNL(); ice_for_each_rxq(vsi, q_idx) netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, &vsi->rx_rings[q_idx]->q_vector->napi); @@ -2799,7 +2798,6 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) * @vsi: VSI pointer * * Clear the association between all VSI queues queue[s] and napi. - * The caller must hold rtnl_lock. */ void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) { @@ -2809,6 +2807,7 @@ void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) if (!netdev) return; + ASSERT_RTNL(); /* Clear the NAPI's interrupt number */ ice_for_each_q_vector(vsi, v_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 86f5859e88ef..2533876f1a2f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -37,6 +37,8 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; #define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg" MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_IMPORT_NS("LIBIE"); MODULE_IMPORT_NS("LIBIE_ADMINQ"); MODULE_IMPORT_NS("LIBIE_FWLOG"); @@ -2957,10 +2959,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) */ static int ice_max_xdp_frame_size(struct ice_vsi *vsi) { - if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - return ICE_RXBUF_1664; - else - return ICE_RXBUF_3072; + return ICE_RXBUF_3072; } /** @@ -3018,19 +3017,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } } xdp_features_set_redirect_target(vsi->netdev, true); - /* reallocate Rx queues that are used for zero-copy */ - xdp_ring_err = ice_realloc_zc_buf(vsi, true); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_features_clear_redirect_target(vsi->netdev); xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); - /* reallocate Rx queues that were used for zero-copy */ - xdp_ring_err = ice_realloc_zc_buf(vsi, false); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } resume_if: @@ -3949,9 +3940,10 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) * ice_deinit_pf - Unrolls initialziations done by ice_init_pf * @pf: board private structure to initialize */ -static void ice_deinit_pf(struct ice_pf *pf) +void ice_deinit_pf(struct ice_pf *pf) { - ice_service_task_stop(pf); + /* note that we unroll also on ice_init_pf() failure here */ + mutex_destroy(&pf->lag_mutex); mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); @@ -3977,6 +3969,9 @@ static void ice_deinit_pf(struct ice_pf *pf) if (pf->ptp.clock) ptp_clock_unregister(pf->ptp.clock); + if (!xa_empty(&pf->irq_tracker.entries)) + ice_free_irq_msix_misc(pf); + xa_destroy(&pf->dyn_ports); xa_destroy(&pf->sf_nums); } @@ -4030,13 +4025,25 @@ static void ice_set_pf_caps(struct ice_pf *pf) pf->max_pf_rxqs = func_caps->common_cap.num_rxq; } +void ice_start_service_task(struct ice_pf *pf) +{ + timer_setup(&pf->serv_tmr, ice_service_timer, 0); + pf->serv_tmr_period = HZ; + INIT_WORK(&pf->serv_task, ice_service_task); + clear_bit(ICE_SERVICE_SCHED, pf->state); +} + /** * ice_init_pf - Initialize general software structures (struct ice_pf) * @pf: board private structure to initialize + * Return: 0 on success, negative errno otherwise. */ -static int ice_init_pf(struct ice_pf *pf) +int ice_init_pf(struct ice_pf *pf) { - ice_set_pf_caps(pf); + struct udp_tunnel_nic_info *udp_tunnel_nic = &pf->hw.udp_tunnel_nic; + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + int err = -ENOMEM; mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); @@ -4049,32 +4056,7 @@ static int ice_init_pf(struct ice_pf *pf) init_waitqueue_head(&pf->reset_wait_queue); - /* setup service timer and periodic service task */ - timer_setup(&pf->serv_tmr, ice_service_timer, 0); - pf->serv_tmr_period = HZ; - INIT_WORK(&pf->serv_task, ice_service_task); - clear_bit(ICE_SERVICE_SCHED, pf->state); - mutex_init(&pf->avail_q_mutex); - pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); - if (!pf->avail_txqs) - return -ENOMEM; - - pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); - if (!pf->avail_rxqs) { - bitmap_free(pf->avail_txqs); - pf->avail_txqs = NULL; - return -ENOMEM; - } - - pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); - if (!pf->txtime_txqs) { - bitmap_free(pf->avail_txqs); - pf->avail_txqs = NULL; - bitmap_free(pf->avail_rxqs); - pf->avail_rxqs = NULL; - return -ENOMEM; - } mutex_init(&pf->vfs.table_lock); hash_init(pf->vfs.table); @@ -4087,7 +4069,36 @@ static int ice_init_pf(struct ice_pf *pf) xa_init(&pf->dyn_ports); xa_init(&pf->sf_nums); + pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); + pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + if (!pf->avail_txqs || !pf->avail_rxqs || !pf->txtime_txqs) + goto undo_init; + + udp_tunnel_nic->set_port = ice_udp_tunnel_set_port; + udp_tunnel_nic->unset_port = ice_udp_tunnel_unset_port; + udp_tunnel_nic->shared = &hw->udp_tunnel_shared; + udp_tunnel_nic->tables[0].n_entries = hw->tnl.valid_count[TNL_VXLAN]; + udp_tunnel_nic->tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; + udp_tunnel_nic->tables[1].n_entries = hw->tnl.valid_count[TNL_GENEVE]; + udp_tunnel_nic->tables[1].tunnel_types = UDP_TUNNEL_TYPE_GENEVE; + + /* In case of MSIX we are going to setup the misc vector right here + * to handle admin queue events etc. In case of legacy and MSI + * the misc functionality and queue processing is combined in + * the same vector and that gets setup at open. + */ + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(dev, "setup of misc vector failed: %d\n", err); + goto undo_init; + } + return 0; +undo_init: + /* deinit handles half-initialized pf just fine */ + ice_deinit_pf(pf); + return err; } /** @@ -4722,9 +4733,8 @@ static void ice_decfg_netdev(struct ice_vsi *vsi) vsi->netdev = NULL; } -int ice_init_dev(struct ice_pf *pf) +void ice_init_dev_hw(struct ice_pf *pf) { - struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int err; @@ -4744,61 +4754,28 @@ int ice_init_dev(struct ice_pf *pf) */ ice_set_safe_mode_caps(hw); } +} - err = ice_init_pf(pf); - if (err) { - dev_err(dev, "ice_init_pf failed: %d\n", err); - return err; - } - - pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; - pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; - pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; - if (pf->hw.tnl.valid_count[TNL_VXLAN]) { - pf->hw.udp_tunnel_nic.tables[0].n_entries = - pf->hw.tnl.valid_count[TNL_VXLAN]; - pf->hw.udp_tunnel_nic.tables[0].tunnel_types = - UDP_TUNNEL_TYPE_VXLAN; - } - if (pf->hw.tnl.valid_count[TNL_GENEVE]) { - pf->hw.udp_tunnel_nic.tables[1].n_entries = - pf->hw.tnl.valid_count[TNL_GENEVE]; - pf->hw.udp_tunnel_nic.tables[1].tunnel_types = - UDP_TUNNEL_TYPE_GENEVE; - } +int ice_init_dev(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; + ice_set_pf_caps(pf); err = ice_init_interrupt_scheme(pf); if (err) { dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); - err = -EIO; - goto unroll_pf_init; + return -EIO; } - /* In case of MSIX we are going to setup the misc vector right here - * to handle admin queue events etc. In case of legacy and MSI - * the misc functionality and queue processing is combined in - * the same vector and that gets setup at open. - */ - err = ice_req_irq_msix_misc(pf); - if (err) { - dev_err(dev, "setup of misc vector failed: %d\n", err); - goto unroll_irq_scheme_init; - } + ice_start_service_task(pf); return 0; - -unroll_irq_scheme_init: - ice_clear_interrupt_scheme(pf); -unroll_pf_init: - ice_deinit_pf(pf); - return err; } void ice_deinit_dev(struct ice_pf *pf) { - ice_free_irq_msix_misc(pf); - ice_deinit_pf(pf); - ice_deinit_hw(&pf->hw); + ice_service_task_stop(pf); /* Service task is already stopped, so call reset directly. */ ice_reset(&pf->hw, ICE_RESET_PFR); @@ -5038,21 +5015,24 @@ static void ice_deinit_devlink(struct ice_pf *pf) static int ice_init(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); int err; - err = ice_init_dev(pf); - if (err) + err = ice_init_pf(pf); + if (err) { + dev_err(dev, "ice_init_pf failed: %d\n", err); return err; + } if (pf->hw.mac_type == ICE_MAC_E830) { err = pci_enable_ptm(pf->pdev, NULL); if (err) - dev_dbg(ice_pf_to_dev(pf), "PCIe PTM not supported by PCIe bus/controller\n"); + dev_dbg(dev, "PCIe PTM not supported by PCIe bus/controller\n"); } err = ice_alloc_vsis(pf); if (err) - goto err_alloc_vsis; + goto unroll_pf_init; err = ice_init_pf_sw(pf); if (err) @@ -5089,8 +5069,8 @@ err_init_link: ice_deinit_pf_sw(pf); err_init_pf_sw: ice_dealloc_vsis(pf); -err_alloc_vsis: - ice_deinit_dev(pf); +unroll_pf_init: + ice_deinit_pf(pf); return err; } @@ -5101,7 +5081,7 @@ static void ice_deinit(struct ice_pf *pf) ice_deinit_pf_sw(pf); ice_dealloc_vsis(pf); - ice_deinit_dev(pf); + ice_deinit_pf(pf); } /** @@ -5235,6 +5215,7 @@ static int ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { struct device *dev = &pdev->dev; + bool need_dev_deinit = false; struct ice_adapter *adapter; struct ice_pf *pf; struct ice_hw *hw; @@ -5331,10 +5312,14 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) } pf->adapter = adapter; - err = ice_init(pf); + err = ice_init_dev(pf); if (err) goto unroll_adapter; + err = ice_init(pf); + if (err) + goto unroll_dev_init; + devl_lock(priv_to_devlink(pf)); err = ice_load(pf); if (err) @@ -5352,10 +5337,14 @@ unroll_load: unroll_init: devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); +unroll_dev_init: + need_dev_deinit = true; unroll_adapter: ice_adapter_put(pdev); unroll_hw_init: ice_deinit_hw(hw); + if (need_dev_deinit) + ice_deinit_dev(pf); return err; } @@ -5450,10 +5439,6 @@ static void ice_remove(struct pci_dev *pdev) ice_hwmon_exit(pf); - ice_service_task_stop(pf); - ice_aq_cancel_waiting_tasks(pf); - set_bit(ICE_DOWN, pf->state); - if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); @@ -5471,6 +5456,11 @@ static void ice_remove(struct pci_dev *pdev) ice_set_wake(pf); ice_adapter_put(pdev); + ice_deinit_hw(&pf->hw); + + ice_deinit_dev(pf); + ice_aq_cancel_waiting_tasks(pf); + set_bit(ICE_DOWN, pf->state); } /** @@ -7138,6 +7128,9 @@ void ice_update_pf_stats(struct ice_pf *pf) &prev_ps->mac_remote_faults, &cur_ps->mac_remote_faults); + ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, + &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); + ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, &prev_ps->rx_undersize, &cur_ps->rx_undersize); @@ -7862,12 +7855,6 @@ int ice_change_mtu(struct net_device *netdev, int new_mtu) frame_size - ICE_ETH_PKT_HDR_PAD); return -EINVAL; } - } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) { - if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) { - netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n", - ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD); - return -EINVAL; - } } /* if a reset is in progress, wait for some time for it to complete */ @@ -8071,9 +8058,7 @@ static int ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); u16 bmode; bmode = pf->first_sw->bridge_mode; @@ -8143,8 +8128,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 __always_unused flags, struct netlink_ext_ack __always_unused *extack) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); struct nlattr *attr, *br_spec; struct ice_hw *hw = &pf->hw; struct ice_sw *pf_sw; @@ -9578,8 +9562,7 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, */ int ice_open(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); if (ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't open net device while reset is in progress"); diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 7c09ea0f03ba..725167d557a8 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -82,26 +82,46 @@ enum ice_sw_tunnel_type { enum ice_prot_id { ICE_PROT_ID_INVAL = 0, ICE_PROT_MAC_OF_OR_S = 1, + ICE_PROT_MAC_O2 = 2, ICE_PROT_MAC_IL = 4, + ICE_PROT_MAC_IN_MAC = 7, ICE_PROT_ETYPE_OL = 9, ICE_PROT_ETYPE_IL = 10, + ICE_PROT_PAY = 15, + ICE_PROT_EVLAN_O = 16, + ICE_PROT_VLAN_O = 17, + ICE_PROT_VLAN_IF = 18, + ICE_PROT_MPLS_OL_MINUS_1 = 27, + ICE_PROT_MPLS_OL_OR_OS = 28, + ICE_PROT_MPLS_IL = 29, ICE_PROT_IPV4_OF_OR_S = 32, ICE_PROT_IPV4_IL = 33, + ICE_PROT_IPV4_IL_IL = 34, ICE_PROT_IPV6_OF_OR_S = 40, ICE_PROT_IPV6_IL = 41, + ICE_PROT_IPV6_IL_IL = 42, + ICE_PROT_IPV6_NEXT_PROTO = 43, + ICE_PROT_IPV6_FRAG = 47, ICE_PROT_TCP_IL = 49, ICE_PROT_UDP_OF = 52, ICE_PROT_UDP_IL_OR_S = 53, ICE_PROT_GRE_OF = 64, + ICE_PROT_NSH_F = 84, ICE_PROT_ESP_F = 88, ICE_PROT_ESP_2 = 89, ICE_PROT_SCTP_IL = 96, ICE_PROT_ICMP_IL = 98, ICE_PROT_ICMPV6_IL = 100, + ICE_PROT_VRRP_F = 101, + ICE_PROT_OSPF = 102, ICE_PROT_PPPOE = 103, ICE_PROT_L2TPV3 = 104, + ICE_PROT_ATAOE_OF = 114, + ICE_PROT_CTRL_OF = 116, + ICE_PROT_LLDP_OF = 117, ICE_PROT_ARP_OF = 118, ICE_PROT_META_ID = 255, /* when offset == metadata */ + ICE_PROT_EAPOL_OF = 120, ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index fb0f6365a6d6..4c8d20f2d2c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -500,6 +500,9 @@ void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx) if (tstamp) { shhwtstamps.hwtstamp = ns_to_ktime(tstamp); ice_trace(tx_tstamp_complete, skb, idx); + + /* Count the number of Tx timestamps that succeeded */ + pf->ptp.tx_hwtstamp_good++; } skb_tstamp_tx(skb, &shhwtstamps); @@ -558,6 +561,7 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; unsigned long flags; + u32 tstamp_good = 0; struct ice_pf *pf; struct ice_hw *hw; u64 tstamp_ready; @@ -658,11 +662,16 @@ skip_ts_read: if (tstamp) { shhwtstamps.hwtstamp = ns_to_ktime(tstamp); ice_trace(tx_tstamp_complete, skb, idx); + + /* Count the number of Tx timestamps that succeeded */ + tstamp_good++; } skb_tstamp_tx(skb, &shhwtstamps); dev_kfree_skb_any(skb); } + + pf->ptp.tx_hwtstamp_good += tstamp_good; } /** @@ -2206,8 +2215,7 @@ static int ice_ptp_getcrosststamp(struct ptp_clock_info *info, int ice_ptp_hwtstamp_get(struct net_device *netdev, struct kernel_hwtstamp_config *config) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); if (pf->ptp.state != ICE_PTP_READY) return -EIO; @@ -2278,8 +2286,7 @@ int ice_ptp_hwtstamp_set(struct net_device *netdev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); int err; if (pf->ptp.state != ICE_PTP_READY) @@ -3246,7 +3253,7 @@ void ice_ptp_init(struct ice_pf *pf) err = ice_ptp_init_port(pf, &ptp->port); if (err) - goto err_exit; + goto err_clean_pf; /* Start the PHY timestamping block */ ice_ptp_reset_phy_timestamping(pf); @@ -3263,13 +3270,19 @@ void ice_ptp_init(struct ice_pf *pf) dev_info(ice_pf_to_dev(pf), "PTP init successful\n"); return; +err_clean_pf: + mutex_destroy(&ptp->port.ps_lock); + ice_ptp_cleanup_pf(pf); err_exit: /* If we registered a PTP clock, release it */ if (pf->ptp.clock) { ptp_clock_unregister(ptp->clock); pf->ptp.clock = NULL; } - ptp->state = ICE_PTP_ERROR; + /* Keep ICE_PTP_UNINIT state to avoid ambiguity at driver unload + * and to avoid duplicated resources release. + */ + ptp->state = ICE_PTP_UNINIT; dev_err(ice_pf_to_dev(pf), "PTP failed %d\n", err); } @@ -3282,8 +3295,18 @@ err_exit: */ void ice_ptp_release(struct ice_pf *pf) { - if (pf->ptp.state != ICE_PTP_READY) + if (pf->ptp.state == ICE_PTP_UNINIT) + return; + + if (pf->ptp.state != ICE_PTP_READY) { + mutex_destroy(&pf->ptp.port.ps_lock); + ice_ptp_cleanup_pf(pf); + if (pf->ptp.clock) { + ptp_clock_unregister(pf->ptp.clock); + pf->ptp.clock = NULL; + } return; + } pf->ptp.state = ICE_PTP_UNINIT; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 137f2070a2d9..27016aac4f1e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -237,6 +237,7 @@ struct ice_ptp_pin_desc { * @clock: pointer to registered PTP clock device * @tstamp_config: hardware timestamping configuration * @reset_time: kernel time after clock stop on reset + * @tx_hwtstamp_good: number of completed Tx timestamp requests * @tx_hwtstamp_skipped: number of Tx time stamp requests skipped * @tx_hwtstamp_timeouts: number of Tx skbs discarded with no time stamp * @tx_hwtstamp_flushed: number of Tx skbs flushed due to interface closed @@ -261,6 +262,7 @@ struct ice_ptp { struct ptp_clock *clock; struct kernel_hwtstamp_config tstamp_config; u64 reset_time; + u64 tx_hwtstamp_good; u32 tx_hwtstamp_skipped; u32 tx_hwtstamp_timeouts; u32 tx_hwtstamp_flushed; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 843e82fd3bf9..6b1126ddb561 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1190,8 +1190,7 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) */ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vsi *vf_vsi; struct device *dev; struct ice_vf *vf; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 73f08d02f9c7..ad76768a4232 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -7,6 +7,8 @@ #include <linux/netdevice.h> #include <linux/prefetch.h> #include <linux/bpf_trace.h> +#include <linux/net/intel/libie/rx.h> +#include <net/libeth/xdp.h> #include <net/dsfield.h> #include <net/mpls.h> #include <net/xdp.h> @@ -111,7 +113,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, static void ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { - if (dma_unmap_len(tx_buf, len)) + if (tx_buf->type != ICE_TX_BUF_XDP_TX && dma_unmap_len(tx_buf, len)) dma_unmap_page(ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), @@ -125,7 +127,7 @@ ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) dev_kfree_skb_any(tx_buf->skb); break; case ICE_TX_BUF_XDP_TX: - page_frag_free(tx_buf->raw_buf); + libeth_xdp_return_va(tx_buf->raw_buf, false); break; case ICE_TX_BUF_XDP_XMIT: xdp_return_frame(tx_buf->xdpf); @@ -506,61 +508,67 @@ err: return -ENOMEM; } +void ice_rxq_pp_destroy(struct ice_rx_ring *rq) +{ + struct libeth_fq fq = { + .fqes = rq->rx_fqes, + .pp = rq->pp, + }; + + libeth_rx_fq_destroy(&fq); + rq->rx_fqes = NULL; + rq->pp = NULL; + + if (!rq->hdr_pp) + return; + + fq.fqes = rq->hdr_fqes; + fq.pp = rq->hdr_pp; + + libeth_rx_fq_destroy(&fq); + rq->hdr_fqes = NULL; + rq->hdr_pp = NULL; +} + /** * ice_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned */ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { - struct xdp_buff *xdp = &rx_ring->xdp; - struct device *dev = rx_ring->dev; u32 size; - u16 i; - - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_buf) - return; if (rx_ring->xsk_pool) { ice_xsk_clean_rx_ring(rx_ring); goto rx_skip_free; } - if (xdp->data) { - xdp_return_buff(xdp); - xdp->data = NULL; - } + /* ring already cleared, nothing to do */ + if (!rx_ring->rx_fqes) + return; + + libeth_xdp_return_stash(&rx_ring->xdp); /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + libeth_rx_recycle_slow(rx_ring->rx_fqes[i].netmem); - if (!rx_buf->page) - continue; + if (rx_ring->hdr_pp) + libeth_rx_recycle_slow(rx_ring->hdr_fqes[i].netmem); - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. - */ - dma_sync_single_range_for_cpu(dev, rx_buf->dma, - rx_buf->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); - - rx_buf->page = NULL; - rx_buf->page_offset = 0; + if (unlikely(++i == rx_ring->count)) + i = 0; } -rx_skip_free: - if (rx_ring->xsk_pool) - memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf))); - else - memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf))); + if (rx_ring->vsi->type == ICE_VSI_PF && + xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + ice_rxq_pp_destroy(rx_ring); + +rx_skip_free: /* Zero out the descriptor ring */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); @@ -568,7 +576,6 @@ rx_skip_free: rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; - rx_ring->first_desc = 0; rx_ring->next_to_use = 0; } @@ -580,26 +587,20 @@ rx_skip_free: */ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) { + struct device *dev = ice_pf_to_dev(rx_ring->vsi->back); u32 size; ice_clean_rx_ring(rx_ring); - if (rx_ring->vsi->type == ICE_VSI_PF) - if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; - } else { - kfree(rx_ring->rx_buf); - rx_ring->rx_buf = NULL; } if (rx_ring->desc) { size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); - dmam_free_coherent(rx_ring->dev, size, - rx_ring->desc, rx_ring->dma); + dmam_free_coherent(dev, size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } } @@ -612,19 +613,9 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) */ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) { - struct device *dev = rx_ring->dev; + struct device *dev = ice_pf_to_dev(rx_ring->vsi->back); u32 size; - if (!dev) - return -ENOMEM; - - /* warn if we are about to overwrite the pointer */ - WARN_ON(rx_ring->rx_buf); - rx_ring->rx_buf = - kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); - if (!rx_ring->rx_buf) - return -ENOMEM; - /* round up to nearest page */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); @@ -633,22 +624,16 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) if (!rx_ring->desc) { dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", size); - goto err; + return -ENOMEM; } rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; - rx_ring->first_desc = 0; if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); return 0; - -err: - kfree(rx_ring->rx_buf); - rx_ring->rx_buf = NULL; - return -ENOMEM; } /** @@ -662,7 +647,7 @@ err: * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static u32 -ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, +ice_run_xdp(struct ice_rx_ring *rx_ring, struct libeth_xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, union ice_32b_rx_flex_desc *eop_desc) { @@ -672,23 +657,23 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (!xdp_prog) goto exit; - ice_xdp_meta_set_desc(xdp, eop_desc); + xdp->desc = eop_desc; - act = bpf_prog_run_xdp(xdp_prog, xdp); + act = bpf_prog_run_xdp(xdp_prog, &xdp->base); switch (act) { case XDP_PASS: break; case XDP_TX: if (static_branch_unlikely(&ice_xdp_locking_key)) spin_lock(&xdp_ring->tx_lock); - ret = __ice_xmit_xdp_ring(xdp, xdp_ring, false); + ret = __ice_xmit_xdp_ring(&xdp->base, xdp_ring, false); if (static_branch_unlikely(&ice_xdp_locking_key)) spin_unlock(&xdp_ring->tx_lock); if (ret == ICE_XDP_CONSUMED) goto out_failure; break; case XDP_REDIRECT: - if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog)) + if (xdp_do_redirect(rx_ring->netdev, &xdp->base, xdp_prog)) goto out_failure; ret = ICE_XDP_REDIR; break; @@ -700,8 +685,10 @@ out_failure: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_DROP: + libeth_xdp_return_buff(xdp); ret = ICE_XDP_CONSUMED; } + exit: return ret; } @@ -790,53 +777,6 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } /** - * ice_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buf struct to modify - * - * Returns true if the page was successfully allocated or - * reused. - */ -static bool -ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - - /* alloc new page for storage */ - page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ice_rx_pg_order(rx_ring)); - rx_ring->ring_stats->rx_stats.alloc_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = rx_ring->rx_offset; - page_ref_add(page, USHRT_MAX - 1); - bi->pagecnt_bias = USHRT_MAX; - - return true; -} - -/** * ice_init_ctrl_rx_descs - Initialize Rx descriptors for control vsi. * @rx_ring: ring to init descriptors on * @count: number of descriptors to initialize @@ -882,9 +822,20 @@ void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 count) */ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) { + const struct libeth_fq_fp hdr_fq = { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + .truesize = rx_ring->hdr_truesize, + .count = rx_ring->count, + }; + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; - struct ice_rx_buf *bi; /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) @@ -892,30 +843,39 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) /* get the Rx descriptor and buffer based on next_to_use */ rx_desc = ICE_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_buf[ntu]; do { - /* if we fail here, we have work remaining */ - if (!ice_alloc_mapped_page(rx_ring, bi)) - break; + dma_addr_t addr; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); + addr = libeth_rx_alloc(&fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + rx_ring->ring_stats->rx_stats.alloc_page_failed++; + break; + } /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); + + if (!hdr_fq.pp) + goto next; + + addr = libeth_rx_alloc(&hdr_fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + rx_ring->ring_stats->rx_stats.alloc_page_failed++; + + libeth_rx_recycle_slow(fq.fqes[ntu].netmem); + break; + } + + rx_desc->read.hdr_addr = cpu_to_le64(addr); +next: rx_desc++; - bi++; ntu++; if (unlikely(ntu == rx_ring->count)) { rx_desc = ICE_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buf; ntu = 0; } @@ -932,402 +892,6 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) } /** - * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse - * @rx_buf: Rx buffer to adjust - * @size: Size of adjustment - * - * Update the offset within page so that Rx buf will be ready to be reused. - * For systems with PAGE_SIZE < 8192 this function will flip the page offset - * so the second half of page assigned to Rx buffer will be used, otherwise - * the offset is moved by "size" bytes - */ -static void -ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) -{ -#if (PAGE_SIZE < 8192) - /* flip page offset to other buffer */ - rx_buf->page_offset ^= size; -#else - /* move offset up to the next cache line */ - rx_buf->page_offset += size; -#endif -} - -/** - * ice_can_reuse_rx_page - Determine if page can be reused for another Rx - * @rx_buf: buffer containing the page - * - * If page is reusable, we have a green light for calling ice_reuse_rx_page, - * which will assign the current buffer to the buffer that next_to_alloc is - * pointing to; otherwise, the DMA mapping needs to be destroyed and - * page freed - */ -static bool -ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) -{ - unsigned int pagecnt_bias = rx_buf->pagecnt_bias; - struct page *page = rx_buf->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - - /* if we are only owner of page we can reuse it */ - if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) - return false; -#if (PAGE_SIZE >= 8192) -#define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) - if (rx_buf->page_offset > ICE_LAST_OFFSET) - return false; -#endif /* PAGE_SIZE >= 8192) */ - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(pagecnt_bias == 1)) { - page_ref_add(page, USHRT_MAX - 1); - rx_buf->pagecnt_bias = USHRT_MAX; - } - - return true; -} - -/** - * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag - * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: xdp buff to place the data into - * @rx_buf: buffer containing page to add - * @size: packet length from rx_desc - * - * This function will add the data contained in rx_buf->page to the xdp buf. - * It will just attach the page as a frag. - */ -static int -ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct ice_rx_buf *rx_buf, const unsigned int size) -{ - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - - if (!size) - return 0; - - if (!xdp_buff_has_frags(xdp)) { - sinfo->nr_frags = 0; - sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(xdp); - } - - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) - return -ENOMEM; - - __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, - rx_buf->page_offset, size); - sinfo->xdp_frags_size += size; - - if (page_is_pfmemalloc(rx_buf->page)) - xdp_buff_set_frag_pfmemalloc(xdp); - - return 0; -} - -/** - * ice_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: Rx descriptor ring to store buffers on - * @old_buf: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - */ -static void -ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) -{ - u16 nta = rx_ring->next_to_alloc; - struct ice_rx_buf *new_buf; - - new_buf = &rx_ring->rx_buf[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* Transfer page from old buffer to new buffer. - * Move each member individually to avoid possible store - * forwarding stalls and unnecessary copy of skb. - */ - new_buf->dma = old_buf->dma; - new_buf->page = old_buf->page; - new_buf->page_offset = old_buf->page_offset; - new_buf->pagecnt_bias = old_buf->pagecnt_bias; -} - -/** - * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use - * @rx_ring: Rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * @ntc: index of next to clean element - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. - */ -static struct ice_rx_buf * -ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, - const unsigned int ntc) -{ - struct ice_rx_buf *rx_buf; - - rx_buf = &rx_ring->rx_buf[ntc]; - prefetchw(rx_buf->page); - - if (!size) - return rx_buf; - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, - rx_buf->page_offset, size, - DMA_FROM_DEVICE); - - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buf->pagecnt_bias--; - - return rx_buf; -} - -/** - * ice_get_pgcnts - grab page_count() for gathered fragments - * @rx_ring: Rx descriptor ring to store the page counts on - * @ntc: the next to clean element (not included in this frame!) - * - * This function is intended to be called right before running XDP - * program so that the page recycling mechanism will be able to take - * a correct decision regarding underlying pages; this is done in such - * way as XDP program can change the refcount of page - */ -static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc) -{ - u32 idx = rx_ring->first_desc; - struct ice_rx_buf *rx_buf; - u32 cnt = rx_ring->count; - - while (idx != ntc) { - rx_buf = &rx_ring->rx_buf[idx]; - rx_buf->pgcnt = page_count(rx_buf->page); - - if (++idx == cnt) - idx = 0; - } -} - -/** - * ice_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: xdp_buff pointing to the data - * - * This function builds an skb around an existing XDP buffer, taking care - * to set up the skb correctly and avoid any memcpy overhead. Driver has - * already combined frags (if any) to skb_shared_info. - */ -static struct sk_buff * -ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) -{ - u8 metasize = xdp->data - xdp->data_meta; - struct skb_shared_info *sinfo = NULL; - unsigned int nr_frags; - struct sk_buff *skb; - - if (unlikely(xdp_buff_has_frags(xdp))) { - sinfo = xdp_get_shared_info_from_buff(xdp); - nr_frags = sinfo->nr_frags; - } - - /* Prefetch first cache line of first page. If xdp->data_meta - * is unused, this points exactly as xdp->data, otherwise we - * likely have a consumer accessing first few bytes of meta - * data, and then actual data. - */ - net_prefetch(xdp->data_meta); - /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); - if (unlikely(!skb)) - return NULL; - - /* must to record Rx queue, otherwise OS features such as - * symmetric queue won't work - */ - skb_record_rx_queue(skb, rx_ring->q_index); - - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); - - if (unlikely(xdp_buff_has_frags(xdp))) - xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_get_skb_flags(xdp)); - - return skb; -} - -/** - * ice_construct_skb - Allocate skb and populate it - * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: xdp_buff pointing to the data - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. - */ -static struct sk_buff * -ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) -{ - unsigned int size = xdp->data_end - xdp->data; - struct skb_shared_info *sinfo = NULL; - struct ice_rx_buf *rx_buf; - unsigned int nr_frags = 0; - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - if (unlikely(xdp_buff_has_frags(xdp))) { - sinfo = xdp_get_shared_info_from_buff(xdp); - nr_frags = sinfo->nr_frags; - } - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - rx_buf = &rx_ring->rx_buf[rx_ring->first_desc]; - skb_record_rx_queue(skb, rx_ring->q_index); - /* Determine available headroom for copy */ - headlen = size; - if (headlen > ICE_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, - sizeof(long))); - - /* if we exhaust the linear part then add what is left as a frag */ - size -= headlen; - if (size) { - /* besides adding here a partial frag, we are going to add - * frags from xdp_buff, make sure there is enough space for - * them - */ - if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) { - dev_kfree_skb(skb); - return NULL; - } - skb_add_rx_frag(skb, 0, rx_buf->page, - rx_buf->page_offset + headlen, size, - xdp->frame_sz); - } else { - /* buffer is unused, restore biased page count in Rx buffer; - * data was copied onto skb's linear part so there's no - * need for adjusting page offset and we can reuse this buffer - * as-is - */ - rx_buf->pagecnt_bias++; - } - - if (unlikely(xdp_buff_has_frags(xdp))) { - struct skb_shared_info *skinfo = skb_shinfo(skb); - - memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], - sizeof(skb_frag_t) * nr_frags); - - xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags, - sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_get_skb_flags(xdp)); - } - - return skb; -} - -/** - * ice_put_rx_buf - Clean up used buffer and either recycle or free - * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buf: Rx buffer to pull data from - * - * This function will clean up the contents of the rx_buf. It will either - * recycle the buffer or unmap it and free the associated resources. - */ -static void -ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) -{ - if (!rx_buf) - return; - - if (ice_can_reuse_rx_page(rx_buf)) { - /* hand second half of page back to the ring */ - ice_reuse_rx_page(rx_ring, rx_buf); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, - ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, - ICE_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); - } - - /* clear contents of buffer_info */ - rx_buf->page = NULL; -} - -/** - * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame - * @rx_ring: Rx ring with all the auxiliary data - * @xdp: XDP buffer carrying linear + frags part - * @ntc: the next to clean element (not included in this frame!) - * @verdict: return code from XDP program execution - * - * Called after XDP program is completed, or on error with verdict set to - * ICE_XDP_CONSUMED. - * - * Walk through buffers from first_desc to the end of the frame, releasing - * buffers and satisfying internal page recycle mechanism. The action depends - * on verdict from XDP program. - */ -static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 ntc, u32 verdict) -{ - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - u32 xdp_frags = 0; - int i = 0; - - if (unlikely(xdp_buff_has_frags(xdp))) - xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - - while (idx != ntc) { - buf = &rx_ring->rx_buf[idx]; - if (++idx == cnt) - idx = 0; - - /* An XDP program could release fragments from the end of the - * buffer. For these, we need to keep the pagecnt_bias as-is. - * To do this, only adjust pagecnt_bias for fragments up to - * the total remaining after the XDP program has run. - */ - if (verdict != ICE_XDP_CONSUMED) - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - else if (i++ <= xdp_frags) - buf->pagecnt_bias++; - - ice_put_rx_buf(rx_ring, buf); - } - - xdp->data = NULL; - rx_ring->first_desc = ntc; -} - -/** * ice_clean_ctrl_rx_irq - Clean descriptors from flow director Rx ring * @rx_ring: Rx descriptor ring for ctrl_vsi to transact packets on * @@ -1361,9 +925,8 @@ void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring) total_rx_pkts++; } - rx_ring->first_desc = ntc; rx_ring->next_to_clean = ntc; - ice_init_ctrl_rx_descs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); + ice_init_ctrl_rx_descs(rx_ring, ICE_DESC_UNUSED(rx_ring)); } /** @@ -1381,16 +944,17 @@ void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring) static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; - unsigned int offset = rx_ring->rx_offset; - struct xdp_buff *xdp = &rx_ring->xdp; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; bool failure; + libeth_xdp_init_buff(xdp, &rx_ring->xdp, &rx_ring->xdp_rxq); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1400,19 +964,21 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_buf; struct sk_buff *skb; unsigned int size; u16 stat_err_bits; u16 vlan_tci; + bool rxe; /* get the Rx desc from Rx ring based on 'next_to_clean' */ rx_desc = ICE_RX_DESC(rx_ring, ntc); - /* status_error_len will always be zero for unused descriptors - * because it's cleared in cleanup, and overlaps with hdr_addr - * which is always zero because packet split isn't used, if the - * hardware wrote DD then it will be non-zero + /* + * The DD bit will always be zero for unused descriptors + * because it's cleared in cleanup or when setting the DMA + * address of the header buffer, which never uses the DD bit. + * If the hardware wrote the descriptor, it will be non-zero. */ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits)) @@ -1426,71 +992,65 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ice_trace(clean_rx_irq, rx_ring, rx_desc); + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_HBO_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); + rxe = ice_test_staterr(rx_desc->wb.status_error0, + stat_err_bits); + + if (!rx_ring->hdr_pp) + goto payload; + + size = le16_get_bits(rx_desc->wb.hdr_len_sph_flex_flags1, + ICE_RX_FLEX_DESC_HDR_LEN_M); + if (unlikely(rxe)) + size = 0; + + rx_buf = &rx_ring->hdr_fqes[ntc]; + libeth_xdp_process_buff(xdp, rx_buf, size); + rx_buf->netmem = 0; + +payload: size = le16_to_cpu(rx_desc->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; + if (unlikely(rxe)) + size = 0; /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, size, ntc); + rx_buf = &rx_ring->rx_fqes[ntc]; + libeth_xdp_process_buff(xdp, rx_buf, size); - /* Increment ntc before calls to ice_put_rx_mbuf() */ if (++ntc == cnt) ntc = 0; - if (!xdp->data) { - void *hard_start; - - hard_start = page_address(rx_buf->page) + rx_buf->page_offset - - offset; - xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); - xdp_buff_clear_frags_flag(xdp); - } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED); - break; - } - /* skip if it is NOP desc */ - if (ice_is_non_eop(rx_ring, rx_desc)) + if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!xdp->data)) continue; - ice_get_pgcnts(rx_ring, ntc); xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; - total_rx_bytes += xdp_get_buff_len(xdp); - total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); - xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR); + if (xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) + xdp_xmit |= xdp_verdict; + total_rx_bytes += xdp_get_buff_len(&xdp->base); + total_rx_pkts++; + xdp->data = NULL; continue; + construct_skb: - if (likely(ice_ring_uses_build_skb(rx_ring))) - skb = ice_build_skb(rx_ring, xdp); - else - skb = ice_construct_skb(rx_ring, xdp); + skb = xdp_build_skb_from_buff(&xdp->base); + xdp->data = NULL; + /* exit if we failed to retrieve a buffer */ if (!skb) { + libeth_xdp_return_buff_slow(xdp); rx_ring->ring_stats->rx_stats.alloc_buf_failed++; - xdp_verdict = ICE_XDP_CONSUMED; - } - ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); - - if (!skb) - break; - - stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); - if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, - stat_err_bits))) { - dev_kfree_skb_any(skb); continue; } vlan_tci = ice_get_vlan_tci(rx_desc); - /* pad the skb if needed, to make a valid ethernet frame */ - if (eth_skb_pad(skb)) - continue; - /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1507,11 +1067,13 @@ construct_skb: rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ - failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); + failure = ice_alloc_rx_bufs(rx_ring, ICE_DESC_UNUSED(rx_ring)); if (xdp_xmit) ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu); + libeth_xdp_save_buff(&rx_ring->xdp, xdp); + if (rx_ring->ring_stats) ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 841a07bfba54..e440c55d9e9f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -4,6 +4,8 @@ #ifndef _ICE_TXRX_H_ #define _ICE_TXRX_H_ +#include <net/libeth/types.h> + #include "ice_type.h" #define ICE_DFLT_IRQ_WORK 256 @@ -27,72 +29,6 @@ #define ICE_MAX_TXQ_PER_TXQG 128 -/* Attempt to maximize the headroom available for incoming frames. We use a 2K - * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame. - * This leaves us with 512 bytes of room. From that we need to deduct the - * space needed for the shared info and the padding needed to IP align the - * frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define ICE_2K_TOO_SMALL_WITH_PADDING \ - ((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \ - SKB_WITH_OVERHEAD(ICE_RXBUF_2048)) - -/** - * ice_compute_pad - compute the padding - * @rx_buf_len: buffer length - * - * Figure out the size of half page based on given buffer length and - * then subtract the skb_shared_info followed by subtraction of the - * actual buffer length; this in turn results in the actual space that - * is left for padding usage - */ -static inline int ice_compute_pad(int rx_buf_len) -{ - int half_page_size; - - half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len; -} - -/** - * ice_skb_pad - determine the padding that we can supply - * - * Figure out the right Rx buffer size and based on that calculate the - * padding - */ -static inline int ice_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. - * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. - */ - if (ICE_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = ICE_RXBUF_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return ice_compute_pad(rx_buf_len); -} - -#define ICE_SKB_PAD ice_skb_pad() -#else -#define ICE_2K_TOO_SMALL_WITH_PADDING false -#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif - /* We are assuming that the cache line is always 64 Bytes here for ice. * In order to make sure that is a correct assumption there is a check in probe * to print a warning if the read from GLPCI_CNF2 tells us that the cache line @@ -112,10 +48,6 @@ static inline int ice_skb_pad(void) (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) -#define ICE_RX_DESC_UNUSED(R) \ - ((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->first_desc - (R)->next_to_use - 1) - #define ICE_RING_QUARTER(R) ((R)->count >> 2) #define ICE_TX_FLAGS_TSO BIT(0) @@ -197,14 +129,6 @@ struct ice_tx_offload_params { u8 header_len; }; -struct ice_rx_buf { - dma_addr_t dma; - struct page *page; - unsigned int page_offset; - unsigned int pgcnt; - unsigned int pagecnt_bias; -}; - struct ice_q_stats { u64 pkts; u64 bytes; @@ -262,15 +186,6 @@ struct ice_pkt_ctx { __be16 vlan_proto; }; -struct ice_xdp_buff { - struct xdp_buff xdp_buff; - const union ice_32b_rx_flex_desc *eop_desc; - const struct ice_pkt_ctx *pkt_ctx; -}; - -/* Required for compatibility with xdp_buffs from xsk_pool */ -static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0); - /* indices into GLINT_ITR registers */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 @@ -323,7 +238,7 @@ struct ice_tstamp_ring { struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ + struct page_pool *pp; struct net_device *netdev; /* netdev ring maps to */ struct ice_vsi *vsi; /* Backreference to associated VSI */ struct ice_q_vector *q_vector; /* Backreference to associated vector */ @@ -335,14 +250,19 @@ struct ice_rx_ring { u16 next_to_alloc; union { - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_fqes; struct xdp_buff **xdp_buf; }; + /* CL2 - 2nd cacheline starts here */ + struct libeth_fqe *hdr_fqes; + struct page_pool *hdr_pp; + union { - struct ice_xdp_buff xdp_ext; - struct xdp_buff xdp; + struct libeth_xdp_buff_stash xdp; + struct libeth_xdp_buff *xsk; }; + /* CL3 - 3rd cacheline starts here */ union { struct ice_pkt_ctx pkt_ctx; @@ -352,12 +272,13 @@ struct ice_rx_ring { }; }; struct bpf_prog *xdp_prog; - u16 rx_offset; /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u16 first_desc; + + u32 hdr_truesize; + u32 truesize; /* stats structs */ struct ice_ring_stats *ring_stats; @@ -368,12 +289,11 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; - u16 max_frame; + u16 rx_hdr_len; u16 rx_buf_len; dma_addr_t dma; /* physical address of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; -#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) #define ICE_RX_FLAGS_MULTIDEV BIT(3) #define ICE_RX_FLAGS_RING_GCS BIT(4) @@ -422,21 +342,6 @@ struct ice_tx_ring { u16 quanta_prof_id; } ____cacheline_internodealigned_in_smp; -static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) -{ - return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB); -} - -static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring) -{ - ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB; -} - -static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring) -{ - ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; -} - static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring) { return !!ring->ch; @@ -491,18 +396,13 @@ struct ice_coalesce_stored { static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring) { -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif return 0; } -#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring)) - union ice_32b_rx_flex_desc; void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 num_descs); +void ice_rxq_pp_destroy(struct ice_rx_ring *rq); bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); u16 diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 45cfaabc41cb..956da38d63b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -3,6 +3,7 @@ #include <linux/filter.h> #include <linux/net/intel/libie/rx.h> +#include <net/libeth/xdp.h> #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -230,9 +231,12 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, if (ice_is_port_repr_netdev(netdev)) ice_repr_inc_rx_stats(netdev, skb->len); + + /* __skb_push() is needed because xdp_build_skb_from_buff() + * calls eth_type_trans() + */ + __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, netdev); - } else { - skb->protocol = eth_type_trans(skb, rx_ring->netdev); } ice_rx_csum(rx_ring, skb, rx_desc, ptype); @@ -270,19 +274,18 @@ static void ice_clean_xdp_tx_buf(struct device *dev, struct ice_tx_buf *tx_buf, struct xdp_frame_bulk *bq) { - dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - switch (tx_buf->type) { case ICE_TX_BUF_XDP_TX: - page_frag_free(tx_buf->raw_buf); + libeth_xdp_return_va(tx_buf->raw_buf, true); break; case ICE_TX_BUF_XDP_XMIT: + dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); xdp_return_frame_bulk(tx_buf->xdpf, bq); break; } + dma_unmap_len_set(tx_buf, len, 0); tx_buf->type = ICE_TX_BUF_EMPTY; } @@ -377,9 +380,11 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf; u32 cnt = xdp_ring->count; void *data = xdp->data; + struct page *page; u32 nr_frags = 0; u32 free_space; u32 frag = 0; + u32 offset; free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) @@ -399,24 +404,28 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, tx_head = &xdp_ring->tx_buf[ntu]; tx_buf = tx_head; + page = virt_to_page(data); + offset = offset_in_page(xdp->data); + for (;;) { dma_addr_t dma; - dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_unmap; - - /* record length, and DMA address */ - dma_unmap_len_set(tx_buf, len, size); - dma_unmap_addr_set(tx_buf, dma, dma); - if (frame) { + dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma)) + goto dma_unmap; tx_buf->type = ICE_TX_BUF_FRAG; } else { + dma = page_pool_get_dma_addr(page) + offset; + dma_sync_single_for_device(dev, dma, size, DMA_BIDIRECTIONAL); tx_buf->type = ICE_TX_BUF_XDP_TX; tx_buf->raw_buf = data; } + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + tx_desc->buf_addr = cpu_to_le64(dma); tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0); @@ -430,6 +439,8 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, tx_desc = ICE_TX_DESC(xdp_ring, ntu); tx_buf = &xdp_ring->tx_buf[ntu]; + page = skb_frag_page(&sinfo->frags[frag]); + offset = skb_frag_off(&sinfo->frags[frag]); data = skb_frag_address(&sinfo->frags[frag]); size = skb_frag_size(&sinfo->frags[frag]); frag++; @@ -514,10 +525,13 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, */ static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; + struct ice_rx_ring *rx_ring; - *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->eop_desc, - xdp_ext->pkt_ctx); + rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq); + + *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->desc, + &rx_ring->pkt_ctx); if (!*ts_ns) return -ENODATA; @@ -545,10 +559,10 @@ ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc) static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; - *hash = ice_get_rx_hash(xdp_ext->eop_desc); - *rss_type = ice_xdp_rx_hash_type(xdp_ext->eop_desc); + *hash = ice_get_rx_hash(xdp_ext->desc); + *rss_type = ice_xdp_rx_hash_type(xdp_ext->desc); if (!likely(*hash)) return -ENODATA; @@ -567,13 +581,16 @@ static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto, u16 *vlan_tci) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; + struct ice_rx_ring *rx_ring; + + rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq); - *vlan_proto = xdp_ext->pkt_ctx->vlan_proto; + *vlan_proto = rx_ring->pkt_ctx.vlan_proto; if (!*vlan_proto) return -ENODATA; - *vlan_tci = ice_get_vlan_tci(xdp_ext->eop_desc); + *vlan_tci = ice_get_vlan_tci(xdp_ext->desc); if (!*vlan_tci) return -ENODATA; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 99717730f21a..6a3f10f7a53f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -135,13 +135,4 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, void ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci); -static inline void -ice_xdp_meta_set_desc(struct xdp_buff *xdp, - union ice_32b_rx_flex_desc *eop_desc) -{ - struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff, - xdp_buff); - - xdp_ext->eop_desc = eop_desc; -} #endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index b0a1b67071c5..6a2ec8389a8f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -1063,6 +1063,7 @@ struct ice_hw_port_stats { u64 error_bytes; /* errbc */ u64 mac_local_faults; /* mlfc */ u64 mac_remote_faults; /* mrfc */ + u64 rx_len_errors; /* rlec */ u64 link_xon_rx; /* lxonrxc */ u64 link_xoff_rx; /* lxoffrxc */ u64 link_xon_tx; /* lxontxc */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index b00708907176..7a9c75d1d07c 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -53,6 +53,46 @@ struct ice_mdd_vf_events { u16 last_printed; }; +enum ice_hash_ip_ctx_type { + ICE_HASH_IP_CTX_IP = 0, + ICE_HASH_IP_CTX_IP_ESP, + ICE_HASH_IP_CTX_IP_UDP_ESP, + ICE_HASH_IP_CTX_IP_AH, + ICE_HASH_IP_CTX_IP_PFCP, + ICE_HASH_IP_CTX_IP_UDP, + ICE_HASH_IP_CTX_IP_TCP, + ICE_HASH_IP_CTX_IP_SCTP, + ICE_HASH_IP_CTX_MAX, +}; + +struct ice_vf_hash_ip_ctx { + struct ice_rss_hash_cfg ctx[ICE_HASH_IP_CTX_MAX]; +}; + +enum ice_hash_gtpu_ctx_type { + ICE_HASH_GTPU_CTX_EH_IP = 0, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + ICE_HASH_GTPU_CTX_UP_IP, + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, + ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + ICE_HASH_GTPU_CTX_MAX, +}; + +struct ice_vf_hash_gtpu_ctx { + struct ice_rss_hash_cfg ctx[ICE_HASH_GTPU_CTX_MAX]; +}; + +struct ice_vf_hash_ctx { + struct ice_vf_hash_ip_ctx v4; + struct ice_vf_hash_ip_ctx v6; + struct ice_vf_hash_gtpu_ctx ipv4; + struct ice_vf_hash_gtpu_ctx ipv6; +}; + /* Structure to store fdir fv entry */ struct ice_fdir_prof_info { struct ice_parser_profile prof; @@ -66,6 +106,12 @@ struct ice_vf_qs_bw { u8 tc; }; +/* Structure to store RSS field vector entry */ +struct ice_rss_prof_info { + struct ice_parser_profile prof; + bool symm; +}; + /* VF operations */ struct ice_vf_ops { enum ice_disq_rst_src reset_type; @@ -106,6 +152,8 @@ struct ice_vf { u16 ctrl_vsi_idx; struct ice_vf_fdir fdir; struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS]; + struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS]; + struct ice_vf_hash_ctx hash_ctx; u64 rss_hashcfg; /* RSS hash configuration */ struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 575fd48f485f..989ff1fd9110 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -3,6 +3,7 @@ #include <linux/bpf_trace.h> #include <linux/unroll.h> +#include <net/libeth/xdp.h> #include <net/xdp_sock_drv.h> #include <net/xdp.h> #include "ice.h" @@ -169,50 +170,18 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) * If allocation was successful, substitute buffer with allocated one. * Returns 0 on success, negative on failure */ -static int +int ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present) { - size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) : - sizeof(*rx_ring->rx_buf); - void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL); - - if (!sw_ring) - return -ENOMEM; - if (pool_present) { - kfree(rx_ring->rx_buf); - rx_ring->rx_buf = NULL; - rx_ring->xdp_buf = sw_ring; + rx_ring->xdp_buf = kcalloc(rx_ring->count, + sizeof(*rx_ring->xdp_buf), + GFP_KERNEL); + if (!rx_ring->xdp_buf) + return -ENOMEM; } else { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; - rx_ring->rx_buf = sw_ring; - } - - return 0; -} - -/** - * ice_realloc_zc_buf - reallocate XDP ZC queue pairs - * @vsi: Current VSI - * @zc: is zero copy set - * - * Reallocate buffer for rx_rings that might be used by XSK. - * XDP requires more memory, than rx_buf provides. - * Returns 0 on success, negative on failure - */ -int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) -{ - struct ice_rx_ring *rx_ring; - uint i; - - ice_for_each_rxq(vsi, i) { - rx_ring = vsi->rx_rings[i]; - if (!rx_ring->xsk_pool) - continue; - - if (ice_realloc_rx_xdp_bufs(rx_ring, zc)) - return -ENOMEM; } return 0; @@ -228,6 +197,7 @@ int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) */ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) { + struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; @@ -241,8 +211,6 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) ice_is_xdp_ena_vsi(vsi); if (if_running) { - struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; - ret = ice_qp_dis(vsi, qid); if (ret) { netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret); @@ -303,11 +271,6 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->wb.status_error0 = 0; - /* Put private info that changes on a per-packet basis - * into xdp_buff_xsk->cb. - */ - ice_xdp_meta_set_desc(*xdp, rx_desc); - rx_desc++; xdp++; } @@ -393,69 +356,6 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, } /** - * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer - * @rx_ring: Rx ring - * @xdp: Pointer to XDP buffer - * - * This function allocates a new skb from a zero-copy Rx buffer. - * - * Returns the skb on success, NULL on failure. - */ -static struct sk_buff * -ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) -{ - unsigned int totalsize = xdp->data_end - xdp->data_meta; - unsigned int metasize = xdp->data - xdp->data_meta; - struct skb_shared_info *sinfo = NULL; - struct sk_buff *skb; - u32 nr_frags = 0; - - if (unlikely(xdp_buff_has_frags(xdp))) { - sinfo = xdp_get_shared_info_from_buff(xdp); - nr_frags = sinfo->nr_frags; - } - net_prefetch(xdp->data_meta); - - skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); - if (unlikely(!skb)) - return NULL; - - memcpy(__skb_put(skb, totalsize), xdp->data_meta, - ALIGN(totalsize, sizeof(long))); - - if (metasize) { - skb_metadata_set(skb, metasize); - __skb_pull(skb, metasize); - } - - if (likely(!xdp_buff_has_frags(xdp))) - goto out; - - for (int i = 0; i < nr_frags; i++) { - struct skb_shared_info *skinfo = skb_shinfo(skb); - skb_frag_t *frag = &sinfo->frags[i]; - struct page *page; - void *addr; - - page = dev_alloc_page(); - if (!page) { - dev_kfree_skb(skb); - return NULL; - } - addr = page_to_virt(page); - - memcpy(addr, skb_frag_page(frag), skb_frag_size(frag)); - - __skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++, - addr, 0, skb_frag_size(frag)); - } - -out: - xsk_buff_free(xdp); - return skb; -} - -/** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring * @xsk_pool: AF_XDP buffer pool pointer @@ -669,10 +569,10 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, struct xsk_buff_pool *xsk_pool, int budget) { + struct xdp_buff *first = (struct xdp_buff *)rx_ring->xsk; unsigned int total_rx_bytes = 0, total_rx_packets = 0; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; - struct xdp_buff *first = NULL; struct ice_tx_ring *xdp_ring; unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; @@ -686,9 +586,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, xdp_prog = READ_ONCE(rx_ring->xdp_prog); xdp_ring = rx_ring->xdp_ring; - if (ntc != rx_ring->first_desc) - first = *ice_xdp_buf(rx_ring, rx_ring->first_desc); - while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; @@ -724,15 +621,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, first = xdp; } else if (likely(size) && !xsk_buff_add_frag(first, xdp)) { xsk_buff_free(first); - break; + first = NULL; } if (++ntc == cnt) ntc = 0; - if (ice_is_non_eop(rx_ring, rx_desc)) + if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!first)) continue; + ((struct libeth_xdp_buff *)first)->desc = rx_desc; + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { @@ -740,7 +639,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, } else if (xdp_res == ICE_XDP_EXIT) { failure = true; first = NULL; - rx_ring->first_desc = ntc; break; } else if (xdp_res == ICE_XDP_CONSUMED) { xsk_buff_free(first); @@ -752,24 +650,20 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, total_rx_packets++; first = NULL; - rx_ring->first_desc = ntc; continue; construct_skb: /* XDP_PASS path */ - skb = ice_construct_skb_zc(rx_ring, first); + skb = xdp_build_skb_from_zc(first); if (!skb) { + xsk_buff_free(first); + first = NULL; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; - break; + continue; } first = NULL; - rx_ring->first_desc = ntc; - - if (eth_skb_pad(skb)) { - skb = NULL; - continue; - } total_rx_bytes += skb->len; total_rx_packets++; @@ -781,7 +675,9 @@ construct_skb: } rx_ring->next_to_clean = ntc; - entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); + rx_ring->xsk = (struct libeth_xdp_buff *)first; + + entries_to_alloc = ICE_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, entries_to_alloc); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 600cbeeaa203..5275fcedc9e1 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -22,7 +22,7 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); -int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); +int ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present); void ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid); void ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, @@ -77,8 +77,8 @@ static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { } static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { } static inline int -ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi, - bool __always_unused zc) +ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, + bool __always_unused pool_present) { return 0; } diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c index 370f6ec2a374..f73d5a3e83d4 100644 --- a/drivers/net/ethernet/intel/ice/virt/queues.c +++ b/drivers/net/ethernet/intel/ice/virt/queues.c @@ -842,18 +842,20 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || qpi->rxq.databuffer_size < 1024)) goto error_param; + ring->rx_buf_len = qpi->rxq.databuffer_size; + if (qpi->rxq.max_pkt_size > max_frame_size || qpi->rxq.max_pkt_size < 64) goto error_param; - ring->max_frame = qpi->rxq.max_pkt_size; + vsi->max_frame = qpi->rxq.max_pkt_size; /* add space for the port VLAN since the VF driver is * not expected to account for it in the MTU * calculation */ if (ice_vf_is_port_vlan_ena(vf)) - ring->max_frame += VLAN_HLEN; + vsi->max_frame += VLAN_HLEN; if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", diff --git a/drivers/net/ethernet/intel/ice/virt/rss.c b/drivers/net/ethernet/intel/ice/virt/rss.c index cbdbb32d512b..085e69ec0cfc 100644 --- a/drivers/net/ethernet/intel/ice/virt/rss.c +++ b/drivers/net/ethernet/intel/ice/virt/rss.c @@ -36,6 +36,11 @@ static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = { {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP}, {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, + {VIRTCHNL_PROTO_HDR_GTPC, ICE_FLOW_SEG_HDR_GTPC}, + {VIRTCHNL_PROTO_HDR_L2TPV2, ICE_FLOW_SEG_HDR_L2TPV2}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, ICE_FLOW_SEG_HDR_IPV_FRAG}, + {VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, ICE_FLOW_SEG_HDR_IPV_FRAG}, + {VIRTCHNL_PROTO_HDR_GRE, ICE_FLOW_SEG_HDR_GRE}, }; struct ice_vc_hash_field_match_type { @@ -87,8 +92,125 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + {VIRTCHNL_PROTO_HDR_IPV4, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_ID)}, + {VIRTCHNL_PROTO_HDR_IPV4, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + ICE_FLOW_HASH_IPV4}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_ID)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), @@ -110,6 +232,35 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_ID)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST), + ICE_FLOW_HASH_IPV6_PRE64}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + ICE_FLOW_HASH_IPV6_PRE64 | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, {VIRTCHNL_PROTO_HDR_TCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, @@ -120,6 +271,25 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), ICE_FLOW_HASH_TCP_PORT}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + ICE_FLOW_HASH_TCP_PORT | + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, {VIRTCHNL_PROTO_HDR_UDP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, @@ -130,6 +300,25 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), ICE_FLOW_HASH_UDP_PORT}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + ICE_FLOW_HASH_UDP_PORT | + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, {VIRTCHNL_PROTO_HDR_SCTP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, @@ -140,6 +329,25 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), ICE_FLOW_HASH_SCTP_PORT}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + ICE_FLOW_HASH_SCTP_PORT | + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, {VIRTCHNL_PROTO_HDR_PPPOE, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, @@ -155,8 +363,54 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, + {VIRTCHNL_PROTO_HDR_GTPC, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPC_TEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)}, + {VIRTCHNL_PROTO_HDR_L2TPV2, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_L2TPV2, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID)}, }; +static int +ice_vc_rss_hash_update(struct ice_hw *hw, struct ice_vsi *vsi, u8 hash_type) +{ + struct ice_vsi_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + /* clear previous hash_type */ + ctx->info.q_opt_rss = vsi->info.q_opt_rss & + ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M; + /* hash_type is passed in as ICE_AQ_VSI_Q_OPT_RSS_<XOR|TPLZ|SYM_TPLZ */ + ctx->info.q_opt_rss |= FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, + hash_type); + + /* Preserve existing queueing option setting */ + ctx->info.q_opt_tc = vsi->info.q_opt_tc; + ctx->info.q_opt_flags = vsi->info.q_opt_flags; + + ctx->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + + ret = ice_update_vsi(hw, vsi->idx, ctx, NULL); + if (ret) { + dev_err(ice_hw_to_dev(hw), "update VSI for RSS failed, err %d aq_err %s\n", + ret, libie_aq_str(hw->adminq.sq_last_status)); + } else { + vsi->info.q_opt_rss = ctx->info.q_opt_rss; + } + + kfree(ctx); + + return ret; +} + /** * ice_vc_validate_pattern * @vf: pointer to the VF info @@ -271,6 +525,11 @@ static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, const struct ice_vc_hash_field_match_type *hf_list; const struct ice_vc_hdr_match_type *hdr_list; int i, hf_list_len, hdr_list_len; + bool outer_ipv4 = false; + bool outer_ipv6 = false; + bool inner_hdr = false; + bool has_gre = false; + u32 *addl_hdrs = &hash_cfg->addl_hdrs; u64 *hash_flds = &hash_cfg->hash_flds; @@ -290,17 +549,17 @@ static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { struct virtchnl_proto_hdr *proto_hdr = &rss_cfg->proto_hdrs.proto_hdr[i]; - bool hdr_found = false; + u32 hdr_found = 0; int j; - /* Find matched ice headers according to virtchnl headers. */ + /* Find matched ice headers according to virtchnl headers. + * Also figure out the outer type of GTPU headers. + */ for (j = 0; j < hdr_list_len; j++) { struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; - if (proto_hdr->type == hdr_map.vc_hdr) { - *addl_hdrs |= hdr_map.ice_hdr; - hdr_found = true; - } + if (proto_hdr->type == hdr_map.vc_hdr) + hdr_found = hdr_map.ice_hdr; } if (!hdr_found) @@ -318,8 +577,98 @@ static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, break; } } + + if (proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV4 && !inner_hdr) + outer_ipv4 = true; + else if (proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV6 && + !inner_hdr) + outer_ipv6 = true; + /* for GRE and L2TPv2, take inner header as input set if no + * any field is selected from outer headers. + * for GTPU, take inner header and GTPU teid as input set. + */ + else if ((proto_hdr->type == VIRTCHNL_PROTO_HDR_GTPU_IP || + proto_hdr->type == VIRTCHNL_PROTO_HDR_GTPU_EH || + proto_hdr->type == VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN || + proto_hdr->type == + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP) || + ((proto_hdr->type == VIRTCHNL_PROTO_HDR_L2TPV2 || + proto_hdr->type == VIRTCHNL_PROTO_HDR_GRE) && + *hash_flds == 0)) { + /* set inner_hdr flag, and clean up outer header */ + inner_hdr = true; + + /* clear outer headers */ + *addl_hdrs = 0; + + if (outer_ipv4 && outer_ipv6) + return false; + + if (outer_ipv4) + hash_cfg->hdr_type = ICE_RSS_INNER_HEADERS_W_OUTER_IPV4; + else if (outer_ipv6) + hash_cfg->hdr_type = ICE_RSS_INNER_HEADERS_W_OUTER_IPV6; + else + hash_cfg->hdr_type = ICE_RSS_INNER_HEADERS; + + if (has_gre && outer_ipv4) + hash_cfg->hdr_type = + ICE_RSS_INNER_HEADERS_W_OUTER_IPV4_GRE; + if (has_gre && outer_ipv6) + hash_cfg->hdr_type = + ICE_RSS_INNER_HEADERS_W_OUTER_IPV6_GRE; + + if (proto_hdr->type == VIRTCHNL_PROTO_HDR_GRE) + has_gre = true; + } + + *addl_hdrs |= hdr_found; + + /* refine hash hdrs and fields for IP fragment */ + if (VIRTCHNL_TEST_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID) && + proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV4_FRAG) { + *addl_hdrs |= ICE_FLOW_SEG_HDR_IPV_FRAG; + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_IPV_OTHER); + *hash_flds |= BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_ID); + VIRTCHNL_DEL_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID); + } + if (VIRTCHNL_TEST_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID) && + proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG) { + *addl_hdrs |= ICE_FLOW_SEG_HDR_IPV_FRAG; + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_IPV_OTHER); + *hash_flds |= BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_ID); + VIRTCHNL_DEL_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID); + } + } + + /* refine gtpu header if we take outer as input set for a no inner + * ip gtpu flow. + */ + if (hash_cfg->hdr_type == ICE_RSS_OUTER_HEADERS && + *addl_hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) { + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_GTPU_IP); + *addl_hdrs |= ICE_FLOW_SEG_HDR_GTPU_NON_IP; } + /* refine hash field for esp and nat-t-esp. */ + if ((*addl_hdrs & ICE_FLOW_SEG_HDR_UDP) && + (*addl_hdrs & ICE_FLOW_SEG_HDR_ESP)) { + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_UDP); + *addl_hdrs |= ICE_FLOW_SEG_HDR_NAT_T_ESP; + *hash_flds &= ~(BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)); + *hash_flds |= BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI); + } + + /* refine hash hdrs for L4 udp/tcp/sctp. */ + if (*addl_hdrs & (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | + ICE_FLOW_SEG_HDR_SCTP) && + *addl_hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER) + *addl_hdrs &= ~ICE_FLOW_SEG_HDR_IPV_OTHER; + return true; } @@ -337,6 +686,874 @@ static bool ice_vf_adv_rss_offload_ena(u32 caps) } /** + * ice_is_hash_cfg_valid - Check whether an RSS hash context is valid + * @cfg: RSS hash configuration to test + * + * Return: true if both @cfg->hash_flds and @cfg->addl_hdrs are non-zero; false otherwise. + */ +static bool ice_is_hash_cfg_valid(struct ice_rss_hash_cfg *cfg) +{ + return cfg->hash_flds && cfg->addl_hdrs; +} + +/** + * ice_hash_cfg_reset - Reset an RSS hash context + * @cfg: RSS hash configuration to reset + * + * Reset fields of @cfg that store the active rule information. + */ +static void ice_hash_cfg_reset(struct ice_rss_hash_cfg *cfg) +{ + cfg->hash_flds = 0; + cfg->addl_hdrs = 0; + cfg->hdr_type = ICE_RSS_OUTER_HEADERS; + cfg->symm = 0; +} + +/** + * ice_hash_cfg_record - Record an RSS hash context + * @ctx: destination (global) RSS hash configuration + * @cfg: source RSS hash configuration to record + * + * Copy the active rule information from @cfg into @ctx. + */ +static void ice_hash_cfg_record(struct ice_rss_hash_cfg *ctx, + struct ice_rss_hash_cfg *cfg) +{ + ctx->hash_flds = cfg->hash_flds; + ctx->addl_hdrs = cfg->addl_hdrs; + ctx->hdr_type = cfg->hdr_type; + ctx->symm = cfg->symm; +} + +/** + * ice_hash_moveout - Delete an RSS configuration (keep context) + * @vf: VF pointer + * @cfg: RSS hash configuration + * + * Return: 0 on success (including when already absent); -ENOENT if @cfg is + * invalid or VSI is missing; -EBUSY on hardware removal failure. + */ +static int +ice_hash_moveout(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + if (!ice_is_hash_cfg_valid(cfg) || !vsi) + return -ENOENT; + + ret = ice_rem_rss_cfg(hw, vsi->idx, cfg); + if (ret && ret != -ENOENT) { + dev_err(dev, "ice_rem_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return -EBUSY; + } + + return 0; +} + +/** + * ice_hash_moveback - Add an RSS hash configuration for a VF + * @vf: VF pointer + * @cfg: RSS hash configuration to apply + * + * Add @cfg to @vf if the context is valid and VSI exists; programs HW. + * + * Return: + * * 0 on success + * * -ENOENT if @cfg is invalid or VSI is missing + * * -EBUSY if hardware programming fails + */ +static int +ice_hash_moveback(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + if (!ice_is_hash_cfg_valid(cfg) || !vsi) + return -ENOENT; + + ret = ice_add_rss_cfg(hw, vsi, cfg); + if (ret) { + dev_err(dev, "ice_add_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return -EBUSY; + } + + return 0; +} + +/** + * ice_hash_remove - remove a RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * This function will delete a RSS hash configuration and also delete the + * hash context which stores the rule info. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int +ice_hash_remove(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + int ret; + + ret = ice_hash_moveout(vf, cfg); + if (ret && ret != -ENOENT) + return ret; + + ice_hash_cfg_reset(cfg); + + return 0; +} + +struct ice_gtpu_ctx_action { + u32 ctx_idx; + const u32 *remove_list; + int remove_count; + const u32 *moveout_list; + int moveout_count; +}; + +/** + * ice_add_rss_cfg_pre_gtpu - Pre-process the GTPU RSS configuration + * @vf: pointer to the VF info + * @ctx: pointer to the context of the GTPU hash + * @ctx_idx: index of the hash context + * + * Pre-processes the GTPU hash configuration before adding a new + * hash context. It removes or reorders existing hash configurations that may + * conflict with the new one. For example, if a GTPU_UP or GTPU_DWN rule is + * configured after a GTPU_EH rule, the GTPU_EH hash will be matched first due + * to TCAM write and match order (top-down). In such cases, the GTPU_EH rule + * must be moved after the GTPU_UP/DWN rule. Conversely, if a GTPU_EH rule is + * configured after a GTPU_UP/DWN rule, the UP/DWN rules should be removed to + * avoid conflict. + * + * Return: 0 on success or a negative error code on failure + */ +static int ice_add_rss_cfg_pre_gtpu(struct ice_vf *vf, + struct ice_vf_hash_gtpu_ctx *ctx, + u32 ctx_idx) +{ + int ret, i; + + static const u32 remove_eh_ip[] = { + ICE_HASH_GTPU_CTX_EH_IP_UDP, ICE_HASH_GTPU_CTX_EH_IP_TCP, + ICE_HASH_GTPU_CTX_UP_IP, ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + + static const u32 remove_eh_ip_udp[] = { + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, + }; + static const u32 moveout_eh_ip_udp[] = { + ICE_HASH_GTPU_CTX_UP_IP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, + ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + + static const u32 remove_eh_ip_tcp[] = { + ICE_HASH_GTPU_CTX_UP_IP_TCP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + static const u32 moveout_eh_ip_tcp[] = { + ICE_HASH_GTPU_CTX_UP_IP, + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, + }; + + static const u32 remove_up_ip[] = { + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, + }; + static const u32 moveout_up_ip[] = { + ICE_HASH_GTPU_CTX_EH_IP, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + }; + + static const u32 moveout_up_ip_udp_tcp[] = { + ICE_HASH_GTPU_CTX_EH_IP, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + }; + + static const u32 remove_dw_ip[] = { + ICE_HASH_GTPU_CTX_DW_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + static const u32 moveout_dw_ip[] = { + ICE_HASH_GTPU_CTX_EH_IP, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + }; + + static const struct ice_gtpu_ctx_action actions[] = { + { ICE_HASH_GTPU_CTX_EH_IP, remove_eh_ip, + ARRAY_SIZE(remove_eh_ip), NULL, 0 }, + { ICE_HASH_GTPU_CTX_EH_IP_UDP, remove_eh_ip_udp, + ARRAY_SIZE(remove_eh_ip_udp), moveout_eh_ip_udp, + ARRAY_SIZE(moveout_eh_ip_udp) }, + { ICE_HASH_GTPU_CTX_EH_IP_TCP, remove_eh_ip_tcp, + ARRAY_SIZE(remove_eh_ip_tcp), moveout_eh_ip_tcp, + ARRAY_SIZE(moveout_eh_ip_tcp) }, + { ICE_HASH_GTPU_CTX_UP_IP, remove_up_ip, + ARRAY_SIZE(remove_up_ip), moveout_up_ip, + ARRAY_SIZE(moveout_up_ip) }, + { ICE_HASH_GTPU_CTX_UP_IP_UDP, NULL, 0, moveout_up_ip_udp_tcp, + ARRAY_SIZE(moveout_up_ip_udp_tcp) }, + { ICE_HASH_GTPU_CTX_UP_IP_TCP, NULL, 0, moveout_up_ip_udp_tcp, + ARRAY_SIZE(moveout_up_ip_udp_tcp) }, + { ICE_HASH_GTPU_CTX_DW_IP, remove_dw_ip, + ARRAY_SIZE(remove_dw_ip), moveout_dw_ip, + ARRAY_SIZE(moveout_dw_ip) }, + { ICE_HASH_GTPU_CTX_DW_IP_UDP, NULL, 0, moveout_dw_ip, + ARRAY_SIZE(moveout_dw_ip) }, + { ICE_HASH_GTPU_CTX_DW_IP_TCP, NULL, 0, moveout_dw_ip, + ARRAY_SIZE(moveout_dw_ip) }, + }; + + for (i = 0; i < ARRAY_SIZE(actions); i++) { + if (actions[i].ctx_idx != ctx_idx) + continue; + + if (actions[i].remove_list) { + for (int j = 0; j < actions[i].remove_count; j++) { + u16 rm = actions[i].remove_list[j]; + + ret = ice_hash_remove(vf, &ctx->ctx[rm]); + if (ret && ret != -ENOENT) + return ret; + } + } + + if (actions[i].moveout_list) { + for (int j = 0; j < actions[i].moveout_count; j++) { + u16 mv = actions[i].moveout_list[j]; + + ret = ice_hash_moveout(vf, &ctx->ctx[mv]); + if (ret && ret != -ENOENT) + return ret; + } + } + break; + } + + return 0; +} + +/** + * ice_add_rss_cfg_pre_ip - Pre-process IP-layer RSS configuration + * @vf: VF pointer + * @ctx: IP L4 hash context (ESP/UDP-ESP/AH/PFCP and UDP/TCP/SCTP) + * + * Remove covered/recorded IP RSS configurations prior to adding a new one. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_add_rss_cfg_pre_ip(struct ice_vf *vf, struct ice_vf_hash_ip_ctx *ctx) +{ + int i, ret; + + for (i = 1; i < ICE_HASH_IP_CTX_MAX; i++) + if (ice_is_hash_cfg_valid(&ctx->ctx[i])) { + ret = ice_hash_remove(vf, &ctx->ctx[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * ice_calc_gtpu_ctx_idx - Calculate GTPU hash context index + * @hdrs: Bitmask of protocol headers prefixed with ICE_FLOW_SEG_HDR_* + * + * Determine the GTPU hash context index based on the combination of + * encapsulation headers (GTPU_EH, GTPU_UP, GTPU_DWN) and transport + * protocols (UDP, TCP) within IPv4 or IPv6 flows. + * + * Return: A valid context index (0-8) if the header combination is supported, + * or ICE_HASH_GTPU_CTX_MAX if the combination is invalid. + */ +static enum ice_hash_gtpu_ctx_type ice_calc_gtpu_ctx_idx(u32 hdrs) +{ + u32 eh_idx, ip_idx; + + if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) + eh_idx = 0; + else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) + eh_idx = 1; + else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) + eh_idx = 2; + else + return ICE_HASH_GTPU_CTX_MAX; + + ip_idx = 0; + if (hdrs & ICE_FLOW_SEG_HDR_UDP) + ip_idx = 1; + else if (hdrs & ICE_FLOW_SEG_HDR_TCP) + ip_idx = 2; + + if (hdrs & (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)) + return eh_idx * 3 + ip_idx; + else + return ICE_HASH_GTPU_CTX_MAX; +} + +/** + * ice_map_ip_ctx_idx - map the index of the IP L4 hash context + * @hdrs: protocol headers prefix with ICE_FLOW_SEG_HDR_XXX. + * + * The IP L4 hash context use the index to classify for IPv4/IPv6 with + * ESP/UDP_ESP/AH/PFCP and non-tunnel UDP/TCP/SCTP + * this function map the index based on the protocol headers. + * + * Return: The mapped IP context index on success, or ICE_HASH_IP_CTX_MAX + * if no matching context is found. + */ +static u8 ice_map_ip_ctx_idx(u32 hdrs) +{ + u8 i; + + static struct { + u32 hdrs; + u8 ctx_idx; + } ip_ctx_idx_map[] = { + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_ESP, + ICE_HASH_IP_CTX_IP_ESP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_NAT_T_ESP, + ICE_HASH_IP_CTX_IP_UDP_ESP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_AH, + ICE_HASH_IP_CTX_IP_AH }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_PFCP_SESSION, + ICE_HASH_IP_CTX_IP_PFCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_ESP, + ICE_HASH_IP_CTX_IP_ESP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_NAT_T_ESP, + ICE_HASH_IP_CTX_IP_UDP_ESP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_AH, + ICE_HASH_IP_CTX_IP_AH }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_PFCP_SESSION, + ICE_HASH_IP_CTX_IP_PFCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + /* the remaining mappings are used for default RSS */ + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + }; + + for (i = 0; i < ARRAY_SIZE(ip_ctx_idx_map); i++) { + if (hdrs == ip_ctx_idx_map[i].hdrs) + return ip_ctx_idx_map[i].ctx_idx; + } + + return ICE_HASH_IP_CTX_MAX; +} + +/** + * ice_add_rss_cfg_pre - Prepare RSS configuration context for a VF + * @vf: pointer to the VF structure + * @cfg: pointer to the RSS hash configuration + * + * Prepare the RSS hash context for a given VF based on the additional + * protocol headers specified in @cfg. This includes pre-configuration + * for IP and GTPU-based flows. + * + * If the configuration matches a known IP context, the function sets up + * the appropriate IP hash context. If the configuration includes GTPU + * headers, it prepares the GTPU-specific context accordingly. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int +ice_add_rss_cfg_pre(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + u32 ice_gtpu_ctx_idx = ice_calc_gtpu_ctx_idx(cfg->addl_hdrs); + u8 ip_ctx_idx = ice_map_ip_ctx_idx(cfg->addl_hdrs); + + if (ip_ctx_idx == ICE_HASH_IP_CTX_IP) { + int ret = 0; + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ret = ice_add_rss_cfg_pre_ip(vf, &vf->hash_ctx.v4); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ret = ice_add_rss_cfg_pre_ip(vf, &vf->hash_ctx.v6); + + if (ret) + return ret; + } + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) { + return ice_add_rss_cfg_pre_gtpu(vf, &vf->hash_ctx.ipv4, + ice_gtpu_ctx_idx); + } else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) { + return ice_add_rss_cfg_pre_gtpu(vf, &vf->hash_ctx.ipv6, + ice_gtpu_ctx_idx); + } + + return 0; +} + +/** + * ice_add_rss_cfg_post_gtpu - Post-process GTPU RSS configuration + * @vf: pointer to the VF info + * @ctx: pointer to the context of the GTPU hash + * @cfg: pointer to the RSS hash configuration + * @ctx_idx: index of the hash context + * + * Post-processes the GTPU hash configuration after a new hash + * context has been successfully added. It updates the context with the new + * configuration and restores any previously removed hash contexts that need + * to be re-applied. This ensures proper TCAM rule ordering and avoids + * conflicts between overlapping GTPU rules. + * + * Return: 0 on success or a negative error code on failure + */ +static int ice_add_rss_cfg_post_gtpu(struct ice_vf *vf, + struct ice_vf_hash_gtpu_ctx *ctx, + struct ice_rss_hash_cfg *cfg, u32 ctx_idx) +{ + /* GTPU hash moveback lookup table indexed by context ID. + * Each entry is a bitmap indicating which contexts need moveback + * operations when the corresponding context index is processed. + */ + static const unsigned long + ice_gtpu_moveback_tbl[ICE_HASH_GTPU_CTX_MAX] = { + [ICE_HASH_GTPU_CTX_EH_IP] = 0, + [ICE_HASH_GTPU_CTX_EH_IP_UDP] = + BIT(ICE_HASH_GTPU_CTX_UP_IP) | + BIT(ICE_HASH_GTPU_CTX_UP_IP_TCP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP_TCP), + [ICE_HASH_GTPU_CTX_EH_IP_TCP] = + BIT(ICE_HASH_GTPU_CTX_UP_IP) | + BIT(ICE_HASH_GTPU_CTX_UP_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP_UDP), + [ICE_HASH_GTPU_CTX_UP_IP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_UP_IP_UDP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_UP_IP_TCP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_DW_IP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_DW_IP_UDP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_DW_IP_TCP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + }; + unsigned long moveback_mask; + int ret; + int i; + + if (unlikely(ctx_idx >= ICE_HASH_GTPU_CTX_MAX)) + return 0; + + ctx->ctx[ctx_idx].addl_hdrs = cfg->addl_hdrs; + ctx->ctx[ctx_idx].hash_flds = cfg->hash_flds; + ctx->ctx[ctx_idx].hdr_type = cfg->hdr_type; + ctx->ctx[ctx_idx].symm = cfg->symm; + + moveback_mask = ice_gtpu_moveback_tbl[ctx_idx]; + for_each_set_bit(i, &moveback_mask, ICE_HASH_GTPU_CTX_MAX) { + ret = ice_hash_moveback(vf, &ctx->ctx[i]); + if (ret && ret != -ENOENT) + return ret; + } + + return 0; +} + +static int +ice_add_rss_cfg_post(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + u32 ice_gtpu_ctx_idx = ice_calc_gtpu_ctx_idx(cfg->addl_hdrs); + u8 ip_ctx_idx = ice_map_ip_ctx_idx(cfg->addl_hdrs); + + if (ip_ctx_idx && ip_ctx_idx < ICE_HASH_IP_CTX_MAX) { + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ice_hash_cfg_record(&vf->hash_ctx.v4.ctx[ip_ctx_idx], cfg); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ice_hash_cfg_record(&vf->hash_ctx.v6.ctx[ip_ctx_idx], cfg); + } + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) { + return ice_add_rss_cfg_post_gtpu(vf, &vf->hash_ctx.ipv4, + cfg, ice_gtpu_ctx_idx); + } else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) { + return ice_add_rss_cfg_post_gtpu(vf, &vf->hash_ctx.ipv6, + cfg, ice_gtpu_ctx_idx); + } + + return 0; +} + +/** + * ice_rem_rss_cfg_post - post-process the RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * Post process the RSS hash configuration after deleting a hash + * config. Such as, it will reset the hash context for the GTPU hash. + */ +static void +ice_rem_rss_cfg_post(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + u32 ice_gtpu_ctx_idx = ice_calc_gtpu_ctx_idx(cfg->addl_hdrs); + u8 ip_ctx_idx = ice_map_ip_ctx_idx(cfg->addl_hdrs); + + if (ip_ctx_idx && ip_ctx_idx < ICE_HASH_IP_CTX_MAX) { + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ice_hash_cfg_reset(&vf->hash_ctx.v4.ctx[ip_ctx_idx]); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ice_hash_cfg_reset(&vf->hash_ctx.v6.ctx[ip_ctx_idx]); + } + + if (ice_gtpu_ctx_idx >= ICE_HASH_GTPU_CTX_MAX) + return; + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ice_hash_cfg_reset(&vf->hash_ctx.ipv4.ctx[ice_gtpu_ctx_idx]); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ice_hash_cfg_reset(&vf->hash_ctx.ipv6.ctx[ice_gtpu_ctx_idx]); +} + +/** + * ice_rem_rss_cfg_wrap - Wrapper for deleting an RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * Wrapper function to delete a flow profile base on an RSS configuration, + * and also post process the hash context base on the rollback mechanism + * which handle some rules conflict by ice_add_rss_cfg_wrap. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_rem_rss_cfg_wrap(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + ret = ice_rem_rss_cfg(hw, vsi->idx, cfg); + /* We just ignore -ENOENT, because if two configurations share the same + * profile remove one of them actually removes both, since the + * profile is deleted. + */ + if (ret && ret != -ENOENT) { + dev_err(dev, "ice_rem_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return ret; + } + + ice_rem_rss_cfg_post(vf, cfg); + + return 0; +} + +/** + * ice_add_rss_cfg_wrap - Wrapper for adding an RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * Add a flow profile based on an RSS configuration. Use a rollback + * mechanism to handle rule conflicts due to TCAM + * write sequence from top to down. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_add_rss_cfg_wrap(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + if (ice_add_rss_cfg_pre(vf, cfg)) + return -EINVAL; + + ret = ice_add_rss_cfg(hw, vsi, cfg); + if (ret) { + dev_err(dev, "ice_add_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return ret; + } + + if (ice_add_rss_cfg_post(vf, cfg)) + ret = -EINVAL; + + return ret; +} + +/** + * ice_parse_raw_rss_pattern - Parse raw pattern spec and mask for RSS + * @vf: pointer to the VF info + * @proto: pointer to the virtchnl protocol header + * @raw_cfg: pointer to the RSS raw pattern configuration + * + * Parser function to get spec and mask from virtchnl message, and parse + * them to get the corresponding profile and offset. The profile is used + * to add RSS configuration. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_parse_raw_rss_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto, + struct ice_rss_raw_cfg *raw_cfg) +{ + struct ice_parser_result pkt_parsed; + struct ice_hw *hw = &vf->pf->hw; + struct ice_parser_profile prof; + struct ice_parser *psr; + u8 *pkt_buf, *msk_buf; + u16 pkt_len; + int ret = 0; + + pkt_len = proto->raw.pkt_len; + if (!pkt_len) + return -EINVAL; + if (pkt_len > VIRTCHNL_MAX_SIZE_RAW_PACKET) + pkt_len = VIRTCHNL_MAX_SIZE_RAW_PACKET; + + pkt_buf = kzalloc(pkt_len, GFP_KERNEL); + msk_buf = kzalloc(pkt_len, GFP_KERNEL); + if (!pkt_buf || !msk_buf) { + ret = -ENOMEM; + goto free_alloc; + } + + memcpy(pkt_buf, proto->raw.spec, pkt_len); + memcpy(msk_buf, proto->raw.mask, pkt_len); + + psr = ice_parser_create(hw); + if (IS_ERR(psr)) { + ret = PTR_ERR(psr); + goto free_alloc; + } + + ret = ice_parser_run(psr, pkt_buf, pkt_len, &pkt_parsed); + if (ret) + goto parser_destroy; + + ret = ice_parser_profile_init(&pkt_parsed, pkt_buf, msk_buf, + pkt_len, ICE_BLK_RSS, &prof); + if (ret) + goto parser_destroy; + + memcpy(&raw_cfg->prof, &prof, sizeof(prof)); + +parser_destroy: + ice_parser_destroy(psr); +free_alloc: + kfree(pkt_buf); + kfree(msk_buf); + return ret; +} + +/** + * ice_add_raw_rss_cfg - add RSS configuration for raw pattern + * @vf: pointer to the VF info + * @cfg: pointer to the RSS raw pattern configuration + * + * This function adds the RSS configuration for raw pattern. + * Check if current profile is matched. If not, remove the old + * one and add the new profile to HW directly. Update the symmetric + * hash configuration as well. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_add_raw_rss_cfg(struct ice_vf *vf, struct ice_rss_raw_cfg *cfg) +{ + struct ice_parser_profile *prof = &cfg->prof; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_rss_prof_info *rss_prof; + struct ice_hw *hw = &vf->pf->hw; + int i, ptg, ret = 0; + u16 vsi_handle; + u64 id; + + vsi_handle = vf->lan_vsi_idx; + id = find_first_bit(prof->ptypes, ICE_FLOW_PTYPE_MAX); + + ptg = hw->blk[ICE_BLK_RSS].xlt1.t[id]; + rss_prof = &vf->rss_prof_info[ptg]; + + /* check if ptg already has a profile */ + if (rss_prof->prof.fv_num) { + for (i = 0; i < ICE_MAX_FV_WORDS; i++) { + if (rss_prof->prof.fv[i].proto_id != + prof->fv[i].proto_id || + rss_prof->prof.fv[i].offset != + prof->fv[i].offset) + break; + } + + /* current profile is matched, check symmetric hash */ + if (i == ICE_MAX_FV_WORDS) { + if (rss_prof->symm != cfg->symm) + goto update_symm; + return ret; + } + + /* current profile is not matched, remove it */ + ret = + ice_rem_prof_id_flow(hw, ICE_BLK_RSS, + ice_get_hw_vsi_num(hw, vsi_handle), + id); + if (ret) { + dev_err(dev, "remove RSS flow failed\n"); + return ret; + } + + ret = ice_rem_prof(hw, ICE_BLK_RSS, id); + if (ret) { + dev_err(dev, "remove RSS profile failed\n"); + return ret; + } + } + + /* add new profile */ + ret = ice_flow_set_parser_prof(hw, vsi_handle, 0, prof, ICE_BLK_RSS); + if (ret) { + dev_err(dev, "HW profile add failed\n"); + return ret; + } + + memcpy(&rss_prof->prof, prof, sizeof(struct ice_parser_profile)); + +update_symm: + rss_prof->symm = cfg->symm; + ice_rss_update_raw_symm(hw, cfg, id); + return ret; +} + +/** + * ice_rem_raw_rss_cfg - remove RSS configuration for raw pattern + * @vf: pointer to the VF info + * @cfg: pointer to the RSS raw pattern configuration + * + * This function removes the RSS configuration for raw pattern. + * Check if vsi group is already removed first. If not, remove the + * profile. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_rem_raw_rss_cfg(struct ice_vf *vf, struct ice_rss_raw_cfg *cfg) +{ + struct ice_parser_profile *prof = &cfg->prof; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_hw *hw = &vf->pf->hw; + int ptg, ret = 0; + u16 vsig, vsi; + u64 id; + + id = find_first_bit(prof->ptypes, ICE_FLOW_PTYPE_MAX); + + ptg = hw->blk[ICE_BLK_RSS].xlt1.t[id]; + + memset(&vf->rss_prof_info[ptg], 0, + sizeof(struct ice_rss_prof_info)); + + /* check if vsig is already removed */ + vsi = ice_get_hw_vsi_num(hw, vf->lan_vsi_idx); + if (vsi >= ICE_MAX_VSI) { + ret = -EINVAL; + goto err; + } + + vsig = hw->blk[ICE_BLK_RSS].xlt2.vsis[vsi].vsig; + if (vsig) { + ret = ice_rem_prof_id_flow(hw, ICE_BLK_RSS, vsi, id); + if (ret) + goto err; + + ret = ice_rem_prof(hw, ICE_BLK_RSS, id); + if (ret) + goto err; + } + + return ret; + +err: + dev_err(dev, "HW profile remove failed\n"); + return ret; +} + +/** * ice_vc_handle_rss_cfg * @vf: pointer to the VF info * @msg: pointer to the message buffer @@ -352,6 +1569,9 @@ int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) struct device *dev = ice_pf_to_dev(vf->pf); struct ice_hw *hw = &vf->pf->hw; struct ice_vsi *vsi; + u8 hash_type; + bool symm; + int ret; if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", @@ -387,49 +1607,44 @@ int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) goto error_param; } - if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { + hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + + ret = ice_vc_rss_hash_update(hw, vsi, hash_type); + if (ret) + v_ret = ice_err_to_virt_err(ret); goto error_param; } - if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { - struct ice_vsi_ctx *ctx; - u8 lut_type, hash_type; - int status; + hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + ret = ice_vc_rss_hash_update(hw, vsi, hash_type); + if (ret) { + v_ret = ice_err_to_virt_err(ret); + goto error_param; + } - lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : - ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + symm = rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC; + /* Configure RSS hash for raw pattern */ + if (rss_cfg->proto_hdrs.tunnel_level == 0 && + rss_cfg->proto_hdrs.count == 0) { + struct ice_rss_raw_cfg raw_cfg; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + if (ice_parse_raw_rss_pattern(vf, &rss_cfg->proto_hdrs, + &raw_cfg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - ctx->info.q_opt_rss = - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); - - /* Preserve existing queueing option setting */ - ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & - ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); - ctx->info.q_opt_tc = vsi->info.q_opt_tc; - ctx->info.q_opt_flags = vsi->info.q_opt_rss; - - ctx->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); - - status = ice_update_vsi(hw, vsi->idx, ctx, NULL); - if (status) { - dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n", - status, libie_aq_str(hw->adminq.sq_last_status)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (add) { + raw_cfg.symm = symm; + if (ice_add_raw_rss_cfg(vf, &raw_cfg)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; } else { - vsi->info.q_opt_rss = ctx->info.q_opt_rss; + if (ice_rem_raw_rss_cfg(vf, &raw_cfg)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; } - - kfree(ctx); } else { struct ice_rss_hash_cfg cfg; @@ -448,24 +1663,12 @@ int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) } if (add) { - if (ice_add_rss_cfg(hw, vsi, &cfg)) { + cfg.symm = symm; + if (ice_add_rss_cfg_wrap(vf, &cfg)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", - vsi->vsi_num, v_ret); - } } else { - int status; - - status = ice_rem_rss_cfg(hw, vsi->idx, &cfg); - /* We just ignore -ENOENT, because if two configurations - * share the same profile remove one of them actually - * removes both, since the profile is deleted. - */ - if (status && status != -ENOENT) { + if (ice_rem_rss_cfg_wrap(vf, &cfg)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n", - vf->vf_id, status); - } } } |
