diff options
68 files changed, 2500 insertions, 251 deletions
diff --git a/Documentation/devicetree/bindings/net/fsl,enetc.yaml b/Documentation/devicetree/bindings/net/fsl,enetc.yaml index ca70f0050171..aac20ab72ace 100644 --- a/Documentation/devicetree/bindings/net/fsl,enetc.yaml +++ b/Documentation/devicetree/bindings/net/fsl,enetc.yaml @@ -27,6 +27,7 @@ properties: - const: fsl,enetc - enum: - pci1131,e101 + - pci1131,e110 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml b/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml index 97389fd5dbbf..deea4fd73d76 100644 --- a/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml +++ b/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml @@ -21,6 +21,7 @@ maintainers: properties: compatible: enum: + - nxp,imx94-netc-blk-ctrl - nxp,imx95-netc-blk-ctrl reg: diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a06cb99d66dc..7cd35bfd39e6 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1796,6 +1796,23 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN Default: 0 (disabled) +icmp_errors_extension_mask - UNSIGNED INTEGER + Bitmask of ICMP extensions to append to ICMPv4 error messages + ("Destination Unreachable", "Time Exceeded" and "Parameter Problem"). + The original datagram is trimmed / padded to 128 bytes in order to be + compatible with applications that do not comply with RFC 4884. + + Possible extensions are: + + ==== ============================================================== + 0x01 Incoming IP interface information according to RFC 5837. + Extension will include the index, IPv4 address (if present), + name and MTU of the IP interface that received the datagram + which elicited the ICMP error. + ==== ============================================================== + + Default: 0x00 (no extensions) + igmp_max_memberships - INTEGER Change the maximum number of multicast groups we can subscribe to. Default: 20 @@ -3262,6 +3279,23 @@ error_anycast_as_unicast - BOOLEAN Default: 0 (disabled) +errors_extension_mask - UNSIGNED INTEGER + Bitmask of ICMP extensions to append to ICMPv6 error messages + ("Destination Unreachable" and "Time Exceeded"). The original datagram + is trimmed / padded to 128 bytes in order to be compatible with + applications that do not comply with RFC 4884. + + Possible extensions are: + + ==== ============================================================== + 0x01 Incoming IP interface information according to RFC 5837. + Extension will include the index, IPv6 address (if present), + name and MTU of the IP interface that received the datagram + which elicited the ICMP error. + ==== ============================================================== + + Default: 0x00 (no extensions) + xfrm6_gc_thresh - INTEGER (Obsolete since linux-4.14) The threshold at which we will start garbage collecting for IPv6 diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index a874d007f2db..904a910f198e 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -71,3 +71,43 @@ smcr_max_conns_per_lgr - INTEGER acceptable value ranges from 16 to 255. Only for SMC-R v2.1 and later. Default: 255 + +smcr_max_send_wr - INTEGER + So-called work request buffers are SMCR link (and RDMA queue pair) level + resources necessary for performing RDMA operations. Since up to 255 + connections can share a link group and thus also a link and the number + of the work request buffers is decided when the link is allocated, + depending on the workload it can be a bottleneck in a sense that threads + have to wait for work request buffers to become available. Before the + introduction of this control the maximal number of work request buffers + available on the send path used to be hard coded to 16. With this control + it becomes configurable. The acceptable range is between 2 and 2048. + + Please be aware that all the buffers need to be allocated as a physically + continuous array in which each element is a single buffer and has the size + of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails, we keep retrying + with half of the buffer count until it is ether successful or (unlikely) + we dip below the old hard coded value which is 16 where we give up much + like before having this control. + + Default: 16 + +smcr_max_recv_wr - INTEGER + So-called work request buffers are SMCR link (and RDMA queue pair) level + resources necessary for performing RDMA operations. Since up to 255 + connections can share a link group and thus also a link and the number + of the work request buffers is decided when the link is allocated, + depending on the workload it can be a bottleneck in a sense that threads + have to wait for work request buffers to become available. Before the + introduction of this control the maximal number of work request buffers + available on the receive path used to be hard coded to 16. With this control + it becomes configurable. The acceptable range is between 2 and 2048. + + Please be aware that all the buffers need to be allocated as a physically + continuous array in which each element is a single buffer and has the size + of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails, we keep retrying + with half of the buffer count until it is ether successful or (unlikely) + we dip below the old hard coded value which is 16 where we give up much + like before having this control. + + Default: 48 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b1ed98d9c438..e461f5072884 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -327,7 +327,6 @@ static int macb_mdio_read_c22(struct mii_bus *bus, int mii_id, int regnum) status = MACB_BFEXT(DATA, macb_readl(bp, MAN)); mdio_read_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -373,7 +372,6 @@ static int macb_mdio_read_c45(struct mii_bus *bus, int mii_id, int devad, status = MACB_BFEXT(DATA, macb_readl(bp, MAN)); mdio_read_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -405,7 +403,6 @@ static int macb_mdio_write_c22(struct mii_bus *bus, int mii_id, int regnum, goto mdio_write_exit; mdio_write_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -451,7 +448,6 @@ static int macb_mdio_write_c45(struct mii_bus *bus, int mii_id, goto mdio_write_exit; mdio_write_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -5622,7 +5618,6 @@ static int macb_probe(struct platform_device *pdev) macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID), dev->base_addr, dev->irq, dev->dev_addr); - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); return 0; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 0535e92404e3..d5e5800b84ef 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -14,12 +14,21 @@ u32 enetc_port_mac_rd(struct enetc_si *si, u32 reg) { + /* ENETC with pseudo MAC does not have Ethernet MAC + * port registers. + */ + if (enetc_is_pseudo_mac(si)) + return 0; + return enetc_port_rd(&si->hw, reg); } EXPORT_SYMBOL_GPL(enetc_port_mac_rd); void enetc_port_mac_wr(struct enetc_si *si, u32 reg, u32 val) { + if (enetc_is_pseudo_mac(si)) + return; + enetc_port_wr(&si->hw, reg, val); if (si->hw_features & ENETC_SI_F_QBU) enetc_port_wr(&si->hw, reg + si->drvdata->pmac_offset, val); @@ -3367,7 +3376,8 @@ int enetc_hwtstamp_set(struct net_device *ndev, new_offloads |= ENETC_F_TX_TSTAMP; break; case HWTSTAMP_TX_ONESTEP_SYNC: - if (!enetc_si_is_pf(priv->si)) + if (!enetc_si_is_pf(priv->si) || + enetc_is_pseudo_mac(priv->si)) return -EOPNOTSUPP; new_offloads &= ~ENETC_F_TX_TSTAMP_MASK; @@ -3708,6 +3718,13 @@ static const struct enetc_drvdata enetc4_pf_data = { .eth_ops = &enetc4_pf_ethtool_ops, }; +static const struct enetc_drvdata enetc4_ppm_data = { + .sysclk_freq = ENETC_CLK_333M, + .tx_csum = true, + .max_frags = ENETC4_MAX_SKB_FRAGS, + .eth_ops = &enetc4_ppm_ethtool_ops, +}; + static const struct enetc_drvdata enetc_vf_data = { .sysclk_freq = ENETC_CLK_400M, .max_frags = ENETC_MAX_SKB_FRAGS, @@ -3727,6 +3744,15 @@ static const struct enetc_platform_info enetc_info[] = { .dev_id = ENETC_DEV_ID_VF, .data = &enetc_vf_data, }, + { + .revision = ENETC_REV_4_3, + .dev_id = NXP_ENETC_PPM_DEV_ID, + .data = &enetc4_ppm_data, + }, + { .revision = ENETC_REV_4_3, + .dev_id = NXP_ENETC_PF_DEV_ID, + .data = &enetc4_pf_data, + }, }; int enetc_get_driver_data(struct enetc_si *si) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index f279fa597991..dce27bd67a7d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -273,6 +273,7 @@ enum enetc_errata { #define ENETC_SI_F_QBV BIT(1) #define ENETC_SI_F_QBU BIT(2) #define ENETC_SI_F_LSO BIT(3) +#define ENETC_SI_F_PPM BIT(4) /* pseudo MAC */ struct enetc_drvdata { u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */ @@ -362,6 +363,11 @@ static inline int enetc_pf_to_port(struct pci_dev *pf_pdev) } } +static inline bool enetc_is_pseudo_mac(struct enetc_si *si) +{ + return si->hw_features & ENETC_SI_F_PPM; +} + #define ENETC_MAX_NUM_TXQS 8 #define ENETC_INT_NAME_MAX (IFNAMSIZ + 8) @@ -534,6 +540,8 @@ int enetc_hwtstamp_set(struct net_device *ndev, extern const struct ethtool_ops enetc_pf_ethtool_ops; extern const struct ethtool_ops enetc4_pf_ethtool_ops; extern const struct ethtool_ops enetc_vf_ethtool_ops; +extern const struct ethtool_ops enetc4_ppm_ethtool_ops; + void enetc_set_ethtool_ops(struct net_device *ndev); void enetc_mm_link_state_update(struct enetc_ndev_priv *priv, bool link); void enetc_mm_commit_preemptible_tcs(struct enetc_ndev_priv *priv); diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 19bf0e89cdc2..ebea4298791c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -11,6 +11,7 @@ #define NXP_ENETC_VENDOR_ID 0x1131 #define NXP_ENETC_PF_DEV_ID 0xe101 +#define NXP_ENETC_PPM_DEV_ID 0xe110 /**********************Station interface registers************************/ /* Station interface LSO segmentation flag mask register 0/1 */ @@ -115,6 +116,10 @@ #define PMCAPR_HD BIT(8) #define PMCAPR_FP GENMASK(10, 9) +/* Port capability register */ +#define ENETC4_PCAPR 0x4000 +#define PCAPR_LINK_TYPE BIT(4) + /* Port configuration register */ #define ENETC4_PCR 0x4010 #define PCR_HDR_FMT BIT(0) @@ -193,4 +198,29 @@ #define SSP_1G 2 #define PM_IF_MODE_ENA BIT(15) +/**********************ENETC Pseudo MAC port registers************************/ +/* Port pseudo MAC receive octets counter (64-bit) */ +#define ENETC4_PPMROCR 0x5080 + +/* Port pseudo MAC receive unicast frame counter register (64-bit) */ +#define ENETC4_PPMRUFCR 0x5088 + +/* Port pseudo MAC receive multicast frame counter register (64-bit) */ +#define ENETC4_PPMRMFCR 0x5090 + +/* Port pseudo MAC receive broadcast frame counter register (64-bit) */ +#define ENETC4_PPMRBFCR 0x5098 + +/* Port pseudo MAC transmit octets counter (64-bit) */ +#define ENETC4_PPMTOCR 0x50c0 + +/* Port pseudo MAC transmit unicast frame counter register (64-bit) */ +#define ENETC4_PPMTUFCR 0x50c8 + +/* Port pseudo MAC transmit multicast frame counter register (64-bit) */ +#define ENETC4_PPMTMFCR 0x50d0 + +/* Port pseudo MAC transmit broadcast frame counter register (64-bit) */ +#define ENETC4_PPMTBFCR 0x50d8 + #endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 82c443b28b15..498346dd996a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -41,6 +41,16 @@ static void enetc4_get_port_caps(struct enetc_pf *pf) pf->caps.mac_filter_num = val & PSIMAFCAPR_NUM_MAC_AFTE; } +static void enetc4_get_psi_hw_features(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + u32 val; + + val = enetc_port_rd(hw, ENETC4_PCAPR); + if (val & PCAPR_LINK_TYPE) + si->hw_features |= ENETC_SI_F_PPM; +} + static void enetc4_pf_set_si_primary_mac(struct enetc_hw *hw, int si, const u8 *addr) { @@ -277,6 +287,7 @@ static int enetc4_pf_struct_init(struct enetc_si *si) pf->ops = &enetc4_pf_ops; enetc4_get_port_caps(pf); + enetc4_get_psi_hw_features(si); return 0; } @@ -589,6 +600,9 @@ static void enetc4_mac_config(struct enetc_pf *pf, unsigned int mode, struct enetc_si *si = pf->si; u32 val; + if (enetc_is_pseudo_mac(si)) + return; + val = enetc_port_mac_rd(si, ENETC4_PM_IF_MODE(0)); val &= ~(PM_IF_MODE_IFMODE | PM_IF_MODE_ENA); @@ -1071,6 +1085,7 @@ static void enetc4_pf_remove(struct pci_dev *pdev) static const struct pci_device_id enetc4_pf_id_table[] = { { PCI_DEVICE(NXP_ENETC_VENDOR_ID, NXP_ENETC_PF_DEV_ID) }, + { PCI_DEVICE(NXP_ENETC_VENDOR_ID, NXP_ENETC_PPM_DEV_ID) }, { 0, } /* End of table. */ }; MODULE_DEVICE_TABLE(pci, enetc4_pf_id_table); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 71d052de669a..3e222321b937 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -435,6 +435,48 @@ static void enetc_get_eth_mac_stats(struct net_device *ndev, } } +static void enetc_ppm_mac_stats(struct enetc_si *si, + struct ethtool_eth_mac_stats *s) +{ + struct enetc_hw *hw = &si->hw; + u64 rufcr, rmfcr, rbfcr; + u64 tufcr, tmfcr, tbfcr; + + rufcr = enetc_port_rd64(hw, ENETC4_PPMRUFCR); + rmfcr = enetc_port_rd64(hw, ENETC4_PPMRMFCR); + rbfcr = enetc_port_rd64(hw, ENETC4_PPMRBFCR); + + tufcr = enetc_port_rd64(hw, ENETC4_PPMTUFCR); + tmfcr = enetc_port_rd64(hw, ENETC4_PPMTMFCR); + tbfcr = enetc_port_rd64(hw, ENETC4_PPMTBFCR); + + s->FramesTransmittedOK = tufcr + tmfcr + tbfcr; + s->FramesReceivedOK = rufcr + rmfcr + rbfcr; + s->OctetsTransmittedOK = enetc_port_rd64(hw, ENETC4_PPMTOCR); + s->OctetsReceivedOK = enetc_port_rd64(hw, ENETC4_PPMROCR); + s->MulticastFramesXmittedOK = tmfcr; + s->BroadcastFramesXmittedOK = tbfcr; + s->MulticastFramesReceivedOK = rmfcr; + s->BroadcastFramesReceivedOK = rbfcr; +} + +static void enetc_ppm_get_eth_mac_stats(struct net_device *ndev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + switch (mac_stats->src) { + case ETHTOOL_MAC_STATS_SRC_EMAC: + enetc_ppm_mac_stats(priv->si, mac_stats); + break; + case ETHTOOL_MAC_STATS_SRC_PMAC: + break; + case ETHTOOL_MAC_STATS_SRC_AGGREGATE: + ethtool_aggregate_mac_stats(ndev, mac_stats); + break; + } +} + static void enetc_get_eth_ctrl_stats(struct net_device *ndev, struct ethtool_eth_ctrl_stats *ctrl_stats) { @@ -894,6 +936,9 @@ static int enetc_get_phc_index_by_pdev(struct enetc_si *si) case ENETC_REV_4_1: devfn = PCI_DEVFN(24, 0); break; + case ENETC_REV_4_3: + devfn = PCI_DEVFN(0, 1); + break; default: return -1; } @@ -1313,6 +1358,25 @@ const struct ethtool_ops enetc_pf_ethtool_ops = { .get_mm_stats = enetc_get_mm_stats, }; +const struct ethtool_ops enetc4_ppm_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, + .get_eth_mac_stats = enetc_ppm_get_eth_mac_stats, + .get_rxnfc = enetc4_get_rxnfc, + .get_rxfh_key_size = enetc_get_rxfh_key_size, + .get_rxfh_indir_size = enetc_get_rxfh_indir_size, + .get_rxfh = enetc_get_rxfh, + .set_rxfh = enetc_set_rxfh, + .get_rxfh_fields = enetc_get_rxfh_fields, + .get_ringparam = enetc_get_ringparam, + .get_coalesce = enetc_get_coalesce, + .set_coalesce = enetc_set_coalesce, + .get_link_ksettings = enetc_get_link_ksettings, + .set_link_ksettings = enetc_set_link_ksettings, + .get_link = ethtool_op_get_link, +}; + const struct ethtool_ops enetc_vf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 377c96325814..7b882b8921fe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -378,6 +378,7 @@ enum enetc_bdr_type {TX, RX}; #define EIPBRR0_REVISION GENMASK(15, 0) #define ENETC_REV_1_0 0x0100 #define ENETC_REV_4_1 0X0401 +#define ENETC_REV_4_3 0x0403 #define ENETC_G_EIPBRR1 0x0bfc #define ENETC_G_EPFBLPR(n) (0xd00 + 4 * (n)) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index edf14a95cab7..9c634205e2a7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -109,7 +109,7 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | @@ -133,6 +133,9 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->features |= NETIF_F_RXHASH; } + if (!enetc_is_pseudo_mac(si)) + ndev->hw_features |= NETIF_F_LOOPBACK; + /* TODO: currently, i.MX95 ENETC driver does not support advanced features */ if (!is_enetc_rev1(si)) goto end; diff --git a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c index bcb8eefeb93c..d7aee3c934d3 100644 --- a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c +++ b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c @@ -47,6 +47,13 @@ #define PCS_PROT_SFI BIT(4) #define PCS_PROT_10G_SXGMII BIT(6) +#define IMX94_EXT_PIN_CONTROL 0x10 +#define MAC2_MAC3_SEL BIT(1) + +#define IMX94_NETC_LINK_CFG(a) (0x4c + (a) * 4) +#define NETC_LINK_CFG_MII_PROT GENMASK(3, 0) +#define NETC_LINK_CFG_IO_VAR GENMASK(19, 16) + /* NETC privileged register block register */ #define PRB_NETCRR 0x100 #define NETCRR_SR BIT(0) @@ -59,6 +66,7 @@ /* NETC integrated endpoint register block register */ #define IERB_EMDIOFAUXR 0x344 #define IERB_T0FAUXR 0x444 +#define IERB_ETBCR(a) (0x300c + 0x100 * (a)) #define IERB_EFAUXR(a) (0x3044 + 0x100 * (a)) #define IERB_VFAUXR(a) (0x4004 + 0x40 * (a)) #define FAUXR_LDID GENMASK(3, 0) @@ -68,6 +76,19 @@ #define IMX95_ENETC1_BUS_DEVFN 0x40 #define IMX95_ENETC2_BUS_DEVFN 0x80 +#define IMX94_ENETC0_BUS_DEVFN 0x100 +#define IMX94_ENETC1_BUS_DEVFN 0x140 +#define IMX94_ENETC2_BUS_DEVFN 0x180 +#define IMX94_TIMER0_BUS_DEVFN 0x1 +#define IMX94_TIMER1_BUS_DEVFN 0x101 +#define IMX94_TIMER2_BUS_DEVFN 0x181 +#define IMX94_ENETC0_LINK 3 +#define IMX94_ENETC1_LINK 4 +#define IMX94_ENETC2_LINK 5 + +#define NETC_ENETC_ID(a) (a) +#define NETC_TIMER_ID(a) (a) + /* Flags for different platforms */ #define NETC_HAS_NETCMIX BIT(0) @@ -192,6 +213,90 @@ static int imx95_netcmix_init(struct platform_device *pdev) return 0; } +static int imx94_enetc_get_link_id(struct device_node *np) +{ + int bus_devfn = netc_of_pci_get_bus_devfn(np); + + /* Parse ENETC link number */ + switch (bus_devfn) { + case IMX94_ENETC0_BUS_DEVFN: + return IMX94_ENETC0_LINK; + case IMX94_ENETC1_BUS_DEVFN: + return IMX94_ENETC1_LINK; + case IMX94_ENETC2_BUS_DEVFN: + return IMX94_ENETC2_LINK; + default: + return -EINVAL; + } +} + +static int imx94_link_config(struct netc_blk_ctrl *priv, + struct device_node *np, int link_id) +{ + phy_interface_t interface; + int mii_proto; + u32 val; + + /* The node may be disabled and does not have a 'phy-mode' + * or 'phy-connection-type' property. + */ + if (of_get_phy_mode(np, &interface)) + return 0; + + mii_proto = netc_get_link_mii_protocol(interface); + if (mii_proto < 0) + return mii_proto; + + val = mii_proto & NETC_LINK_CFG_MII_PROT; + if (val == MII_PROT_SERIAL) + val = u32_replace_bits(val, IO_VAR_16FF_16G_SERDES, + NETC_LINK_CFG_IO_VAR); + + netc_reg_write(priv->netcmix, IMX94_NETC_LINK_CFG(link_id), val); + + return 0; +} + +static int imx94_enetc_link_config(struct netc_blk_ctrl *priv, + struct device_node *np) +{ + int link_id = imx94_enetc_get_link_id(np); + + if (link_id < 0) + return link_id; + + return imx94_link_config(priv, np, link_id); +} + +static int imx94_netcmix_init(struct platform_device *pdev) +{ + struct netc_blk_ctrl *priv = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + u32 val; + int err; + + for_each_child_of_node_scoped(np, child) { + for_each_child_of_node_scoped(child, gchild) { + if (!of_device_is_compatible(gchild, "pci1131,e101")) + continue; + + err = imx94_enetc_link_config(priv, gchild); + if (err) + return err; + } + } + + /* ENETC 0 and switch port 2 share the same parallel interface. + * Currently, the switch is not supported, so this interface is + * used by ENETC 0 by default. + */ + val = netc_reg_read(priv->netcmix, IMX94_EXT_PIN_CONTROL); + val |= MAC2_MAC3_SEL; + netc_reg_write(priv->netcmix, IMX94_EXT_PIN_CONTROL, val); + + return 0; +} + static bool netc_ierb_is_locked(struct netc_blk_ctrl *priv) { return !!(netc_reg_read(priv->prb, PRB_NETCRR) & NETCRR_LOCK); @@ -247,6 +352,98 @@ static int imx95_ierb_init(struct platform_device *pdev) return 0; } +static int imx94_get_enetc_id(struct device_node *np) +{ + int bus_devfn = netc_of_pci_get_bus_devfn(np); + + /* Parse ENETC offset */ + switch (bus_devfn) { + case IMX94_ENETC0_BUS_DEVFN: + return NETC_ENETC_ID(0); + case IMX94_ENETC1_BUS_DEVFN: + return NETC_ENETC_ID(1); + case IMX94_ENETC2_BUS_DEVFN: + return NETC_ENETC_ID(2); + default: + return -EINVAL; + } +} + +static int imx94_get_timer_id(struct device_node *np) +{ + int bus_devfn = netc_of_pci_get_bus_devfn(np); + + /* Parse NETC PTP timer ID, the timer0 is on bus 0, + * the timer 1 and timer2 is on bus 1. + */ + switch (bus_devfn) { + case IMX94_TIMER0_BUS_DEVFN: + return NETC_TIMER_ID(0); + case IMX94_TIMER1_BUS_DEVFN: + return NETC_TIMER_ID(1); + case IMX94_TIMER2_BUS_DEVFN: + return NETC_TIMER_ID(2); + default: + return -EINVAL; + } +} + +static int imx94_enetc_update_tid(struct netc_blk_ctrl *priv, + struct device_node *np) +{ + struct device *dev = &priv->pdev->dev; + struct device_node *timer_np; + int eid, tid; + + eid = imx94_get_enetc_id(np); + if (eid < 0) { + dev_err(dev, "Failed to get ENETC ID\n"); + return eid; + } + + timer_np = of_parse_phandle(np, "ptp-timer", 0); + if (!timer_np) { + /* If 'ptp-timer' is not present, the timer1 is the default + * timer of all standalone ENETCs, which is on the same PCIe + * bus as these ENETCs. + */ + tid = NETC_TIMER_ID(1); + goto end; + } + + tid = imx94_get_timer_id(timer_np); + of_node_put(timer_np); + if (tid < 0) { + dev_err(dev, "Failed to get NETC Timer ID\n"); + return tid; + } + +end: + netc_reg_write(priv->ierb, IERB_ETBCR(eid), tid); + + return 0; +} + +static int imx94_ierb_init(struct platform_device *pdev) +{ + struct netc_blk_ctrl *priv = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + int err; + + for_each_child_of_node_scoped(np, child) { + for_each_child_of_node_scoped(child, gchild) { + if (!of_device_is_compatible(gchild, "pci1131,e101")) + continue; + + err = imx94_enetc_update_tid(priv, gchild); + if (err) + return err; + } + } + + return 0; +} + static int netc_ierb_init(struct platform_device *pdev) { struct netc_blk_ctrl *priv = platform_get_drvdata(pdev); @@ -340,8 +537,15 @@ static const struct netc_devinfo imx95_devinfo = { .ierb_init = imx95_ierb_init, }; +static const struct netc_devinfo imx94_devinfo = { + .flags = NETC_HAS_NETCMIX, + .netcmix_init = imx94_netcmix_init, + .ierb_init = imx94_ierb_init, +}; + static const struct of_device_id netc_blk_ctrl_match[] = { { .compatible = "nxp,imx95-netc-blk-ctrl", .data = &imx95_devinfo }, + { .compatible = "nxp,imx94-netc-blk-ctrl", .data = &imx94_devinfo }, {}, }; MODULE_DEVICE_TABLE(of, netc_blk_ctrl_match); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1edcfaee6819..024dd443bfbc 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2231,7 +2231,6 @@ static int fec_enet_mdio_read_c22(struct mii_bus *bus, int mii_id, int regnum) ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2280,7 +2279,6 @@ static int fec_enet_mdio_read_c45(struct mii_bus *bus, int mii_id, ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2312,7 +2310,6 @@ static int fec_enet_mdio_write_c22(struct mii_bus *bus, int mii_id, int regnum, if (ret) netdev_err(fep->netdev, "MDIO write timeout\n"); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2356,7 +2353,6 @@ static int fec_enet_mdio_write_c45(struct mii_bus *bus, int mii_id, netdev_err(fep->netdev, "MDIO write timeout\n"); out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2552,7 +2548,6 @@ static int fec_enet_mii_init(struct platform_device *pdev) int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; - int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2667,11 +2662,8 @@ static int fec_enet_mii_init(struct platform_device *pdev) of_node_put(node); /* find all the PHY devices on the bus and set mac_managed_pm to true */ - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phydev = mdiobus_get_phy(fep->mii_bus, addr); - if (phydev) - phydev->mac_managed_pm = true; - } + mdiobus_for_each_phy(fep->mii_bus, phydev) + phydev->mac_managed_pm = true; mii_cnt++; @@ -2839,7 +2831,6 @@ static void fec_enet_get_regs(struct net_device *ndev, buf[off] = readl(&theregs[off]); } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } @@ -3616,7 +3607,6 @@ err_enet_mii_probe: err_enet_alloc: fec_enet_clk_enable(ndev, false); clk_enable: - pm_runtime_mark_last_busy(&fep->pdev->dev); pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; @@ -3647,7 +3637,6 @@ fec_enet_close(struct net_device *ndev) cpu_latency_qos_remove_request(&fep->pm_qos_req); pinctrl_pm_select_sleep_state(&fep->pdev->dev); - pm_runtime_mark_last_busy(&fep->pdev->dev); pm_runtime_put_autosuspend(&fep->pdev->dev); fec_enet_free_buffers(ndev); @@ -4616,7 +4605,6 @@ fec_probe(struct platform_device *pdev) INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 532813d8d028..244de500963e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -12,4 +12,5 @@ rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \ rvu_sdp.o rvu_npc_hash.o mcs.o mcs_rvu_if.o mcs_cnf10kb.o \ - rvu_rep.o cn20k/mbox_init.o + rvu_rep.o cn20k/mbox_init.o cn20k/nix.o cn20k/debugfs.o \ + cn20k/npa.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c new file mode 100644 index 000000000000..498968bf4cf5 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "struct.h" +#include "debugfs.h" + +void print_nix_cn20k_sq_ctx(struct seq_file *m, + struct nix_cn20k_sq_ctx_s *sq_ctx) +{ + seq_printf(m, "W0: ena \t\t\t%d\nW0: qint_idx \t\t\t%d\n", + sq_ctx->ena, sq_ctx->qint_idx); + seq_printf(m, "W0: substream \t\t\t0x%03x\nW0: sdp_mcast \t\t\t%d\n", + sq_ctx->substream, sq_ctx->sdp_mcast); + seq_printf(m, "W0: cq \t\t\t\t%d\nW0: sqe_way_mask \t\t%d\n\n", + sq_ctx->cq, sq_ctx->sqe_way_mask); + + seq_printf(m, "W1: smq \t\t\t%d\nW1: cq_ena \t\t\t%d\nW1: xoff\t\t\t%d\n", + sq_ctx->smq, sq_ctx->cq_ena, sq_ctx->xoff); + seq_printf(m, "W1: sso_ena \t\t\t%d\nW1: smq_rr_weight\t\t%d\n", + sq_ctx->sso_ena, sq_ctx->smq_rr_weight); + seq_printf(m, "W1: default_chan\t\t%d\nW1: sqb_count\t\t\t%d\n\n", + sq_ctx->default_chan, sq_ctx->sqb_count); + + seq_printf(m, "W1: smq_rr_count_lb \t\t%d\n", sq_ctx->smq_rr_count_lb); + seq_printf(m, "W2: smq_rr_count_ub \t\t%d\n", sq_ctx->smq_rr_count_ub); + seq_printf(m, "W2: sqb_aura \t\t\t%d\nW2: sq_int \t\t\t%d\n", + sq_ctx->sqb_aura, sq_ctx->sq_int); + seq_printf(m, "W2: sq_int_ena \t\t\t%d\nW2: sqe_stype \t\t\t%d\n", + sq_ctx->sq_int_ena, sq_ctx->sqe_stype); + + seq_printf(m, "W3: max_sqe_size\t\t%d\nW3: cq_limit\t\t\t%d\n", + sq_ctx->max_sqe_size, sq_ctx->cq_limit); + seq_printf(m, "W3: lmt_dis \t\t\t%d\nW3: mnq_dis \t\t\t%d\n", + sq_ctx->lmt_dis, sq_ctx->mnq_dis); + seq_printf(m, "W3: smq_next_sq\t\t\t%d\nW3: smq_lso_segnum\t\t%d\n", + sq_ctx->smq_next_sq, sq_ctx->smq_lso_segnum); + seq_printf(m, "W3: tail_offset \t\t%d\nW3: smenq_offset\t\t%d\n", + sq_ctx->tail_offset, sq_ctx->smenq_offset); + seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n", + sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld); + + seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n", + sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend); + seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb); + seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb); + seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb); + seq_printf(m, "W7: smenq_next_sqb \t\t%llx\n\n", + sq_ctx->smenq_next_sqb); + + seq_printf(m, "W8: head_sqb\t\t\t%llx\n\n", sq_ctx->head_sqb); + + seq_printf(m, "W9: vfi_lso_total\t\t%d\n", sq_ctx->vfi_lso_total); + seq_printf(m, "W9: vfi_lso_sizem1\t\t%d\nW9: vfi_lso_sb\t\t\t%d\n", + sq_ctx->vfi_lso_sizem1, sq_ctx->vfi_lso_sb); + seq_printf(m, "W9: vfi_lso_mps\t\t\t%d\nW9: vfi_lso_vlan0_ins_ena\t%d\n", + sq_ctx->vfi_lso_mps, sq_ctx->vfi_lso_vlan0_ins_ena); + seq_printf(m, "W9: vfi_lso_vlan1_ins_ena\t%d\nW9: vfi_lso_vld \t\t%d\n\n", + sq_ctx->vfi_lso_vld, sq_ctx->vfi_lso_vlan1_ins_ena); + + seq_printf(m, "W10: scm_lso_rem \t\t%llu\n\n", + (u64)sq_ctx->scm_lso_rem); + seq_printf(m, "W11: octs \t\t\t%llu\n\n", (u64)sq_ctx->octs); + seq_printf(m, "W12: pkts \t\t\t%llu\n\n", (u64)sq_ctx->pkts); + seq_printf(m, "W13: aged_drop_octs \t\t\t%llu\n\n", + (u64)sq_ctx->aged_drop_octs); + seq_printf(m, "W13: aged_drop_pkts \t\t\t%llu\n\n", + (u64)sq_ctx->aged_drop_pkts); + seq_printf(m, "W14: dropped_octs \t\t%llu\n\n", + (u64)sq_ctx->dropped_octs); + seq_printf(m, "W15: dropped_pkts \t\t%llu\n\n", + (u64)sq_ctx->dropped_pkts); +} + +void print_nix_cn20k_cq_ctx(struct seq_file *m, + struct nix_cn20k_aq_enq_rsp *rsp) +{ + struct nix_cn20k_cq_ctx_s *cq_ctx = &rsp->cq; + + seq_printf(m, "W0: base \t\t\t%llx\n\n", cq_ctx->base); + + seq_printf(m, "W1: wrptr \t\t\t%llx\n", (u64)cq_ctx->wrptr); + seq_printf(m, "W1: avg_con \t\t\t%d\nW1: cint_idx \t\t\t%d\n", + cq_ctx->avg_con, cq_ctx->cint_idx); + seq_printf(m, "W1: cq_err \t\t\t%d\nW1: qint_idx \t\t\t%d\n", + cq_ctx->cq_err, cq_ctx->qint_idx); + seq_printf(m, "W1: bpid \t\t\t%d\nW1: bp_ena \t\t\t%d\n\n", + cq_ctx->bpid, cq_ctx->bp_ena); + + seq_printf(m, "W1: lbpid_high \t\t\t0x%03x\n", cq_ctx->lbpid_high); + seq_printf(m, "W1: lbpid_med \t\t\t0x%03x\n", cq_ctx->lbpid_med); + seq_printf(m, "W1: lbpid_low \t\t\t0x%03x\n", cq_ctx->lbpid_low); + seq_printf(m, "(W1: lbpid) \t\t\t0x%03x\n", + cq_ctx->lbpid_high << 6 | cq_ctx->lbpid_med << 3 | + cq_ctx->lbpid_low); + seq_printf(m, "W1: lbp_ena \t\t\t\t%d\n\n", cq_ctx->lbp_ena); + + seq_printf(m, "W2: update_time \t\t%d\nW2:avg_level \t\t\t%d\n", + cq_ctx->update_time, cq_ctx->avg_level); + seq_printf(m, "W2: head \t\t\t%d\nW2:tail \t\t\t%d\n\n", + cq_ctx->head, cq_ctx->tail); + + seq_printf(m, "W3: cq_err_int_ena \t\t%d\nW3:cq_err_int \t\t\t%d\n", + cq_ctx->cq_err_int_ena, cq_ctx->cq_err_int); + seq_printf(m, "W3: qsize \t\t\t%d\nW3:stashing \t\t\t%d\n", + cq_ctx->qsize, cq_ctx->stashing); + + seq_printf(m, "W3: caching \t\t\t%d\n", cq_ctx->caching); + seq_printf(m, "W3: lbp_frac \t\t\t%d\n", cq_ctx->lbp_frac); + seq_printf(m, "W3: stash_thresh \t\t\t%d\n", + cq_ctx->stash_thresh); + + seq_printf(m, "W3: msh_valid \t\t\t%d\nW3:msh_dst \t\t\t%d\n", + cq_ctx->msh_valid, cq_ctx->msh_dst); + + seq_printf(m, "W3: cpt_drop_err_en \t\t\t%d\n", + cq_ctx->cpt_drop_err_en); + seq_printf(m, "W3: ena \t\t\t%d\n", + cq_ctx->ena); + seq_printf(m, "W3: drop_ena \t\t\t%d\nW3: drop \t\t\t%d\n", + cq_ctx->drop_ena, cq_ctx->drop); + seq_printf(m, "W3: bp \t\t\t\t%d\n\n", cq_ctx->bp); + + seq_printf(m, "W4: lbpid_ext \t\t\t\t%d\n\n", cq_ctx->lbpid_ext); + seq_printf(m, "W4: bpid_ext \t\t\t\t%d\n\n", cq_ctx->bpid_ext); +} + +void print_npa_cn20k_aura_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp) +{ + struct npa_cn20k_aura_s *aura = &rsp->aura; + + seq_printf(m, "W0: Pool addr\t\t%llx\n", aura->pool_addr); + + seq_printf(m, "W1: ena\t\t\t%d\nW1: pool caching\t%d\n", + aura->ena, aura->pool_caching); + seq_printf(m, "W1: avg con\t\t%d\n", aura->avg_con); + seq_printf(m, "W1: pool drop ena\t%d\nW1: aura drop ena\t%d\n", + aura->pool_drop_ena, aura->aura_drop_ena); + seq_printf(m, "W1: bp_ena\t\t%d\nW1: aura drop\t\t%d\n", + aura->bp_ena, aura->aura_drop); + seq_printf(m, "W1: aura shift\t\t%d\nW1: avg_level\t\t%d\n", + aura->shift, aura->avg_level); + + seq_printf(m, "W2: count\t\t%llu\nW2: nix_bpid\t\t%d\n", + (u64)aura->count, aura->bpid); + + seq_printf(m, "W3: limit\t\t%llu\nW3: bp\t\t\t%d\nW3: fc_ena\t\t%d\n", + (u64)aura->limit, aura->bp, aura->fc_ena); + + seq_printf(m, "W3: fc_up_crossing\t%d\nW3: fc_stype\t\t%d\n", + aura->fc_up_crossing, aura->fc_stype); + seq_printf(m, "W3: fc_hyst_bits\t%d\n", aura->fc_hyst_bits); + + seq_printf(m, "W4: fc_addr\t\t%llx\n", aura->fc_addr); + + seq_printf(m, "W5: pool_drop\t\t%d\nW5: update_time\t\t%d\n", + aura->pool_drop, aura->update_time); + seq_printf(m, "W5: err_int \t\t%d\nW5: err_int_ena\t\t%d\n", + aura->err_int, aura->err_int_ena); + seq_printf(m, "W5: thresh_int\t\t%d\nW5: thresh_int_ena \t%d\n", + aura->thresh_int, aura->thresh_int_ena); + seq_printf(m, "W5: thresh_up\t\t%d\nW5: thresh_qint_idx\t%d\n", + aura->thresh_up, aura->thresh_qint_idx); + seq_printf(m, "W5: err_qint_idx \t%d\n", aura->err_qint_idx); + + seq_printf(m, "W6: thresh\t\t%llu\n", (u64)aura->thresh); + seq_printf(m, "W6: fc_msh_dst\t\t%d\n", aura->fc_msh_dst); +} + +void print_npa_cn20k_pool_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp) +{ + struct npa_cn20k_pool_s *pool = &rsp->pool; + + seq_printf(m, "W0: Stack base\t\t%llx\n", pool->stack_base); + + seq_printf(m, "W1: ena \t\t%d\nW1: nat_align \t\t%d\n", + pool->ena, pool->nat_align); + seq_printf(m, "W1: stack_caching\t%d\n", + pool->stack_caching); + seq_printf(m, "W1: buf_offset\t\t%d\nW1: buf_size\t\t%d\n", + pool->buf_offset, pool->buf_size); + + seq_printf(m, "W2: stack_max_pages \t%d\nW2: stack_pages\t\t%d\n", + pool->stack_max_pages, pool->stack_pages); + + seq_printf(m, "W4: stack_offset\t%d\nW4: shift\t\t%d\nW4: avg_level\t\t%d\n", + pool->stack_offset, pool->shift, pool->avg_level); + seq_printf(m, "W4: avg_con \t\t%d\nW4: fc_ena\t\t%d\nW4: fc_stype\t\t%d\n", + pool->avg_con, pool->fc_ena, pool->fc_stype); + seq_printf(m, "W4: fc_hyst_bits\t%d\nW4: fc_up_crossing\t%d\n", + pool->fc_hyst_bits, pool->fc_up_crossing); + seq_printf(m, "W4: update_time\t\t%d\n", pool->update_time); + + seq_printf(m, "W5: fc_addr\t\t%llx\n", pool->fc_addr); + + seq_printf(m, "W6: ptr_start\t\t%llx\n", pool->ptr_start); + + seq_printf(m, "W7: ptr_end\t\t%llx\n", pool->ptr_end); + + seq_printf(m, "W8: err_int\t\t%d\nW8: err_int_ena\t\t%d\n", + pool->err_int, pool->err_int_ena); + seq_printf(m, "W8: thresh_int\t\t%d\n", pool->thresh_int); + seq_printf(m, "W8: thresh_int_ena\t%d\nW8: thresh_up\t\t%d\n", + pool->thresh_int_ena, pool->thresh_up); + seq_printf(m, "W8: thresh_qint_idx\t%d\nW8: err_qint_idx\t%d\n", + pool->thresh_qint_idx, pool->err_qint_idx); + seq_printf(m, "W8: fc_msh_dst\t\t%d\n", pool->fc_msh_dst); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h new file mode 100644 index 000000000000..a2e3a2cd6edb --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 CGX driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#ifndef DEBUFS_H +#define DEBUFS_H + +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "struct.h" +#include "../mbox.h" + +void print_nix_cn20k_sq_ctx(struct seq_file *m, + struct nix_cn20k_sq_ctx_s *sq_ctx); +void print_nix_cn20k_cq_ctx(struct seq_file *m, + struct nix_cn20k_aq_enq_rsp *rsp); +void print_npa_cn20k_aura_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp); +void print_npa_cn20k_pool_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp); + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c new file mode 100644 index 000000000000..aa2016fd1bba --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "struct.h" +#include "../rvu.h" + +int rvu_mbox_handler_nix_cn20k_aq_enq(struct rvu *rvu, + struct nix_cn20k_aq_enq_req *req, + struct nix_cn20k_aq_enq_rsp *rsp) +{ + return rvu_nix_aq_enq_inst(rvu, (struct nix_aq_enq_req *)req, + (struct nix_aq_enq_rsp *)rsp); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c new file mode 100644 index 000000000000..fe8f926c8b75 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "struct.h" +#include "../rvu.h" + +int rvu_mbox_handler_npa_cn20k_aq_enq(struct rvu *rvu, + struct npa_cn20k_aq_enq_req *req, + struct npa_cn20k_aq_enq_rsp *rsp) +{ + return rvu_npa_aq_enq_inst(rvu, (struct npa_aq_enq_req *)req, + (struct npa_aq_enq_rsp *)rsp); +} +EXPORT_SYMBOL(rvu_mbox_handler_npa_cn20k_aq_enq); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h index 76ce3ec6da9c..763f6cabd7c2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h @@ -8,6 +8,8 @@ #ifndef STRUCT_H #define STRUCT_H +#define NIX_MAX_CTX_SIZE 128 + /* * CN20k RVU PF MBOX Interrupt Vector Enumeration * @@ -37,4 +39,342 @@ enum rvu_af_cn20k_int_vec_e { RVU_AF_CN20K_INT_VEC_PFAF1_MBOX1 = 0x9, RVU_AF_CN20K_INT_VEC_CNT = 0xa, }; + +struct nix_cn20k_sq_ctx_s { + u64 ena : 1; /* W0 */ + u64 qint_idx : 6; + u64 substream : 20; + u64 sdp_mcast : 1; + u64 cq : 20; + u64 sqe_way_mask : 16; + u64 smq : 11; /* W1 */ + u64 cq_ena : 1; + u64 xoff : 1; + u64 sso_ena : 1; + u64 smq_rr_weight : 14; + u64 default_chan : 12; + u64 sqb_count : 16; + u64 reserved_120_120 : 1; + u64 smq_rr_count_lb : 7; + u64 smq_rr_count_ub : 25; /* W2 */ + u64 sqb_aura : 20; + u64 sq_int : 8; + u64 sq_int_ena : 8; + u64 sqe_stype : 2; + u64 reserved_191_191 : 1; + u64 max_sqe_size : 2; /* W3 */ + u64 cq_limit : 8; + u64 lmt_dis : 1; + u64 mnq_dis : 1; + u64 smq_next_sq : 20; + u64 smq_lso_segnum : 8; + u64 tail_offset : 6; + u64 smenq_offset : 6; + u64 head_offset : 6; + u64 smenq_next_sqb_vld : 1; + u64 smq_pend : 1; + u64 smq_next_sq_vld : 1; + u64 reserved_253_255 : 3; + u64 next_sqb : 64; /* W4 */ + u64 tail_sqb : 64; /* W5 */ + u64 smenq_sqb : 64; /* W6 */ + u64 smenq_next_sqb : 64; /* W7 */ + u64 head_sqb : 64; /* W8 */ + u64 reserved_576_583 : 8; /* W9 */ + u64 vfi_lso_total : 18; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_sb : 8; + u64 vfi_lso_mps : 14; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vld : 1; + u64 reserved_630_639 : 10; + u64 scm_lso_rem : 18; /* W10 */ + u64 reserved_658_703 : 46; + u64 octs : 48; /* W11 */ + u64 reserved_752_767 : 16; + u64 pkts : 48; /* W12 */ + u64 reserved_816_831 : 16; + u64 aged_drop_octs : 32; /* W13 */ + u64 aged_drop_pkts : 32; + u64 dropped_octs : 48; /* W14 */ + u64 reserved_944_959 : 16; + u64 dropped_pkts : 48; /* W15 */ + u64 reserved_1008_1023 : 16; +}; + +static_assert(sizeof(struct nix_cn20k_sq_ctx_s) == NIX_MAX_CTX_SIZE); + +struct nix_cn20k_cq_ctx_s { + u64 base : 64; /* W0 */ + u64 lbp_ena : 1; /* W1 */ + u64 lbpid_low : 3; + u64 bp_ena : 1; + u64 lbpid_med : 3; + u64 bpid : 9; + u64 lbpid_high : 3; + u64 qint_idx : 7; + u64 cq_err : 1; + u64 cint_idx : 7; + u64 avg_con : 9; + u64 wrptr : 20; + u64 tail : 20; /* W2 */ + u64 head : 20; + u64 avg_level : 8; + u64 update_time : 16; + u64 bp : 8; /* W3 */ + u64 drop : 8; + u64 drop_ena : 1; + u64 ena : 1; + u64 cpt_drop_err_en : 1; + u64 reserved_211_211 : 1; + u64 msh_dst : 11; + u64 msh_valid : 1; + u64 stash_thresh : 4; + u64 lbp_frac : 4; + u64 caching : 1; + u64 stashing : 1; + u64 reserved_234_235 : 2; + u64 qsize : 4; + u64 cq_err_int : 8; + u64 cq_err_int_ena : 8; + u64 bpid_ext : 2; /* W4 */ + u64 reserved_258_259 : 2; + u64 lbpid_ext : 2; + u64 reserved_262_319 : 58; + u64 reserved_320_383 : 64; /* W5 */ + u64 reserved_384_447 : 64; /* W6 */ + u64 reserved_448_511 : 64; /* W7 */ + u64 padding[8]; +}; + +static_assert(sizeof(struct nix_cn20k_sq_ctx_s) == NIX_MAX_CTX_SIZE); + +struct nix_cn20k_rq_ctx_s { + u64 ena : 1; + u64 sso_ena : 1; + u64 ipsech_ena : 1; + u64 ena_wqwd : 1; + u64 cq : 20; + u64 reserved_24_34 : 11; + u64 port_il4_dis : 1; + u64 port_ol4_dis : 1; + u64 lenerr_dis : 1; + u64 csum_il4_dis : 1; + u64 csum_ol4_dis : 1; + u64 len_il4_dis : 1; + u64 len_il3_dis : 1; + u64 len_ol4_dis : 1; + u64 len_ol3_dis : 1; + u64 wqe_aura : 20; + u64 spb_aura : 20; + u64 lpb_aura : 20; + u64 sso_grp : 10; + u64 sso_tt : 2; + u64 pb_caching : 2; + u64 wqe_caching : 1; + u64 xqe_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 lpb_drop_ena : 1; + u64 pb_stashing : 1; + u64 ipsecd_drop_en : 1; + u64 chi_ena : 1; + u64 reserved_125_127 : 3; + u64 band_prof_id_l : 10; + u64 sso_fc_ena : 1; + u64 policer_ena : 1; + u64 spb_sizem1 : 6; + u64 wqe_skip : 2; + u64 spb_high_sizem1 : 3; + u64 spb_ena : 1; + u64 lpb_sizem1 : 12; + u64 first_skip : 7; + u64 reserved_171_171 : 1; + u64 later_skip : 6; + u64 xqe_imm_size : 6; + u64 band_prof_id_h : 4; + u64 reserved_188_189 : 2; + u64 xqe_imm_copy : 1; + u64 xqe_hdr_split : 1; + u64 xqe_drop : 8; + u64 xqe_pass : 8; + u64 wqe_pool_drop : 8; + u64 wqe_pool_pass : 8; + u64 spb_aura_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_pool_pass : 8; + u64 lpb_aura_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_pool_pass : 8; + u64 reserved_288_291 : 4; + u64 rq_int : 8; + u64 rq_int_ena : 8; + u64 qint_idx : 7; + u64 reserved_315_319 : 5; + u64 ltag : 24; + u64 good_utag : 8; + u64 bad_utag : 8; + u64 flow_tagw : 6; + u64 ipsec_vwqe : 1; + u64 vwqe_ena : 1; + u64 vtime_wait : 8; + u64 max_vsize_exp : 4; + u64 vwqe_skip : 2; + u64 reserved_382_383 : 2; + u64 octs : 48; + u64 reserved_432_447 : 16; + u64 pkts : 48; + u64 reserved_496_511 : 16; + u64 drop_octs : 48; + u64 reserved_560_575 : 16; + u64 drop_pkts : 48; + u64 reserved_624_639 : 16; + u64 re_pkts : 48; + u64 reserved_688_703 : 16; + u64 reserved_704_767 : 64; + u64 reserved_768_831 : 64; + u64 reserved_832_895 : 64; + u64 reserved_896_959 : 64; + u64 reserved_960_1023 : 64; +}; + +static_assert(sizeof(struct nix_cn20k_rq_ctx_s) == NIX_MAX_CTX_SIZE); + +struct npa_cn20k_aura_s { + u64 pool_addr; /* W0 */ + u64 ena : 1; /* W1 */ + u64 reserved_65 : 2; + u64 pool_caching : 1; + u64 reserved_68 : 16; + u64 avg_con : 9; + u64 reserved_93 : 1; + u64 pool_drop_ena : 1; + u64 aura_drop_ena : 1; + u64 bp_ena : 1; + u64 reserved_97_103 : 7; + u64 aura_drop : 8; + u64 shift : 6; + u64 reserved_118_119 : 2; + u64 avg_level : 8; + u64 count : 36; /* W2 */ + u64 reserved_164_167 : 4; + u64 bpid : 12; + u64 reserved_180_191 : 12; + u64 limit : 36; /* W3 */ + u64 reserved_228_231 : 4; + u64 bp : 7; + u64 reserved_239_243 : 5; + u64 fc_ena : 1; + u64 fc_up_crossing : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 reserved_252_255 : 4; + u64 fc_addr; /* W4 */ + u64 pool_drop : 8; /* W5 */ + u64 update_time : 16; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_363 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_371 : 1; + u64 err_qint_idx : 7; + u64 reserved_379_383 : 5; + u64 thresh : 36; /* W6*/ + u64 rsvd_423_420 : 4; + u64 fc_msh_dst : 11; + u64 reserved_435_438 : 4; + u64 op_dpc_ena : 1; + u64 op_dpc_set : 5; + u64 reserved_445_445 : 1; + u64 stream_ctx : 1; + u64 unified_ctx : 1; + u64 reserved_448_511; /* W7 */ + u64 padding[8]; +}; + +static_assert(sizeof(struct npa_cn20k_aura_s) == NIX_MAX_CTX_SIZE); + +struct npa_cn20k_pool_s { + u64 stack_base; /* W0 */ + u64 ena : 1; + u64 nat_align : 1; + u64 reserved_66_67 : 2; + u64 stack_caching : 1; + u64 reserved_69_87 : 19; + u64 buf_offset : 12; + u64 reserved_100_103 : 4; + u64 buf_size : 12; + u64 reserved_116_119 : 4; + u64 ref_cnt_prof : 3; + u64 reserved_123_127 : 5; + u64 stack_max_pages : 32; + u64 stack_pages : 32; + uint64_t bp_0 : 7; + uint64_t bp_1 : 7; + uint64_t bp_2 : 7; + uint64_t bp_3 : 7; + uint64_t bp_4 : 7; + uint64_t bp_5 : 7; + uint64_t bp_6 : 7; + uint64_t bp_7 : 7; + uint64_t bp_ena_0 : 1; + uint64_t bp_ena_1 : 1; + uint64_t bp_ena_2 : 1; + uint64_t bp_ena_3 : 1; + uint64_t bp_ena_4 : 1; + uint64_t bp_ena_5 : 1; + uint64_t bp_ena_6 : 1; + uint64_t bp_ena_7 : 1; + u64 stack_offset : 4; + u64 reserved_260_263 : 4; + u64 shift : 6; + u64 reserved_270_271 : 2; + u64 avg_level : 8; + u64 avg_con : 9; + u64 fc_ena : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 fc_up_crossing : 1; + u64 reserved_297_299 : 3; + u64 update_time : 16; + u64 reserved_316_319 : 4; + u64 fc_addr; /* W5 */ + u64 ptr_start; /* W6 */ + u64 ptr_end; /* W7 */ + u64 bpid_0 : 12; + u64 reserved_524_535 : 12; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_555 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_563 : 1; + u64 err_qint_idx : 7; + u64 reserved_571_575 : 5; + u64 thresh : 36; + u64 rsvd_612_615 : 4; + u64 fc_msh_dst : 11; + u64 reserved_627_630 : 4; + u64 op_dpc_ena : 1; + u64 op_dpc_set : 5; + u64 reserved_637_637 : 1; + u64 stream_ctx : 1; + u64 reserved_639 : 1; + u64 reserved_640_703; /* W10 */ + u64 reserved_704_767; /* W11 */ + u64 reserved_768_831; /* W12 */ + u64 reserved_832_895; /* W13 */ + u64 reserved_896_959; /* W14 */ + u64 reserved_960_1023; /* W15 */ +}; + +static_assert(sizeof(struct npa_cn20k_pool_s) == NIX_MAX_CTX_SIZE); + #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 933073cd2280..a3e273126e4e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -203,6 +203,8 @@ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ M(NPA_LF_FREE, 0x401, npa_lf_free, msg_req, msg_rsp) \ M(NPA_AQ_ENQ, 0x402, npa_aq_enq, npa_aq_enq_req, npa_aq_enq_rsp) \ M(NPA_HWCTX_DISABLE, 0x403, npa_hwctx_disable, hwctx_disable_req, msg_rsp)\ +M(NPA_CN20K_AQ_ENQ, 0x404, npa_cn20k_aq_enq, npa_cn20k_aq_enq_req, \ + npa_cn20k_aq_enq_rsp) \ /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \ /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ /* CPT mbox IDs (range 0xA00 - 0xBFF) */ \ @@ -336,6 +338,8 @@ M(NIX_MCAST_GRP_UPDATE, 0x802d, nix_mcast_grp_update, \ nix_mcast_grp_update_req, \ nix_mcast_grp_update_rsp) \ M(NIX_LF_STATS, 0x802e, nix_lf_stats, nix_stats_req, nix_stats_rsp) \ +M(NIX_CN20K_AQ_ENQ, 0x802f, nix_cn20k_aq_enq, nix_cn20k_aq_enq_req, \ + nix_cn20k_aq_enq_rsp) \ /* MCS mbox IDs (range 0xA000 - 0xBFFF) */ \ M(MCS_ALLOC_RESOURCES, 0xa000, mcs_alloc_resources, mcs_alloc_rsrc_req, \ mcs_alloc_rsrc_rsp) \ @@ -832,6 +836,39 @@ struct npa_aq_enq_rsp { }; }; +struct npa_cn20k_aq_enq_req { + struct mbox_msghdr hdr; + u32 aura_id; + u8 ctype; + u8 op; + union { + /* Valid when op == WRITE/INIT and ctype == AURA. + * LF fills the pool_id in aura.pool_addr. AF will translate + * the pool_id to pool context pointer. + */ + struct npa_cn20k_aura_s aura; + /* Valid when op == WRITE/INIT and ctype == POOL */ + struct npa_cn20k_pool_s pool; + }; + /* Mask data when op == WRITE (1=write, 0=don't write) */ + union { + /* Valid when op == WRITE and ctype == AURA */ + struct npa_cn20k_aura_s aura_mask; + /* Valid when op == WRITE and ctype == POOL */ + struct npa_cn20k_pool_s pool_mask; + }; +}; + +struct npa_cn20k_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + /* Valid when op == READ and ctype == AURA */ + struct npa_cn20k_aura_s aura; + /* Valid when op == READ and ctype == POOL */ + struct npa_cn20k_pool_s pool; + }; +}; + /* Disable all contexts of type 'ctype' */ struct hwctx_disable_req { struct mbox_msghdr hdr; @@ -940,6 +977,42 @@ struct nix_lf_free_req { u64 flags; }; +/* CN20K NIX AQ enqueue msg */ +struct nix_cn20k_aq_enq_req { + struct mbox_msghdr hdr; + u32 qidx; + u8 ctype; + u8 op; + union { + struct nix_cn20k_rq_ctx_s rq; + struct nix_cn20k_sq_ctx_s sq; + struct nix_cn20k_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + struct nix_bandprof_s prof; + }; + union { + struct nix_cn20k_rq_ctx_s rq_mask; + struct nix_cn20k_sq_ctx_s sq_mask; + struct nix_cn20k_cq_ctx_s cq_mask; + struct nix_rsse_s rss_mask; + struct nix_rx_mce_s mce_mask; + struct nix_bandprof_s prof_mask; + }; +}; + +struct nix_cn20k_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + struct nix_cn20k_rq_ctx_s rq; + struct nix_cn20k_sq_ctx_s sq; + struct nix_cn20k_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + struct nix_bandprof_s prof; + }; +}; + /* CN10K NIX AQ enqueue msg */ struct nix_cn10k_aq_enq_req { struct mbox_msghdr hdr; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b58283341923..e85dac2c806d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -498,6 +498,14 @@ struct channel_fwdata { u8 reserved[RVU_CHANL_INFO_RESERVED]; }; +struct altaf_intr_notify { + unsigned long flr_pf_bmap[2]; + unsigned long flr_vf_bmap[2]; + unsigned long gint_paddr; + unsigned long gint_iova_addr; + unsigned long reserved[6]; +}; + struct rvu_fwdata { #define RVU_FWDATA_HEADER_MAGIC 0xCFDA /* Custom Firmware Data*/ #define RVU_FWDATA_VERSION 0x0001 @@ -517,7 +525,8 @@ struct rvu_fwdata { u32 ptp_ext_clk_rate; u32 ptp_ext_tstamp; struct channel_fwdata channel_data; -#define FWDATA_RESERVED_MEM 958 + struct altaf_intr_notify altaf_intr_info; +#define FWDATA_RESERVED_MEM 946 u64 reserved[FWDATA_RESERVED_MEM]; #define CGX_MAX 9 #define CGX_LMACS_MAX 4 @@ -648,6 +657,7 @@ struct rvu { struct mutex mbox_lock; /* Serialize mbox up and down msgs */ u16 rep_pcifunc; + bool altaf_ready; int rep_cnt; u16 *rep2pfvf_map; u8 rep_mode; @@ -1032,6 +1042,9 @@ void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc); int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf, int blkaddr, int nixlf); void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr); +int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp); + /* NPC APIs */ void rvu_npc_freemem(struct rvu *rvu); int rvu_npc_get_pkind(struct rvu *rvu, u16 pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 8375f18c8e07..7370812ece2a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -21,6 +21,8 @@ #include "rvu_npc_hash.h" #include "mcs.h" +#include "cn20k/debugfs.h" + #define DEBUGFS_DIR_NAME "octeontx2" enum { @@ -1101,6 +1103,11 @@ static void print_npa_aura_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) struct npa_aura_s *aura = &rsp->aura; struct rvu *rvu = m->private; + if (is_cn20k(rvu->pdev)) { + print_npa_cn20k_aura_ctx(m, (struct npa_cn20k_aq_enq_rsp *)rsp); + return; + } + seq_printf(m, "W0: Pool addr\t\t%llx\n", aura->pool_addr); seq_printf(m, "W1: ena\t\t\t%d\nW1: pool caching\t%d\n", @@ -1149,6 +1156,11 @@ static void print_npa_pool_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) struct npa_pool_s *pool = &rsp->pool; struct rvu *rvu = m->private; + if (is_cn20k(rvu->pdev)) { + print_npa_cn20k_pool_ctx(m, (struct npa_cn20k_aq_enq_rsp *)rsp); + return; + } + seq_printf(m, "W0: Stack base\t\t%llx\n", pool->stack_base); seq_printf(m, "W1: ena \t\t%d\nW1: nat_align \t\t%d\n", @@ -2009,10 +2021,16 @@ static void print_nix_sq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) struct nix_hw *nix_hw = m->private; struct rvu *rvu = nix_hw->rvu; + if (is_cn20k(rvu->pdev)) { + print_nix_cn20k_sq_ctx(m, (struct nix_cn20k_sq_ctx_s *)sq_ctx); + return; + } + if (!is_rvu_otx2(rvu)) { print_nix_cn10k_sq_ctx(m, (struct nix_cn10k_sq_ctx_s *)sq_ctx); return; } + seq_printf(m, "W0: sqe_way_mask \t\t%d\nW0: cq \t\t\t\t%d\n", sq_ctx->sqe_way_mask, sq_ctx->cq); seq_printf(m, "W0: sdp_mcast \t\t\t%d\nW0: substream \t\t\t0x%03x\n", @@ -2103,7 +2121,9 @@ static void print_nix_cn10k_rq_ctx(struct seq_file *m, seq_printf(m, "W1: ipsecd_drop_ena \t\t%d\nW1: chi_ena \t\t\t%d\n\n", rq_ctx->ipsecd_drop_ena, rq_ctx->chi_ena); - seq_printf(m, "W2: band_prof_id \t\t%d\n", rq_ctx->band_prof_id); + seq_printf(m, "W2: band_prof_id \t\t%d\n", + (u16)rq_ctx->band_prof_id_h << 10 | rq_ctx->band_prof_id); + seq_printf(m, "W2: policer_ena \t\t%d\n", rq_ctx->policer_ena); seq_printf(m, "W2: spb_sizem1 \t\t\t%d\n", rq_ctx->spb_sizem1); seq_printf(m, "W2: wqe_skip \t\t\t%d\nW2: sqb_ena \t\t\t%d\n", @@ -2225,6 +2245,11 @@ static void print_nix_cq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) struct nix_hw *nix_hw = m->private; struct rvu *rvu = nix_hw->rvu; + if (is_cn20k(rvu->pdev)) { + print_nix_cn20k_cq_ctx(m, (struct nix_cn20k_aq_enq_rsp *)rsp); + return; + } + seq_printf(m, "W0: base \t\t\t%llx\n\n", cq_ctx->base); seq_printf(m, "W1: wrptr \t\t\t%llx\n", (u64)cq_ctx->wrptr); @@ -2254,6 +2279,7 @@ static void print_nix_cq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) cq_ctx->cq_err_int_ena, cq_ctx->cq_err_int); seq_printf(m, "W3: qsize \t\t\t%d\nW3:caching \t\t\t%d\n", cq_ctx->qsize, cq_ctx->caching); + seq_printf(m, "W3: substream \t\t\t0x%03x\nW3: ena \t\t\t%d\n", cq_ctx->substream, cq_ctx->ena); if (!is_rvu_otx2(rvu)) { @@ -2615,7 +2641,10 @@ static void print_band_prof_ctx(struct seq_file *m, (prof->rc_action == 1) ? "DROP" : "RED"; seq_printf(m, "W1: rc_action\t\t%s\n", str); seq_printf(m, "W1: meter_algo\t\t%d\n", prof->meter_algo); - seq_printf(m, "W1: band_prof_id\t%d\n", prof->band_prof_id); + + seq_printf(m, "W1: band_prof_id\t%d\n", + (u16)prof->band_prof_id_h << 7 | prof->band_prof_id); + seq_printf(m, "W1: hl_en\t\t%d\n", prof->hl_en); seq_printf(m, "W2: ts\t\t\t%lld\n", (u64)prof->ts); @@ -2784,6 +2813,9 @@ static void rvu_dbg_npa_init(struct rvu *rvu) &rvu_dbg_npa_aura_ctx_fops); debugfs_create_file("pool_ctx", 0600, rvu->rvu_dbg.npa, rvu, &rvu_dbg_npa_pool_ctx_fops); + + if (is_cn20k(rvu->pdev)) /* NDC not appliable for cn20k */ + return; debugfs_create_file("ndc_cache", 0600, rvu->rvu_dbg.npa, rvu, &rvu_dbg_npa_ndc_cache_fops); debugfs_create_file("ndc_hits_miss", 0600, rvu->rvu_dbg.npa, rvu, @@ -3950,6 +3982,9 @@ static void rvu_dbg_cpt_init(struct rvu *rvu, int blkaddr) static const char *rvu_get_dbg_dir_name(struct rvu *rvu) { + if (is_cn20k(rvu->pdev)) + return "cn20k"; + if (!is_rvu_otx2(rvu)) return "cn10k"; else diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 828316211b24..2f485a930edd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1019,6 +1019,12 @@ static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req, { struct nix_cn10k_aq_enq_req *aq_req; + if (is_cn20k(rvu->pdev)) { + *smq = ((struct nix_cn20k_aq_enq_req *)req)->sq.smq; + *smq_mask = ((struct nix_cn20k_aq_enq_req *)req)->sq_mask.smq; + return; + } + if (!is_rvu_otx2(rvu)) { aq_req = (struct nix_cn10k_aq_enq_req *)req; *smq = aq_req->sq.smq; @@ -1149,36 +1155,36 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, case NIX_AQ_INSTOP_WRITE: if (req->ctype == NIX_AQ_CTYPE_RQ) memcpy(mask, &req->rq_mask, - sizeof(struct nix_rq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_SQ) memcpy(mask, &req->sq_mask, - sizeof(struct nix_sq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_CQ) memcpy(mask, &req->cq_mask, - sizeof(struct nix_cq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_RSS) memcpy(mask, &req->rss_mask, - sizeof(struct nix_rsse_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_MCE) memcpy(mask, &req->mce_mask, - sizeof(struct nix_rx_mce_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_BANDPROF) memcpy(mask, &req->prof_mask, - sizeof(struct nix_bandprof_s)); + NIX_MAX_CTX_SIZE); fallthrough; case NIX_AQ_INSTOP_INIT: if (req->ctype == NIX_AQ_CTYPE_RQ) - memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s)); + memcpy(ctx, &req->rq, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_SQ) - memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s)); + memcpy(ctx, &req->sq, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_CQ) - memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s)); + memcpy(ctx, &req->cq, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_RSS) - memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s)); + memcpy(ctx, &req->rss, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_MCE) - memcpy(ctx, &req->mce, sizeof(struct nix_rx_mce_s)); + memcpy(ctx, &req->mce, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_BANDPROF) - memcpy(ctx, &req->prof, sizeof(struct nix_bandprof_s)); + memcpy(ctx, &req->prof, NIX_MAX_CTX_SIZE); break; case NIX_AQ_INSTOP_NOP: case NIX_AQ_INSTOP_READ: @@ -1243,22 +1249,22 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, if (req->op == NIX_AQ_INSTOP_READ) { if (req->ctype == NIX_AQ_CTYPE_RQ) memcpy(&rsp->rq, ctx, - sizeof(struct nix_rq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_SQ) memcpy(&rsp->sq, ctx, - sizeof(struct nix_sq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_CQ) memcpy(&rsp->cq, ctx, - sizeof(struct nix_cq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_RSS) memcpy(&rsp->rss, ctx, - sizeof(struct nix_rsse_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_MCE) memcpy(&rsp->mce, ctx, - sizeof(struct nix_rx_mce_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_BANDPROF) memcpy(&rsp->prof, ctx, - sizeof(struct nix_bandprof_s)); + NIX_MAX_CTX_SIZE); } } @@ -1289,8 +1295,8 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, /* Make copy of original context & mask which are required * for resubmission */ - memcpy(&aq_req.cq_mask, &req->cq_mask, sizeof(struct nix_cq_ctx_s)); - memcpy(&aq_req.cq, &req->cq, sizeof(struct nix_cq_ctx_s)); + memcpy(&aq_req.cq_mask, &req->cq_mask, NIX_MAX_CTX_SIZE); + memcpy(&aq_req.cq, &req->cq, NIX_MAX_CTX_SIZE); /* exclude fields which HW can update */ aq_req.cq_mask.cq_err = 0; @@ -1309,7 +1315,7 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, * updated fields are masked out for request and response * comparison */ - for (word = 0; word < sizeof(struct nix_cq_ctx_s) / sizeof(u64); + for (word = 0; word < NIX_MAX_CTX_SIZE / sizeof(u64); word++) { *(u64 *)((u8 *)&aq_rsp.cq + word * 8) &= (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8)); @@ -1317,14 +1323,14 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8)); } - if (memcmp(&aq_req.cq, &aq_rsp.cq, sizeof(struct nix_cq_ctx_s))) + if (memcmp(&aq_req.cq, &aq_rsp.cq, NIX_MAX_CTX_SIZE)) return NIX_AF_ERR_AQ_CTX_RETRY_WRITE; return 0; } -static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, - struct nix_aq_enq_rsp *rsp) +int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) { struct nix_hw *nix_hw; int err, retries = 5; @@ -5812,6 +5818,8 @@ static void nix_ipolicer_freemem(struct rvu *rvu, struct nix_hw *nix_hw) } } +#define NIX_BW_PROF_HI_MASK GENMASK(10, 7) + static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req, struct nix_hw *nix_hw, u16 pcifunc) { @@ -5850,7 +5858,8 @@ static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req, return -EINVAL; ipolicer = &nix_hw->ipolicer[hi_layer]; - prof_idx = req->prof.band_prof_id; + prof_idx = FIELD_PREP(NIX_BW_PROF_HI_MASK, req->prof.band_prof_id_h); + prof_idx |= req->prof.band_prof_id; if (prof_idx >= ipolicer->band_prof.max || ipolicer->pfvf_map[prof_idx] != pcifunc) return -EINVAL; @@ -6015,8 +6024,10 @@ static int nix_ipolicer_map_leaf_midprofs(struct rvu *rvu, aq_req->op = NIX_AQ_INSTOP_WRITE; aq_req->qidx = leaf_prof; - aq_req->prof.band_prof_id = mid_prof; + aq_req->prof.band_prof_id = mid_prof & 0x7F; aq_req->prof_mask.band_prof_id = GENMASK(6, 0); + aq_req->prof.band_prof_id_h = FIELD_GET(NIX_BW_PROF_HI_MASK, mid_prof); + aq_req->prof_mask.band_prof_id_h = GENMASK(3, 0); aq_req->prof.hl_en = 1; aq_req->prof_mask.hl_en = 1; @@ -6025,6 +6036,8 @@ static int nix_ipolicer_map_leaf_midprofs(struct rvu *rvu, (struct nix_aq_enq_rsp *)aq_rsp); } +#define NIX_RQ_PROF_HI_MASK GENMASK(13, 10) + int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, u16 rq_idx, u16 match_id) { @@ -6056,7 +6069,8 @@ int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, return 0; /* Get the bandwidth profile ID mapped to this RQ */ - leaf_prof = aq_rsp.rq.band_prof_id; + leaf_prof = FIELD_PREP(NIX_RQ_PROF_HI_MASK, aq_rsp.rq.band_prof_id_h); + leaf_prof |= aq_rsp.rq.band_prof_id; ipolicer = &nix_hw->ipolicer[BAND_PROF_LEAF_LAYER]; ipolicer->match_id[leaf_prof] = match_id; @@ -6094,7 +6108,10 @@ int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, * to different RQs and marked with same match_id * are rate limited in a aggregate fashion */ - mid_prof = aq_rsp.prof.band_prof_id; + mid_prof = FIELD_PREP(NIX_BW_PROF_HI_MASK, + aq_rsp.prof.band_prof_id_h); + mid_prof |= aq_rsp.prof.band_prof_id; + rc = nix_ipolicer_map_leaf_midprofs(rvu, nix_hw, &aq_req, &aq_rsp, leaf_prof, mid_prof); @@ -6216,7 +6233,8 @@ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw, if (!aq_rsp.prof.hl_en) return; - mid_prof = aq_rsp.prof.band_prof_id; + mid_prof = FIELD_PREP(NIX_BW_PROF_HI_MASK, aq_rsp.prof.band_prof_id_h); + mid_prof |= aq_rsp.prof.band_prof_id; ipolicer = &nix_hw->ipolicer[BAND_PROF_MID_LAYER]; ipolicer->ref_count[mid_prof]--; /* If ref_count is zero, free mid layer profile */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c index 4f5ca5ab13a4..e2a33e46b48a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c @@ -464,6 +464,23 @@ int rvu_mbox_handler_npa_lf_free(struct rvu *rvu, struct msg_req *req, return 0; } +static void npa_aq_ndc_config(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + + if (is_cn20k(rvu->pdev)) /* NDC not applicable to cn20k */ + return; + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); + cfg &= ~0x03DULL; +#ifdef CONFIG_NDC_DIS_DYNAMIC_CACHING + /* Disable caching of stack pages */ + cfg |= 0x10ULL; +#endif + rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); +} + static int npa_aq_init(struct rvu *rvu, struct rvu_block *block) { u64 cfg; @@ -479,14 +496,7 @@ static int npa_aq_init(struct rvu *rvu, struct rvu_block *block) rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); #endif - /* Do not bypass NDC cache */ - cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); - cfg &= ~0x03DULL; -#ifdef CONFIG_NDC_DIS_DYNAMIC_CACHING - /* Disable caching of stack pages */ - cfg |= 0x10ULL; -#endif - rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); + npa_aq_ndc_config(rvu, block); /* For CN10K NPA BATCH DMA set 35 cache lines */ if (!is_rvu_otx2(rvu)) { @@ -567,6 +577,9 @@ int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr) int bank, max_bank, line, max_line, err; u64 reg, ndc_af_const; + if (is_cn20k(rvu->pdev)) /* NDC not applicable to cn20k */ + return 0; + /* Set the ENABLE bit(63) to '0' */ reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL); rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0)); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 0596a3ac4c12..8e868f815de1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -13,6 +13,8 @@ #define RVU_MULTI_BLK_VER 0x7ULL +#define NIX_MAX_CTX_SIZE 128 + /* RVU Block Address Enumeration */ enum rvu_block_addr_e { BLKADDR_RVUM = 0x0ULL, @@ -370,8 +372,12 @@ struct nix_cq_ctx_s { u64 qsize : 4; u64 cq_err_int : 8; u64 cq_err_int_ena : 8; + /* Ensure all context sizes are 128 bytes */ + u64 padding[12]; }; +static_assert(sizeof(struct nix_cq_ctx_s) == NIX_MAX_CTX_SIZE); + /* CN10K NIX Receive queue context structure */ struct nix_cn10k_rq_ctx_s { u64 ena : 1; @@ -413,7 +419,8 @@ struct nix_cn10k_rq_ctx_s { u64 rsvd_171 : 1; u64 later_skip : 6; u64 xqe_imm_size : 6; - u64 rsvd_189_184 : 6; + u64 band_prof_id_h : 4; + u64 rsvd_189_188 : 2; u64 xqe_imm_copy : 1; u64 xqe_hdr_split : 1; u64 xqe_drop : 8; /* W3 */ @@ -460,6 +467,8 @@ struct nix_cn10k_rq_ctx_s { u64 rsvd_1023_960; /* W15 */ }; +static_assert(sizeof(struct nix_cn10k_rq_ctx_s) == NIX_MAX_CTX_SIZE); + /* CN10K NIX Send queue context structure */ struct nix_cn10k_sq_ctx_s { u64 ena : 1; @@ -523,6 +532,8 @@ struct nix_cn10k_sq_ctx_s { u64 rsvd_1023_1008 : 16; }; +static_assert(sizeof(struct nix_cn10k_sq_ctx_s) == NIX_MAX_CTX_SIZE); + /* NIX Receive queue context structure */ struct nix_rq_ctx_s { u64 ena : 1; @@ -594,6 +605,8 @@ struct nix_rq_ctx_s { u64 rsvd_1023_960; /* W15 */ }; +static_assert(sizeof(struct nix_rq_ctx_s) == NIX_MAX_CTX_SIZE); + /* NIX sqe sizes */ enum nix_maxsqesz { NIX_MAXSQESZ_W16 = 0x0, @@ -668,13 +681,18 @@ struct nix_sq_ctx_s { u64 rsvd_1023_1008 : 16; }; +static_assert(sizeof(struct nix_sq_ctx_s) == NIX_MAX_CTX_SIZE); + /* NIX Receive side scaling entry structure*/ struct nix_rsse_s { uint32_t rq : 20; uint32_t reserved_20_31 : 12; - + /* Ensure all context sizes are minimum 128 bytes */ + u64 padding[15]; }; +static_assert(sizeof(struct nix_rsse_s) == NIX_MAX_CTX_SIZE); + /* NIX receive multicast/mirror entry structure */ struct nix_rx_mce_s { uint64_t op : 2; @@ -684,8 +702,12 @@ struct nix_rx_mce_s { uint64_t rsvd_31_24 : 8; uint64_t pf_func : 16; uint64_t next : 16; + /* Ensure all context sizes are minimum 128 bytes */ + u64 padding[15]; }; +static_assert(sizeof(struct nix_rx_mce_s) == NIX_MAX_CTX_SIZE); + enum nix_band_prof_layers { BAND_PROF_LEAF_LAYER = 0, BAND_PROF_INVAL_LAYER = 1, @@ -736,7 +758,8 @@ struct nix_bandprof_s { uint64_t rc_action : 2; uint64_t meter_algo : 2; uint64_t band_prof_id : 7; - uint64_t reserved_111_118 : 8; + uint64_t band_prof_id_h : 4; + uint64_t reserved_115_118 : 4; uint64_t hl_en : 1; uint64_t reserved_120_127 : 8; uint64_t ts : 48; /* W2 */ @@ -769,6 +792,8 @@ struct nix_bandprof_s { uint64_t reserved_1008_1023 : 16; }; +static_assert(sizeof(struct nix_bandprof_s) == NIX_MAX_CTX_SIZE); + enum nix_lsoalg { NIX_LSOALG_NOP, NIX_LSOALG_ADD_SEGNUM, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index bec7d5b4d7cc..3e1bf22cba69 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -15,6 +15,8 @@ static struct dev_hw_ops otx2_hw_ops = { .aura_freeptr = otx2_aura_freeptr, .refill_pool_ptrs = otx2_refill_pool_ptrs, .pfaf_mbox_intr_handler = otx2_pfaf_mbox_intr_handler, + .aura_aq_init = otx2_aura_aq_init, + .pool_aq_init = otx2_pool_aq_init, }; static struct dev_hw_ops cn10k_hw_ops = { @@ -23,6 +25,8 @@ static struct dev_hw_ops cn10k_hw_ops = { .aura_freeptr = cn10k_aura_freeptr, .refill_pool_ptrs = cn10k_refill_pool_ptrs, .pfaf_mbox_intr_handler = otx2_pfaf_mbox_intr_handler, + .aura_aq_init = otx2_aura_aq_init, + .pool_aq_init = otx2_pool_aq_init, }; void otx2_init_hw_ops(struct otx2_nic *pfvf) @@ -337,6 +341,12 @@ int cn10k_map_unmap_rq_policer(struct otx2_nic *pfvf, int rq_idx, aq->rq.band_prof_id = policer; aq->rq_mask.band_prof_id = GENMASK(9, 0); + /* If policer id is greater than 1023 then it implies hardware supports + * more leaf profiles. In that case use band_prof_id_h for 4 MSBs. + */ + aq->rq.band_prof_id_h = policer >> 10; + aq->rq_mask.band_prof_id_h = GENMASK(3, 0); + /* Fill AQ info */ aq->qidx = rq_idx; aq->ctype = NIX_AQ_CTYPE_RQ; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c index ec8cde98076d..a60f8cf53feb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c @@ -10,17 +10,6 @@ #include "otx2_struct.h" #include "cn10k.h" -static struct dev_hw_ops cn20k_hw_ops = { - .pfaf_mbox_intr_handler = cn20k_pfaf_mbox_intr_handler, - .vfaf_mbox_intr_handler = cn20k_vfaf_mbox_intr_handler, - .pfvf_mbox_intr_handler = cn20k_pfvf_mbox_intr_handler, -}; - -void cn20k_init(struct otx2_nic *pfvf) -{ - pfvf->hw_ops = &cn20k_hw_ops; -} -EXPORT_SYMBOL(cn20k_init); /* CN20K mbox AF => PFx irq handler */ irqreturn_t cn20k_pfaf_mbox_intr_handler(int irq, void *pf_irq) { @@ -250,3 +239,212 @@ int cn20k_register_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs) return 0; } + +#define RQ_BP_LVL_AURA (255 - ((85 * 256) / 100)) /* BP when 85% is full */ + +static u8 cn20k_aura_bpid_idx(struct otx2_nic *pfvf, int aura_id) +{ +#ifdef CONFIG_DCB + return pfvf->queue_to_pfc_map[aura_id]; +#else + return 0; +#endif +} + +static int cn20k_aura_aq_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) +{ + struct npa_cn20k_aq_enq_req *aq; + struct otx2_pool *pool; + u8 bpid_idx; + int err; + + pool = &pfvf->qset.pool[pool_id]; + + /* Allocate memory for HW to update Aura count. + * Alloc one cache line, so that it fits all FC_STYPE modes. + */ + if (!pool->fc_addr) { + err = qmem_alloc(pfvf->dev, &pool->fc_addr, 1, OTX2_ALIGN); + if (err) + return err; + } + + /* Initialize this aura's context via AF */ + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) { + /* Shared mbox memory buffer is full, flush it and retry */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + return err; + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + } + + aq->aura_id = aura_id; + + /* Will be filled by AF with correct pool context address */ + aq->aura.pool_addr = pool_id; + aq->aura.pool_caching = 1; + aq->aura.shift = ilog2(numptrs) - 8; + aq->aura.count = numptrs; + aq->aura.limit = numptrs; + aq->aura.avg_level = 255; + aq->aura.ena = 1; + aq->aura.fc_ena = 1; + aq->aura.fc_addr = pool->fc_addr->iova; + aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ + + /* Enable backpressure for RQ aura */ + if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) { + aq->aura.bp_ena = 0; + /* If NIX1 LF is attached then specify NIX1_RX. + * + * Below NPA_AURA_S[BP_ENA] is set according to the + * NPA_BPINTF_E enumeration given as: + * 0x0 + a*0x1 where 'a' is 0 for NIX0_RX and 1 for NIX1_RX so + * NIX0_RX is 0x0 + 0*0x1 = 0 + * NIX1_RX is 0x0 + 1*0x1 = 1 + * But in HRM it is given that + * "NPA_AURA_S[BP_ENA](w1[33:32]) - Enable aura backpressure to + * NIX-RX based on [BP] level. One bit per NIX-RX; index + * enumerated by NPA_BPINTF_E." + */ + if (pfvf->nix_blkaddr == BLKADDR_NIX1) + aq->aura.bp_ena = 1; + + bpid_idx = cn20k_aura_bpid_idx(pfvf, aura_id); + aq->aura.bpid = pfvf->bpid[bpid_idx]; + + /* Set backpressure level for RQ's Aura */ + aq->aura.bp = RQ_BP_LVL_AURA; + } + + /* Fill AQ info */ + aq->ctype = NPA_AQ_CTYPE_AURA; + aq->op = NPA_AQ_INSTOP_INIT; + + return 0; +} + +static int cn20k_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, + int type) +{ + struct page_pool_params pp_params = { 0 }; + struct npa_cn20k_aq_enq_req *aq; + struct otx2_pool *pool; + int err, sz; + + pool = &pfvf->qset.pool[pool_id]; + /* Alloc memory for stack which is used to store buffer pointers */ + err = qmem_alloc(pfvf->dev, &pool->stack, + stack_pages, pfvf->hw.stack_pg_bytes); + if (err) + return err; + + pool->rbsize = buf_size; + + /* Initialize this pool's context via AF */ + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) { + /* Shared mbox memory buffer is full, flush it and retry */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + qmem_free(pfvf->dev, pool->stack); + return err; + } + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) { + qmem_free(pfvf->dev, pool->stack); + return -ENOMEM; + } + } + + aq->aura_id = pool_id; + aq->pool.stack_base = pool->stack->iova; + aq->pool.stack_caching = 1; + aq->pool.ena = 1; + aq->pool.buf_size = buf_size / 128; + aq->pool.stack_max_pages = stack_pages; + aq->pool.shift = ilog2(numptrs) - 8; + aq->pool.ptr_start = 0; + aq->pool.ptr_end = ~0ULL; + + /* Fill AQ info */ + aq->ctype = NPA_AQ_CTYPE_POOL; + aq->op = NPA_AQ_INSTOP_INIT; + + if (type != AURA_NIX_RQ) { + pool->page_pool = NULL; + return 0; + } + + sz = ALIGN(ALIGN(SKB_DATA_ALIGN(buf_size), OTX2_ALIGN), PAGE_SIZE); + pp_params.order = get_order(sz); + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = pfvf->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pool->page_pool = page_pool_create(&pp_params); + if (IS_ERR(pool->page_pool)) { + netdev_err(pfvf->netdev, "Creation of page pool failed\n"); + return PTR_ERR(pool->page_pool); + } + + return 0; +} + +static int cn20k_sq_aq_init(void *dev, u16 qidx, u8 chan_offset, u16 sqb_aura) +{ + struct nix_cn20k_aq_enq_req *aq; + struct otx2_nic *pfvf = dev; + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_cn20k_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->sq.cq = pfvf->hw.rx_queues + qidx; + aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ + aq->sq.cq_ena = 1; + aq->sq.ena = 1; + aq->sq.smq = otx2_get_smq_idx(pfvf, qidx); + aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); + aq->sq.default_chan = pfvf->hw.tx_chan_base + chan_offset; + aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ + aq->sq.sqb_aura = sqb_aura; + aq->sq.sq_int_ena = NIX_SQINT_BITS; + aq->sq.qint_idx = 0; + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to maintain to avoid CQ overflow. + */ + aq->sq.cq_limit = (SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +static struct dev_hw_ops cn20k_hw_ops = { + .pfaf_mbox_intr_handler = cn20k_pfaf_mbox_intr_handler, + .vfaf_mbox_intr_handler = cn20k_vfaf_mbox_intr_handler, + .pfvf_mbox_intr_handler = cn20k_pfvf_mbox_intr_handler, + .sq_aq_init = cn20k_sq_aq_init, + .sqe_flush = cn10k_sqe_flush, + .aura_freeptr = cn10k_aura_freeptr, + .refill_pool_ptrs = cn10k_refill_pool_ptrs, + .aura_aq_init = cn20k_aura_aq_init, + .pool_aq_init = cn20k_pool_aq_init, +}; + +void cn20k_init(struct otx2_nic *pfvf) +{ + pfvf->hw_ops = &cn20k_hw_ops; +} +EXPORT_SYMBOL(cn20k_init); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index aff17c37ddde..3378be87a473 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1369,6 +1369,13 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, int pool_id, int numptrs) { + return pfvf->hw_ops->aura_aq_init(pfvf, aura_id, pool_id, + numptrs); +} + +int otx2_aura_aq_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) +{ struct npa_aq_enq_req *aq; struct otx2_pool *pool; int err; @@ -1446,6 +1453,13 @@ int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, int stack_pages, int numptrs, int buf_size, int type) { + return pfvf->hw_ops->pool_aq_init(pfvf, pool_id, stack_pages, numptrs, + buf_size, type); +} + +int otx2_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, int type) +{ struct page_pool_params pp_params = { 0 }; struct xsk_buff_pool *xsk_pool; struct npa_aq_enq_req *aq; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index ec26d1b6c789..e616a727a3a9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -14,6 +14,7 @@ #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> +#include <linux/soc/marvell/silicons.h> #include <linux/soc/marvell/octeontx2/asm.h> #include <net/macsec.h> #include <net/pkt_cls.h> @@ -375,6 +376,11 @@ struct dev_hw_ops { irqreturn_t (*pfaf_mbox_intr_handler)(int irq, void *pf_irq); irqreturn_t (*vfaf_mbox_intr_handler)(int irq, void *pf_irq); irqreturn_t (*pfvf_mbox_intr_handler)(int irq, void *pf_irq); + int (*aura_aq_init)(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); + int (*pool_aq_init)(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, + int type); }; #define CN10K_MCS_SA_PER_SC 4 @@ -1059,6 +1065,10 @@ irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq); int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura); int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx); int otx2_set_hw_capabilities(struct otx2_nic *pfvf); +int otx2_aura_aq_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); +int otx2_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, int type); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index edfaa5850527..cc619dbebf9d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1975,7 +1975,6 @@ out_ptp_stop: out_set_reset: ravb_set_opmode(ndev, CCC_OPC_RESET); out_rpm_put: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); out_napi_off: if (info->nc_queues) @@ -2404,7 +2403,6 @@ static int ravb_close(struct net_device *ndev) if (error) return error; - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; @@ -3093,7 +3091,6 @@ static int ravb_probe(struct platform_device *pdev) netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n", (u32)ndev->base_addr, ndev->dev_addr, ndev->irq); - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); return 0; @@ -3277,10 +3274,8 @@ static int ravb_resume(struct device *dev) return 0; out_rpm_put: - if (!priv->wol_enabled) { - pm_runtime_mark_last_busy(dev); + if (!priv->wol_enabled) pm_runtime_put_autosuspend(dev); - } return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 553a8897b005..27083af54568 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -26,6 +26,9 @@ #include "hwif.h" #include "mmc.h" +#define DWMAC_SNPSVER GENMASK_U32(7, 0) +#define DWMAC_USERVER GENMASK_U32(15, 8) + /* Synopsys Core versions */ #define DWMAC_CORE_3_40 0x34 #define DWMAC_CORE_3_50 0x35 diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 41a7e1841227..8212441f9826 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -13,31 +13,42 @@ #include "dwmac4_descs.h" #include "dwxgmac2.h" -static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) +struct stmmac_version { + u8 snpsver; + u8 dev_id; +}; + +static void stmmac_get_version(struct stmmac_priv *priv, + struct stmmac_version *ver) { - u32 reg = readl(priv->ioaddr + id_reg); + enum dwmac_core_type core_type = priv->plat->core_type; + unsigned int version_offset; + u32 version; - if (!reg) { - dev_info(priv->device, "Version ID not available\n"); - return 0x0; - } + ver->snpsver = 0; + ver->dev_id = 0; - dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", - (unsigned int)(reg & GENMASK(15, 8)) >> 8, - (unsigned int)(reg & GENMASK(7, 0))); - return reg & GENMASK(7, 0); -} + if (core_type == DWMAC_CORE_MAC100) + return; -static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 id_reg) -{ - u32 reg = readl(priv->ioaddr + id_reg); + if (core_type == DWMAC_CORE_GMAC) + version_offset = GMAC_VERSION; + else + version_offset = GMAC4_VERSION; - if (!reg) { + version = readl(priv->ioaddr + version_offset); + if (version == 0) { dev_info(priv->device, "Version ID not available\n"); - return 0x0; + return; } - return (reg & GENMASK(15, 8)) >> 8; + dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", + FIELD_GET(DWMAC_USERVER, version), + FIELD_GET(DWMAC_SNPSVER, version)); + + ver->snpsver = FIELD_GET(DWMAC_SNPSVER, version); + if (core_type == DWMAC_CORE_XGMAC) + ver->dev_id = FIELD_GET(DWMAC_USERVER, version); } static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) @@ -288,27 +299,43 @@ static const struct stmmac_hwif_entry { }, }; +static const struct stmmac_hwif_entry * +stmmac_hwif_find(enum dwmac_core_type core_type, u8 snpsver, u8 dev_id) +{ + const struct stmmac_hwif_entry *entry; + int i; + + for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { + entry = &stmmac_hw[i]; + + if (core_type != entry->core_type) + continue; + /* Use synopsys_id var because some setups can override this */ + if (snpsver < entry->min_id) + continue; + if (core_type == DWMAC_CORE_XGMAC && + dev_id != entry->dev_id) + continue; + + return entry; + } + + return NULL; +} + int stmmac_hwif_init(struct stmmac_priv *priv) { enum dwmac_core_type core_type = priv->plat->core_type; const struct stmmac_hwif_entry *entry; + struct stmmac_version version; struct mac_device_info *mac; bool needs_setup = true; - u32 id, dev_id = 0; - int i, ret; + int ret; - if (core_type == DWMAC_CORE_GMAC) { - id = stmmac_get_id(priv, GMAC_VERSION); - } else if (dwmac_is_xmac(core_type)) { - id = stmmac_get_id(priv, GMAC4_VERSION); - if (core_type == DWMAC_CORE_XGMAC) - dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION); - } else { - id = 0; - } + stmmac_get_version(priv, &version); /* Save ID for later use */ - priv->synopsys_id = id; + priv->synopsys_id = version.snpsver; /* Lets assume some safe values first */ if (core_type == DWMAC_CORE_GMAC4) { @@ -336,51 +363,48 @@ int stmmac_hwif_init(struct stmmac_priv *priv) spin_lock_init(&mac->irq_ctrl_lock); /* Fallback to generic HW */ - for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { - entry = &stmmac_hw[i]; - if (core_type != entry->core_type) - continue; - /* Use synopsys_id var because some setups can override this */ - if (priv->synopsys_id < entry->min_id) - continue; - if (core_type == DWMAC_CORE_XGMAC && (dev_id ^ entry->dev_id)) - continue; + /* Use synopsys_id var because some setups can override this */ + entry = stmmac_hwif_find(core_type, priv->synopsys_id, version.dev_id); + if (!entry) { + dev_err(priv->device, + "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", + version.snpsver, core_type == DWMAC_CORE_GMAC, + core_type == DWMAC_CORE_GMAC4); - /* Only use generic HW helpers if needed */ - mac->desc = mac->desc ? : entry->desc; - mac->dma = mac->dma ? : entry->dma; - mac->mac = mac->mac ? : entry->mac; - mac->ptp = mac->ptp ? : entry->hwtimestamp; - mac->mode = mac->mode ? : entry->mode; - mac->tc = mac->tc ? : entry->tc; - mac->mmc = mac->mmc ? : entry->mmc; - mac->est = mac->est ? : entry->est; - mac->vlan = mac->vlan ? : entry->vlan; - - priv->hw = mac; - priv->fpe_cfg.reg = entry->regs.fpe_reg; - priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; - priv->mmcaddr = priv->ioaddr + entry->regs.mmc_off; - memcpy(&priv->ptp_clock_ops, entry->ptp, - sizeof(struct ptp_clock_info)); - if (entry->est) - priv->estaddr = priv->ioaddr + entry->regs.est_off; - - /* Entry found */ - if (needs_setup) { - ret = entry->setup(priv); - if (ret) - return ret; - } + return -EINVAL; + } - /* Save quirks, if needed for posterior use */ - priv->hwif_quirks = entry->quirks; - return 0; + /* Only use generic HW helpers if needed */ + mac->desc = mac->desc ? : entry->desc; + mac->dma = mac->dma ? : entry->dma; + mac->mac = mac->mac ? : entry->mac; + mac->ptp = mac->ptp ? : entry->hwtimestamp; + mac->mode = mac->mode ? : entry->mode; + mac->tc = mac->tc ? : entry->tc; + mac->mmc = mac->mmc ? : entry->mmc; + mac->est = mac->est ? : entry->est; + mac->vlan = mac->vlan ? : entry->vlan; + + priv->hw = mac; + priv->fpe_cfg.reg = entry->regs.fpe_reg; + priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; + priv->mmcaddr = priv->ioaddr + entry->regs.mmc_off; + memcpy(&priv->ptp_clock_ops, entry->ptp, + sizeof(struct ptp_clock_info)); + + if (entry->est) + priv->estaddr = priv->ioaddr + entry->regs.est_off; + + /* Entry found */ + if (needs_setup) { + ret = entry->setup(priv); + if (ret) + return ret; } - dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", - id, core_type == DWMAC_CORE_GMAC, - core_type == DWMAC_CORE_GMAC4); - return -EINVAL; + /* Save quirks, if needed for posterior use */ + priv->hwif_quirks = entry->quirks; + + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 3f8cc3293964..1e82850f2a25 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -583,9 +583,9 @@ int stmmac_mdio_register(struct net_device *ndev) struct device_node *mdio_node = priv->plat->mdio_node; struct device *dev = ndev->dev.parent; struct fwnode_handle *fixed_node; + int max_addr = PHY_MAX_ADDR - 1; struct fwnode_handle *fwnode; struct phy_device *phydev; - int max_addr; if (!mdio_bus_data) return 0; @@ -609,15 +609,12 @@ int stmmac_mdio_register(struct net_device *ndev) if (priv->synopsys_id < DWXGMAC_CORE_2_20) { /* Right now only C22 phys are supported */ - max_addr = MII_XGMAC_MAX_C22ADDR + 1; + max_addr = MII_XGMAC_MAX_C22ADDR; /* Check if DT specified an unsupported phy addr */ if (priv->plat->phy_addr > MII_XGMAC_MAX_C22ADDR) dev_err(dev, "Unsupported phy_addr (max=%d)\n", MII_XGMAC_MAX_C22ADDR); - } else { - /* XGMAC version 2.20 onwards support 32 phy addr */ - max_addr = PHY_MAX_ADDR; } } else { new_bus->read = &stmmac_mdio_read_c22; @@ -626,8 +623,6 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->read_c45 = &stmmac_mdio_read_c45; new_bus->write_c45 = &stmmac_mdio_write_c45; } - - max_addr = PHY_MAX_ADDR; } if (mdio_bus_data->needs_reset) diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 68507126be8e..48f85a3649b2 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -234,7 +234,6 @@ static int davinci_mdiobb_read_c22(struct mii_bus *bus, int phy, int reg) ret = mdiobb_read_c22(bus, phy, reg); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -251,7 +250,6 @@ static int davinci_mdiobb_write_c22(struct mii_bus *bus, int phy, int reg, ret = mdiobb_write_c22(bus, phy, reg, val); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -268,7 +266,6 @@ static int davinci_mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, ret = mdiobb_read_c45(bus, phy, devad, reg); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -285,7 +282,6 @@ static int davinci_mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, ret = mdiobb_write_c45(bus, phy, devad, reg, val); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -332,7 +328,6 @@ static int davinci_mdio_common_reset(struct davinci_mdio_data *data) data->bus->phy_mask = phy_mask; done: - pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); return 0; @@ -441,7 +436,6 @@ static int davinci_mdio_read(struct mii_bus *bus, int phy_id, int phy_reg) break; } - pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); return ret; } @@ -478,7 +472,6 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id, break; } - pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); return ret; @@ -548,8 +541,8 @@ static int davinci_mdio_probe(struct platform_device *pdev) struct davinci_mdio_data *data; struct resource *res; struct phy_device *phy; - int ret, addr; int autosuspend_delay_ms = -1; + int ret; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -652,14 +645,10 @@ static int davinci_mdio_probe(struct platform_device *pdev) goto bail_out; /* scan and dump the bus */ - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phy = mdiobus_get_phy(data->bus, addr); - if (phy) { - dev_info(dev, "phy[%d]: device %s, driver %s\n", - phy->mdio.addr, phydev_name(phy), - phy->drv ? phy->drv->name : "unknown"); - } - } + mdiobus_for_each_phy(data->bus, phy) + dev_info(dev, "phy[%d]: device %s, driver %s\n", + phy->mdio.addr, phydev_name(phy), + phy->drv ? phy->drv->name : "unknown"); return 0; diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 245a06997055..8336596b1247 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -149,7 +149,6 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id) iowrite32(pending, ipa->reg_virt + reg_offset(reg)); } out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return IRQ_HANDLED; diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 25500c5a6928..95a61bae3124 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -903,7 +903,6 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_deconfig; done: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 8fe0d0e1a00f..9b136f6b8b4a 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -71,7 +71,6 @@ static int ipa_open(struct net_device *netdev) netif_start_queue(netdev); - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return 0; @@ -102,7 +101,6 @@ static int ipa_stop(struct net_device *netdev) ipa_endpoint_disable_one(priv->rx); ipa_endpoint_disable_one(priv->tx); out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return 0; @@ -175,7 +173,6 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) ret = ipa_endpoint_skb_tx(endpoint, skb); - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); if (ret) { @@ -432,7 +429,6 @@ static void ipa_modem_crashed(struct ipa *ipa) dev_err(dev, "error %d zeroing modem memory regions\n", ret); out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); } diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index fcaadd111a8a..420098796eec 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -171,7 +171,6 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) WARN(ret != 0, "error %d from ipa_setup()\n", ret); out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return IRQ_HANDLED; @@ -213,7 +212,6 @@ static void ipa_smp2p_power_release(struct ipa *ipa) if (!ipa->smp2p->power_on) return; - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); ipa->smp2p->power_on = false; } diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index 2963db83ab6b..dc7e92f2a4fb 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -158,7 +158,6 @@ static void ipa_uc_response_hdlr(struct ipa *ipa) if (ipa->uc_powered) { ipa->uc_loaded = true; ipa_power_retention(ipa, true); - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); ipa->uc_powered = false; } else { @@ -203,7 +202,6 @@ void ipa_uc_deconfig(struct ipa *ipa) if (!ipa->uc_powered) return; - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); } diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index a2391d4b7e5c..4b0637405740 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -249,20 +249,15 @@ static int mdiobus_scan_bus_c45(struct mii_bus *bus) */ static bool mdiobus_prevent_c45_scan(struct mii_bus *bus) { - int i; + struct phy_device *phydev; - for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev; - u32 oui; - - phydev = mdiobus_get_phy(bus, i); - if (!phydev) - continue; - oui = phydev->phy_id >> 10; + mdiobus_for_each_phy(bus, phydev) { + u32 oui = phydev->phy_id >> 10; if (oui == MICREL_OUI) return true; } + return false; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b7feaf0cb1df..737747cf1906 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1224,22 +1224,24 @@ int phy_get_c45_ids(struct phy_device *phydev) EXPORT_SYMBOL(phy_get_c45_ids); /** - * phy_find_first - finds the first PHY device on the bus + * phy_find_next - finds the next PHY device on the bus * @bus: the target MII bus + * @pos: cursor + * + * Return: next phy_device on the bus, or NULL */ -struct phy_device *phy_find_first(struct mii_bus *bus) +struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos) { - struct phy_device *phydev; - int addr; + for (int addr = pos ? pos->mdio.addr + 1 : 0; + addr < PHY_MAX_ADDR; addr++) { + struct phy_device *phydev = mdiobus_get_phy(bus, addr); - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phydev = mdiobus_get_phy(bus, addr); if (phydev) return phydev; } return NULL; } -EXPORT_SYMBOL(phy_find_first); +EXPORT_SYMBOL_GPL(phy_find_next); /** * phy_prepare_link - prepares the PHY layer to monitor link status diff --git a/drivers/net/wwan/qcom_bam_dmux.c b/drivers/net/wwan/qcom_bam_dmux.c index 64dab8b57611..6a5b22589af4 100644 --- a/drivers/net/wwan/qcom_bam_dmux.c +++ b/drivers/net/wwan/qcom_bam_dmux.c @@ -162,7 +162,6 @@ static void bam_dmux_tx_done(struct bam_dmux_skb_dma *skb_dma) struct bam_dmux *dmux = skb_dma->dmux; unsigned long flags; - pm_runtime_mark_last_busy(dmux->dev); pm_runtime_put_autosuspend(dmux->dev); if (skb_dma->addr) @@ -397,7 +396,6 @@ static void bam_dmux_tx_wakeup_work(struct work_struct *work) dma_async_issue_pending(dmux->tx); out: - pm_runtime_mark_last_busy(dmux->dev); pm_runtime_put_autosuspend(dmux->dev); } diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index 97163e1e5783..689c920ca898 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -250,7 +250,6 @@ static void t7xx_cldma_rx_done(struct work_struct *work) t7xx_cldma_clear_ip_busy(&md_ctrl->hw_info); t7xx_cldma_hw_irq_en_txrx(&md_ctrl->hw_info, queue->index, MTK_RX); t7xx_cldma_hw_irq_en_eq(&md_ctrl->hw_info, queue->index, MTK_RX); - pm_runtime_mark_last_busy(md_ctrl->dev); pm_runtime_put_autosuspend(md_ctrl->dev); } @@ -362,7 +361,6 @@ static void t7xx_cldma_tx_done(struct work_struct *work) } spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); - pm_runtime_mark_last_busy(md_ctrl->dev); pm_runtime_put_autosuspend(md_ctrl->dev); } @@ -987,7 +985,6 @@ int t7xx_cldma_send_skb(struct cldma_ctrl *md_ctrl, int qno, struct sk_buff *skb allow_sleep: t7xx_pci_enable_sleep(md_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(md_ctrl->dev); pm_runtime_put_autosuspend(md_ctrl->dev); return ret; } diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c index 2310493203d3..b76bea6ab2d7 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c @@ -877,7 +877,6 @@ int t7xx_dpmaif_napi_rx_poll(struct napi_struct *napi, const int budget) t7xx_dpmaif_clr_ip_busy_sts(&rxq->dpmaif_ctrl->hw_info); t7xx_dpmaif_dlq_unmask_rx_done(&rxq->dpmaif_ctrl->hw_info, rxq->index); t7xx_pci_enable_sleep(rxq->dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(rxq->dpmaif_ctrl->dev); pm_runtime_put_autosuspend(rxq->dpmaif_ctrl->dev); atomic_set(&rxq->rx_processing, 0); } else { @@ -1078,7 +1077,6 @@ static void t7xx_dpmaif_bat_release_work(struct work_struct *work) } t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(dpmaif_ctrl->dev); pm_runtime_put_autosuspend(dpmaif_ctrl->dev); } diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c index 8dab025a088a..236d632cf591 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c @@ -185,7 +185,6 @@ static void t7xx_dpmaif_tx_done(struct work_struct *work) } t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(dpmaif_ctrl->dev); pm_runtime_put_autosuspend(dpmaif_ctrl->dev); } @@ -468,7 +467,6 @@ static int t7xx_dpmaif_tx_hw_push_thread(void *arg) t7xx_pci_disable_sleep(dpmaif_ctrl->t7xx_dev); t7xx_do_tx_hw_push(dpmaif_ctrl); t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(dpmaif_ctrl->dev); pm_runtime_put_autosuspend(dpmaif_ctrl->dev); } diff --git a/include/linux/icmp.h b/include/linux/icmp.h index 0af4d210ee31..043ec5d9c882 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -40,4 +40,36 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb, struct sock_ee_data_rfc4884 *out, int thlen, int off); +/* RFC 4884 */ +#define ICMP_EXT_ORIG_DGRAM_MIN_LEN 128 +#define ICMP_EXT_VERSION_2 2 + +/* ICMP Extension Object Classes */ +#define ICMP_EXT_OBJ_CLASS_IIO 2 /* RFC 5837 */ + +/* Interface Information Object - RFC 5837 */ +enum { + ICMP_EXT_CTYPE_IIO_ROLE_IIF, +}; + +#define ICMP_EXT_CTYPE_IIO_ROLE(ROLE) ((ROLE) << 6) +#define ICMP_EXT_CTYPE_IIO_MTU BIT(0) +#define ICMP_EXT_CTYPE_IIO_NAME BIT(1) +#define ICMP_EXT_CTYPE_IIO_IPADDR BIT(2) +#define ICMP_EXT_CTYPE_IIO_IFINDEX BIT(3) + +struct icmp_ext_iio_name_subobj { + u8 len; + char name[IFNAMSIZ]; +}; + +enum { + /* RFC 5837 - Incoming IP Interface Role */ + ICMP_ERR_EXT_IIO_IIF, + /* Add new constants above. Used by "icmp_errors_extension_mask" + * sysctl. + */ + ICMP_ERR_EXT_COUNT, +}; + #endif /* _LINUX_ICMP_H */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 17a2cdc9f1a0..358dd6f0ff96 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1869,7 +1869,7 @@ int phy_sfp_probe(struct phy_device *phydev, const struct sfp_upstream_ops *ops); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); -struct phy_device *phy_find_first(struct mii_bus *bus); +struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, @@ -1896,6 +1896,15 @@ bool phy_check_valid(int speed, int duplex, unsigned long *features); int phy_restart_aneg(struct phy_device *phydev); int phy_reset_after_clk_enable(struct phy_device *phydev); +static inline struct phy_device *phy_find_first(struct mii_bus *bus) +{ + return phy_find_next(bus, NULL); +} + +#define mdiobus_for_each_phy(_bus, _phydev) \ + for (_phydev = phy_find_first(_bus); _phydev; \ + _phydev = phy_find_next(_bus, _phydev)) + #if IS_ENABLED(CONFIG_PHYLIB) int phy_start_cable_test(struct phy_device *phydev, struct netlink_ext_ack *extack); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 34eb3aecb3f2..0e96c90e56c6 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,7 @@ struct netns_ipv4 { u8 sysctl_icmp_echo_ignore_broadcasts; u8 sysctl_icmp_ignore_bogus_error_responses; u8 sysctl_icmp_errors_use_inbound_ifaddr; + u8 sysctl_icmp_errors_extension_mask; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_msgs_per_sec; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 47dc70d8100a..08d2ecc96e2b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -56,6 +56,7 @@ struct netns_sysctl_ipv6 { u8 skip_notify_on_dev_down; u8 fib_notify_on_flag_change; u8 icmpv6_error_anycast_as_unicast; + u8 icmpv6_errors_extension_mask; }; struct netns_ipv6 { diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index fc752a50f91b..6ceb12baec24 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -24,5 +24,7 @@ struct netns_smc { int sysctl_rmem; int sysctl_max_links_per_lgr; int sysctl_max_conns_per_lgr; + unsigned int sysctl_smcr_max_send_wr; + unsigned int sysctl_smcr_max_recv_wr; }; #endif diff --git a/net/core/dev.c b/net/core/dev.c index d32f0b0c03bb..dccc1176f3c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1163,6 +1163,7 @@ void netdev_copy_name(struct net_device *dev, char *name) strscpy(name, dev->name, IFNAMSIZ); } while (read_seqretry(&netdev_rename_lock, seq)); } +EXPORT_IPV6_MOD_GPL(netdev_copy_name); /** * netdev_get_name - get a netdevice name, knowing its ifindex. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1b7fb5d935ed..4abbec2f47ef 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -582,6 +582,185 @@ relookup_failed: return ERR_PTR(err); } +struct icmp_ext_iio_addr4_subobj { + __be16 afi; + __be16 reserved; + __be32 addr4; +}; + +static unsigned int icmp_ext_iio_len(void) +{ + return sizeof(struct icmp_extobj_hdr) + + /* ifIndex */ + sizeof(__be32) + + /* Interface Address Sub-Object */ + sizeof(struct icmp_ext_iio_addr4_subobj) + + /* Interface Name Sub-Object. Length must be a multiple of 4 + * bytes. + */ + ALIGN(sizeof(struct icmp_ext_iio_name_subobj), 4) + + /* MTU */ + sizeof(__be32); +} + +static unsigned int icmp_ext_max_len(u8 ext_objs) +{ + unsigned int ext_max_len; + + ext_max_len = sizeof(struct icmp_ext_hdr); + + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + ext_max_len += icmp_ext_iio_len(); + + return ext_max_len; +} + +static __be32 icmp_ext_iio_addr4_find(const struct net_device *dev) +{ + struct in_device *in_dev; + struct in_ifaddr *ifa; + + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + return 0; + + /* It is unclear from RFC 5837 which IP address should be chosen, but + * it makes sense to choose a global unicast address. + */ + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) + continue; + if (ifa->ifa_scope != RT_SCOPE_UNIVERSE || + ipv4_is_multicast(ifa->ifa_address)) + continue; + return ifa->ifa_address; + } + + return 0; +} + +static void icmp_ext_iio_iif_append(struct net *net, struct sk_buff *skb, + int iif) +{ + struct icmp_ext_iio_name_subobj *name_subobj; + struct icmp_extobj_hdr *objh; + struct net_device *dev; + __be32 data; + + if (!iif) + return; + + /* Add the fields in the order specified by RFC 5837. */ + objh = skb_put(skb, sizeof(*objh)); + objh->class_num = ICMP_EXT_OBJ_CLASS_IIO; + objh->class_type = ICMP_EXT_CTYPE_IIO_ROLE(ICMP_EXT_CTYPE_IIO_ROLE_IIF); + + data = htonl(iif); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_IFINDEX; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, iif); + if (!dev) + goto out; + + data = icmp_ext_iio_addr4_find(dev); + if (data) { + struct icmp_ext_iio_addr4_subobj *addr4_subobj; + + addr4_subobj = skb_put_zero(skb, sizeof(*addr4_subobj)); + addr4_subobj->afi = htons(ICMP_AFI_IP); + addr4_subobj->addr4 = data; + objh->class_type |= ICMP_EXT_CTYPE_IIO_IPADDR; + } + + name_subobj = skb_put_zero(skb, ALIGN(sizeof(*name_subobj), 4)); + name_subobj->len = ALIGN(sizeof(*name_subobj), 4); + netdev_copy_name(dev, name_subobj->name); + objh->class_type |= ICMP_EXT_CTYPE_IIO_NAME; + + data = htonl(READ_ONCE(dev->mtu)); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_MTU; + +out: + rcu_read_unlock(); + objh->length = htons(skb_tail_pointer(skb) - (unsigned char *)objh); +} + +static void icmp_ext_objs_append(struct net *net, struct sk_buff *skb, + u8 ext_objs, int iif) +{ + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + icmp_ext_iio_iif_append(net, skb, iif); +} + +static struct sk_buff * +icmp_ext_append(struct net *net, struct sk_buff *skb_in, struct icmphdr *icmph, + unsigned int room, int iif) +{ + unsigned int payload_len, ext_max_len, ext_len; + struct icmp_ext_hdr *ext_hdr; + struct sk_buff *skb; + u8 ext_objs; + int nhoff; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return NULL; + } + + ext_objs = READ_ONCE(net->ipv4.sysctl_icmp_errors_extension_mask); + if (!ext_objs) + return NULL; + + ext_max_len = icmp_ext_max_len(ext_objs); + if (ICMP_EXT_ORIG_DGRAM_MIN_LEN + ext_max_len > room) + return NULL; + + skb = skb_clone(skb_in, GFP_ATOMIC); + if (!skb) + return NULL; + + nhoff = skb_network_offset(skb); + payload_len = min(skb->len - nhoff, ICMP_EXT_ORIG_DGRAM_MIN_LEN); + + if (!pskb_network_may_pull(skb, payload_len)) + goto free_skb; + + if (pskb_trim(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN) || + __skb_put_padto(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN, false)) + goto free_skb; + + if (pskb_expand_head(skb, 0, ext_max_len, GFP_ATOMIC)) + goto free_skb; + + ext_hdr = skb_put_zero(skb, sizeof(*ext_hdr)); + ext_hdr->version = ICMP_EXT_VERSION_2; + + icmp_ext_objs_append(net, skb, ext_objs, iif); + + /* Do not send an empty extension structure. */ + ext_len = skb_tail_pointer(skb) - (unsigned char *)ext_hdr; + if (ext_len == sizeof(*ext_hdr)) + goto free_skb; + + ext_hdr->checksum = ip_compute_csum(ext_hdr, ext_len); + /* The length of the original datagram in 32-bit words (RFC 4884). */ + icmph->un.reserved[1] = ICMP_EXT_ORIG_DGRAM_MIN_LEN / sizeof(u32); + + return skb; + +free_skb: + consume_skb(skb); + return NULL; +} + /* * Send an ICMP message in response to a situation * @@ -601,6 +780,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, struct icmp_bxm icmp_param; struct rtable *rt = skb_rtable(skb_in); bool apply_ratelimit = false; + struct sk_buff *ext_skb; struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; @@ -770,7 +950,12 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (room <= (int)sizeof(struct iphdr)) goto ende; - icmp_param.data_len = skb_in->len - icmp_param.offset; + ext_skb = icmp_ext_append(net, skb_in, &icmp_param.data.icmph, room, + parm->iif); + if (ext_skb) + icmp_param.skb = ext_skb; + + icmp_param.data_len = icmp_param.skb->len - icmp_param.offset; if (icmp_param.data_len > room) icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); @@ -785,6 +970,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, trace_icmp_send(skb_in, type, code); icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt); + + if (ext_skb) + consume_skb(ext_skb); ende: ip_rt_put(rt); out_unlock: @@ -1502,6 +1690,7 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.sysctl_icmp_ratelimit = 1 * HZ; net->ipv4.sysctl_icmp_ratemask = 0x1818; net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; + net->ipv4.sysctl_icmp_errors_extension_mask = 0; net->ipv4.sysctl_icmp_msgs_per_sec = 1000; net->ipv4.sysctl_icmp_msgs_burst = 50; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 273578579a6b..19d3141dad1f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -141,6 +141,8 @@ #include <linux/mroute.h> #include <linux/netlink.h> #include <net/dst_metadata.h> +#include <net/udp.h> +#include <net/tcp.h> /* * Process Router Attention IP option (RFC 2113) @@ -317,8 +319,6 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } -int tcp_v4_early_demux(struct sk_buff *skb); -enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb); static int ip_rcv_finish_core(struct net *net, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 24dbc603cc44..0c7c8f9041cb 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,8 @@ static int tcp_plb_max_rounds = 31; static int tcp_plb_max_cong_thresh = 256; static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC; static int tcp_ecn_mode_max = 2; +static u32 icmp_errors_extension_mask_all = + GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0); /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -675,6 +677,15 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = SYSCTL_ONE }, { + .procname = "icmp_errors_extension_mask", + .data = &init_net.ipv4.sysctl_icmp_errors_extension_mask, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &icmp_errors_extension_mask_all, + }, + { .procname = "icmp_ratelimit", .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 1b0314644e0c..44d7de1eec4f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -960,6 +960,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0; + net->ipv6.sysctl.icmpv6_errors_extension_mask = 0; /* By default, rate limit error messages. * Except for pmtu discovery, it would break it. diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 56c974cf75d1..5d2f90babaa5 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -444,6 +444,193 @@ static int icmp6_iif(const struct sk_buff *skb) return icmp6_dev(skb)->ifindex; } +struct icmp6_ext_iio_addr6_subobj { + __be16 afi; + __be16 reserved; + struct in6_addr addr6; +}; + +static unsigned int icmp6_ext_iio_len(void) +{ + return sizeof(struct icmp_extobj_hdr) + + /* ifIndex */ + sizeof(__be32) + + /* Interface Address Sub-Object */ + sizeof(struct icmp6_ext_iio_addr6_subobj) + + /* Interface Name Sub-Object. Length must be a multiple of 4 + * bytes. + */ + ALIGN(sizeof(struct icmp_ext_iio_name_subobj), 4) + + /* MTU */ + sizeof(__be32); +} + +static unsigned int icmp6_ext_max_len(u8 ext_objs) +{ + unsigned int ext_max_len; + + ext_max_len = sizeof(struct icmp_ext_hdr); + + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + ext_max_len += icmp6_ext_iio_len(); + + return ext_max_len; +} + +static struct in6_addr *icmp6_ext_iio_addr6_find(const struct net_device *dev) +{ + struct inet6_dev *in6_dev; + struct inet6_ifaddr *ifa; + + in6_dev = __in6_dev_get(dev); + if (!in6_dev) + return NULL; + + /* It is unclear from RFC 5837 which IP address should be chosen, but + * it makes sense to choose a global unicast address. + */ + list_for_each_entry_rcu(ifa, &in6_dev->addr_list, if_list) { + if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DADFAILED)) + continue; + if (ipv6_addr_type(&ifa->addr) != IPV6_ADDR_UNICAST || + ipv6_addr_src_scope(&ifa->addr) != IPV6_ADDR_SCOPE_GLOBAL) + continue; + return &ifa->addr; + } + + return NULL; +} + +static void icmp6_ext_iio_iif_append(struct net *net, struct sk_buff *skb, + int iif) +{ + struct icmp_ext_iio_name_subobj *name_subobj; + struct icmp_extobj_hdr *objh; + struct net_device *dev; + struct in6_addr *addr6; + __be32 data; + + if (!iif) + return; + + /* Add the fields in the order specified by RFC 5837. */ + objh = skb_put(skb, sizeof(*objh)); + objh->class_num = ICMP_EXT_OBJ_CLASS_IIO; + objh->class_type = ICMP_EXT_CTYPE_IIO_ROLE(ICMP_EXT_CTYPE_IIO_ROLE_IIF); + + data = htonl(iif); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_IFINDEX; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, iif); + if (!dev) + goto out; + + addr6 = icmp6_ext_iio_addr6_find(dev); + if (addr6) { + struct icmp6_ext_iio_addr6_subobj *addr6_subobj; + + addr6_subobj = skb_put_zero(skb, sizeof(*addr6_subobj)); + addr6_subobj->afi = htons(ICMP_AFI_IP6); + addr6_subobj->addr6 = *addr6; + objh->class_type |= ICMP_EXT_CTYPE_IIO_IPADDR; + } + + name_subobj = skb_put_zero(skb, ALIGN(sizeof(*name_subobj), 4)); + name_subobj->len = ALIGN(sizeof(*name_subobj), 4); + netdev_copy_name(dev, name_subobj->name); + objh->class_type |= ICMP_EXT_CTYPE_IIO_NAME; + + data = htonl(READ_ONCE(dev->mtu)); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_MTU; + +out: + rcu_read_unlock(); + objh->length = htons(skb_tail_pointer(skb) - (unsigned char *)objh); +} + +static void icmp6_ext_objs_append(struct net *net, struct sk_buff *skb, + u8 ext_objs, int iif) +{ + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + icmp6_ext_iio_iif_append(net, skb, iif); +} + +static struct sk_buff * +icmp6_ext_append(struct net *net, struct sk_buff *skb_in, + struct icmp6hdr *icmp6h, unsigned int room, int iif) +{ + unsigned int payload_len, ext_max_len, ext_len; + struct icmp_ext_hdr *ext_hdr; + struct sk_buff *skb; + u8 ext_objs; + int nhoff; + + switch (icmp6h->icmp6_type) { + case ICMPV6_DEST_UNREACH: + case ICMPV6_TIME_EXCEED: + break; + default: + return NULL; + } + + /* Do not overwrite existing extensions. This can happen when we + * receive an ICMPv4 message with extensions from a tunnel and + * translate it to an ICMPv6 message towards an IPv6 host in the + * overlay network. + */ + if (icmp6h->icmp6_datagram_len) + return NULL; + + ext_objs = READ_ONCE(net->ipv6.sysctl.icmpv6_errors_extension_mask); + if (!ext_objs) + return NULL; + + ext_max_len = icmp6_ext_max_len(ext_objs); + if (ICMP_EXT_ORIG_DGRAM_MIN_LEN + ext_max_len > room) + return NULL; + + skb = skb_clone(skb_in, GFP_ATOMIC); + if (!skb) + return NULL; + + nhoff = skb_network_offset(skb); + payload_len = min(skb->len - nhoff, ICMP_EXT_ORIG_DGRAM_MIN_LEN); + + if (!pskb_network_may_pull(skb, payload_len)) + goto free_skb; + + if (pskb_trim(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN) || + __skb_put_padto(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN, false)) + goto free_skb; + + if (pskb_expand_head(skb, 0, ext_max_len, GFP_ATOMIC)) + goto free_skb; + + ext_hdr = skb_put_zero(skb, sizeof(*ext_hdr)); + ext_hdr->version = ICMP_EXT_VERSION_2; + + icmp6_ext_objs_append(net, skb, ext_objs, iif); + + /* Do not send an empty extension structure. */ + ext_len = skb_tail_pointer(skb) - (unsigned char *)ext_hdr; + if (ext_len == sizeof(*ext_hdr)) + goto free_skb; + + ext_hdr->checksum = ip_compute_csum(ext_hdr, ext_len); + /* The length of the original datagram in 64-bit words (RFC 4884). */ + icmp6h->icmp6_datagram_len = ICMP_EXT_ORIG_DGRAM_MIN_LEN / sizeof(u64); + + return skb; + +free_skb: + consume_skb(skb); + return NULL; +} + /* * Send an ICMP message in response to a packet in error */ @@ -458,7 +645,9 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; bool apply_ratelimit = false; + struct sk_buff *ext_skb; struct dst_entry *dst; + unsigned int room; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; @@ -612,8 +801,13 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, msg.offset = skb_network_offset(skb); msg.type = type; - len = skb->len - msg.offset; - len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); + room = IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr); + ext_skb = icmp6_ext_append(net, skb, &tmp_hdr, room, parm->iif); + if (ext_skb) + msg.skb = ext_skb; + + len = msg.skb->len - msg.offset; + len = min_t(unsigned int, len, room); if (len < 0) { net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); @@ -635,6 +829,8 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, } out_dst_release: + if (ext_skb) + consume_skb(ext_skb); dst_release(dst); out_unlock: icmpv6_xmit_unlock(sk); @@ -1171,6 +1367,10 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL + +static u32 icmpv6_errors_extension_mask_all = + GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0); + static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", @@ -1216,6 +1416,15 @@ static struct ctl_table ipv6_icmp_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "errors_extension_mask", + .data = &init_net.ipv6.sysctl.icmpv6_errors_extension_mask, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &icmpv6_errors_extension_mask_all, + }, }; struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) @@ -1233,6 +1442,7 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr; table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast; + table[6].data = &net->ipv6.sysctl.icmpv6_errors_extension_mask; } return table; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ecec0a1e1c1a..f751cd5eeac8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1866,7 +1866,7 @@ int tcf_classify(struct sk_buff *skb, struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) { + if (!ext) { tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index be0c2da83d2b..e4eabc83719e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -810,6 +810,8 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->clearing = 0; lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); + lnk->max_send_wr = lgr->max_send_wr; + lnk->max_recv_wr = lgr->max_recv_wr; lnk->lgr = lgr; smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; @@ -836,27 +838,39 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, rc = smc_llc_link_init(lnk); if (rc) goto out; - rc = smc_wr_alloc_link_mem(lnk); - if (rc) - goto clear_llc_lnk; rc = smc_ib_create_protection_domain(lnk); if (rc) - goto free_link_mem; - rc = smc_ib_create_queue_pair(lnk); - if (rc) - goto dealloc_pd; + goto clear_llc_lnk; + do { + rc = smc_ib_create_queue_pair(lnk); + if (rc) + goto dealloc_pd; + rc = smc_wr_alloc_link_mem(lnk); + if (!rc) + break; + else if (rc != -ENOMEM) /* give up */ + goto destroy_qp; + /* retry with smaller ... */ + lnk->max_send_wr /= 2; + lnk->max_recv_wr /= 2; + /* ... unless droping below old SMC_WR_BUF_SIZE */ + if (lnk->max_send_wr < 16 || lnk->max_recv_wr < 48) + goto destroy_qp; + smc_ib_destroy_queue_pair(lnk); + } while (1); + rc = smc_wr_create_link(lnk); if (rc) - goto destroy_qp; + goto free_link_mem; lnk->state = SMC_LNK_ACTIVATING; return 0; +free_link_mem: + smc_wr_free_link_mem(lnk); destroy_qp: smc_ib_destroy_queue_pair(lnk); dealloc_pd: smc_ib_dealloc_protection_domain(lnk); -free_link_mem: - smc_wr_free_link_mem(lnk); clear_llc_lnk: smc_llc_link_clear(lnk, false); out: diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index a5a78cbff341..5c18f08a4c8a 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -34,6 +34,8 @@ * distributions may modify it to a value between * 16-255 as needed. */ +#define SMCR_MAX_SEND_WR_DEF 16 /* Default number of work requests per send queue */ +#define SMCR_MAX_RECV_WR_DEF 48 /* Default number of work requests per recv queue */ struct smc_lgr_list { /* list of link group definition */ struct list_head list; @@ -173,6 +175,8 @@ struct smc_link { struct completion llc_testlink_resp; /* wait for rx of testlink */ int llc_testlink_time; /* testlink interval */ atomic_t conn_cnt; /* connections on this link */ + u16 max_send_wr; + u16 max_recv_wr; }; /* For now we just allow one parallel link per link group. The SMC protocol @@ -366,6 +370,10 @@ struct smc_link_group { /* max conn can be assigned to lgr */ u8 max_links; /* max links can be added in lgr */ + u16 max_send_wr; + /* number of WR buffers on send */ + u16 max_recv_wr; + /* number of WR buffers on recv */ }; struct { /* SMC-D */ struct smcd_gid peer_gid; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0052f02756eb..1154907c5c05 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -669,11 +669,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .recv_cq = lnk->smcibdev->roce_cq_recv, .srq = NULL, .cap = { - /* include unsolicited rdma_writes as well, - * there are max. 2 RDMA_WRITE per 1 WR_SEND - */ - .max_send_wr = SMC_WR_BUF_CNT * 3, - .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = lnk->wr_rx_sge_cnt, .max_inline_data = 0, @@ -683,6 +678,11 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) }; int rc; + /* include unsolicited rdma_writes as well, + * there are max. 2 RDMA_WRITE per 1 WR_SEND + */ + qp_attr.cap.max_send_wr = 3 * lnk->lgr->max_send_wr; + qp_attr.cap.max_recv_wr = lnk->lgr->max_recv_wr; lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr); rc = PTR_ERR_OR_ZERO(lnk->roce_qp); if (IS_ERR(lnk->roce_qp)) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index f865c58c3aa7..f5d5eb617526 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2157,6 +2157,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_msg_waiter); init_rwsem(&lgr->llc_conf_mutex); lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time); + lgr->max_send_wr = (u16)(READ_ONCE(net->smc.sysctl_smcr_max_send_wr)); + lgr->max_recv_wr = (u16)(READ_ONCE(net->smc.sysctl_smcr_max_recv_wr)); } /* called after lgr was removed from lgr_list */ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 2fab6456f765..7b2471904d04 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -29,6 +29,8 @@ static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; +static unsigned int smcr_max_wr_min = 2; +static unsigned int smcr_max_wr_max = 2048; static struct ctl_table smc_table[] = { { @@ -99,6 +101,24 @@ static struct ctl_table smc_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "smcr_max_send_wr", + .data = &init_net.smc.sysctl_smcr_max_send_wr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &smcr_max_wr_min, + .extra2 = &smcr_max_wr_max, + }, + { + .procname = "smcr_max_recv_wr", + .data = &init_net.smc.sysctl_smcr_max_recv_wr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &smcr_max_wr_min, + .extra2 = &smcr_max_wr_max, + }, }; int __net_init smc_sysctl_net_init(struct net *net) @@ -130,6 +150,8 @@ int __net_init smc_sysctl_net_init(struct net *net) WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; + net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF; + net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF; /* disable handshake limitation by default */ net->smc.limit_smc_hs = 0; diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index eb2465ae1e15..8538915af7af 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -25,6 +25,8 @@ static inline int smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; + net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF; + net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF; return 0; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b04a21b8c511..5feafa98ab1a 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -547,9 +547,9 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) IB_QP_DEST_QPN, &init_attr); - lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT, + lnk->wr_tx_cnt = min_t(size_t, lnk->max_send_wr, lnk->qp_attr.cap.max_send_wr); - lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3, + lnk->wr_rx_cnt = min_t(size_t, lnk->max_recv_wr, lnk->qp_attr.cap.max_recv_wr); } @@ -741,50 +741,51 @@ int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) int smc_wr_alloc_link_mem(struct smc_link *link) { /* allocate link related memory */ - link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); + link->wr_tx_bufs = kcalloc(link->max_send_wr, + SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) goto no_mem; - link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_buflen, + link->wr_rx_bufs = kcalloc(link->max_recv_wr, link->wr_rx_buflen, GFP_KERNEL); if (!link->wr_rx_bufs) goto no_mem_wr_tx_bufs; - link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]), - GFP_KERNEL); + link->wr_tx_ibs = kcalloc(link->max_send_wr, + sizeof(link->wr_tx_ibs[0]), GFP_KERNEL); if (!link->wr_tx_ibs) goto no_mem_wr_rx_bufs; - link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3, + link->wr_rx_ibs = kcalloc(link->max_recv_wr, sizeof(link->wr_rx_ibs[0]), GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; - link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_rdmas = kcalloc(link->max_send_wr, sizeof(link->wr_tx_rdmas[0]), GFP_KERNEL); if (!link->wr_tx_rdmas) goto no_mem_wr_rx_ibs; - link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_rdma_sges = kcalloc(link->max_send_wr, sizeof(link->wr_tx_rdma_sges[0]), GFP_KERNEL); if (!link->wr_tx_rdma_sges) goto no_mem_wr_tx_rdmas; - link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), + link->wr_tx_sges = kcalloc(link->max_send_wr, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; - link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, + link->wr_rx_sges = kcalloc(link->max_recv_wr, sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt, GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; - link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL); + link->wr_tx_mask = bitmap_zalloc(link->max_send_wr, GFP_KERNEL); if (!link->wr_tx_mask) goto no_mem_wr_rx_sges; - link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_pends = kcalloc(link->max_send_wr, sizeof(link->wr_tx_pends[0]), GFP_KERNEL); if (!link->wr_tx_pends) goto no_mem_wr_tx_mask; - link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_compl = kcalloc(link->max_send_wr, sizeof(link->wr_tx_compl[0]), GFP_KERNEL); if (!link->wr_tx_compl) @@ -905,7 +906,7 @@ int smc_wr_create_link(struct smc_link *lnk) goto dma_unmap; } smc_wr_init_sge(lnk); - bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); + bitmap_zero(lnk->wr_tx_mask, lnk->max_send_wr); init_waitqueue_head(&lnk->wr_tx_wait); rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL); if (rc) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f3008dda222a..aa4533af9122 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -19,8 +19,6 @@ #include "smc.h" #include "smc_core.h" -#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ - #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a85..b282d1785c9b 100644 --- a/tools/testing/selftests/net/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -9,9 +9,10 @@ int main(int argc, char *argv[]) { struct sockaddr_in saddr = {}, daddr = {}; - int sd, ret, len = sizeof(daddr); + socklen_t len = sizeof(daddr); struct timeval tv = {25, 0}; char buf[] = "hello"; + int sd, ret; if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index dbb34c7e09ce..a7c6ab8a0347 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -36,6 +36,35 @@ run_cmd() return $rc } +__check_traceroute_version() +{ + local cmd=$1; shift + local req_ver=$1; shift + local ver + + req_ver=$(echo "$req_ver" | sed 's/\.//g') + ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g') + if [[ $ver -lt $req_ver ]]; then + return 1 + else + return 0 + fi +} + +check_traceroute6_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute6 "$req_ver" +} + +check_traceroute_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute "$req_ver" +} + ################################################################################ # create namespaces and interconnects @@ -59,6 +88,8 @@ create_ns() ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0 + ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 @@ -298,6 +329,144 @@ run_traceroute6_vrf() } ################################################################################ +# traceroute6 with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute6_ext() +{ + cleanup_all_ns +} + +setup_traceroute6_ext() +{ + # Start clean + cleanup_traceroute6_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 2001:db8:1::1/128 dev lo + ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 2001:db8:1::2/128 dev lo + ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1 + ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 2001:db8:1::3/128 dev lo + ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1 +} + +traceroute6_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1" + + # Change name and MTU and make sure the result is still correct. + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" +} + +run_traceroute6_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute6_version 2.1.5; then + log_test_skip "traceroute6 too old, missing ICMP extensions support" + return + fi + + setup_traceroute6_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute6_ext_iio_iif_test + traceroute6_ext_iio_iif_test 127 + traceroute6_ext_iio_iif_test 128 + traceroute6_ext_iio_iif_test 129 + + log_test "IPv6 traceroute with ICMP extensions" + + cleanup_traceroute6_ext +} + +################################################################################ # traceroute test # # Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when @@ -438,14 +607,157 @@ run_traceroute_vrf() } ################################################################################ +# traceroute with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute_ext() +{ + cleanup_all_ns +} + +setup_traceroute_ext() +{ + # Start clean + cleanup_traceroute_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 192.0.2.1/32 dev lo + ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 192.0.2.2/32 dev lo + ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1 + ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 192.0.2.3/32 dev lo + ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1 +} + +traceroute_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1" + + # Change name and MTU and make sure the result is still correct. + # Re-add the route towards H1 since it was deleted when we removed the + # last IPv4 address from eth1 on R1. + run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1" + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" +} + +run_traceroute_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute_version 2.1.5; then + log_test_skip "traceroute too old, missing ICMP extensions support" + return + fi + + setup_traceroute_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute_ext_iio_iif_test + traceroute_ext_iio_iif_test 127 + traceroute_ext_iio_iif_test 128 + traceroute_ext_iio_iif_test 129 + + log_test "IPv4 traceroute with ICMP extensions" + + cleanup_traceroute_ext +} + +################################################################################ # Run tests run_tests() { run_traceroute6 run_traceroute6_vrf + run_traceroute6_ext run_traceroute run_traceroute_vrf + run_traceroute_ext } ################################################################################ @@ -462,6 +774,7 @@ done require_command traceroute6 require_command traceroute +require_command jq run_tests |
