diff options
Diffstat (limited to 'drivers/net/ethernet/marvell/mvneta.c')
| -rw-r--r-- | drivers/net/ethernet/marvell/mvneta.c | 4520 |
1 files changed, 3816 insertions, 704 deletions
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 712779fb12b7..7af44f858fa3 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -11,28 +11,43 @@ * warranty of any kind, whether express or implied. */ -#include <linux/kernel.h> -#include <linux/netdevice.h> +#include <linux/clk.h> +#include <linux/cpu.h> #include <linux/etherdevice.h> -#include <linux/platform_device.h> -#include <linux/skbuff.h> +#include <linux/if_vlan.h> #include <linux/inetdevice.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> #include <linux/mbus.h> #include <linux/module.h> -#include <linux/interrupt.h> -#include <net/ip.h> -#include <net/ipv6.h> +#include <linux/netdevice.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_mdio.h> #include <linux/of_net.h> -#include <linux/of_address.h> +#include <linux/phy/phy.h> #include <linux/phy.h> -#include <linux/clk.h> +#include <linux/phylink.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> +#include <net/hwbm.h> +#include "mvneta_bm.h" +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/tso.h> +#include <net/page_pool/helpers.h> +#include <net/pkt_sched.h> +#include <linux/bpf_trace.h> /* Registers */ #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) -#define MVNETA_RXQ_HW_BUF_ALLOC BIT(1) +#define MVNETA_RXQ_HW_BUF_ALLOC BIT(0) +#define MVNETA_RXQ_SHORT_POOL_ID_SHIFT 4 +#define MVNETA_RXQ_SHORT_POOL_ID_MASK 0x30 +#define MVNETA_RXQ_LONG_POOL_ID_SHIFT 6 +#define MVNETA_RXQ_LONG_POOL_ID_MASK 0xc0 #define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8) #define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8) #define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2)) @@ -46,6 +61,9 @@ #define MVNETA_RXQ_STATUS_UPDATE_REG(q) (0x1500 + ((q) << 2)) #define MVNETA_RXQ_ADD_NON_OCCUPIED_SHIFT 16 #define MVNETA_RXQ_ADD_NON_OCCUPIED_MAX 255 +#define MVNETA_PORT_POOL_BUFFER_SZ_REG(pool) (0x1700 + ((pool) << 2)) +#define MVNETA_PORT_POOL_BUFFER_SZ_SHIFT 3 +#define MVNETA_PORT_POOL_BUFFER_SZ_MASK 0xfff8 #define MVNETA_PORT_RX_RESET 0x1cc0 #define MVNETA_PORT_RX_DMA_RESET BIT(0) #define MVNETA_PHY_ADDR 0x2000 @@ -58,6 +76,9 @@ #define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3)) #define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2)) #define MVNETA_BASE_ADDR_ENABLE 0x2290 +#define MVNETA_AC5_CNM_DDR_TARGET 0x2 +#define MVNETA_AC5_CNM_DDR_ATTR 0xb +#define MVNETA_ACCESS_PROTECT_ENABLE 0x2294 #define MVNETA_PORT_CONFIG 0x2400 #define MVNETA_UNI_PROMISC_MODE BIT(0) #define MVNETA_DEF_RXQ(q) ((q) << 1) @@ -79,36 +100,97 @@ #define MVNETA_MAC_ADDR_HIGH 0x2418 #define MVNETA_SDMA_CONFIG 0x241c #define MVNETA_SDMA_BRST_SIZE_16 4 -#define MVNETA_NO_DESC_SWAP 0x0 #define MVNETA_RX_BRST_SZ_MASK(burst) ((burst) << 1) #define MVNETA_RX_NO_DATA_SWAP BIT(4) #define MVNETA_TX_NO_DATA_SWAP BIT(5) +#define MVNETA_DESC_SWAP BIT(6) #define MVNETA_TX_BRST_SZ_MASK(burst) ((burst) << 22) +#define MVNETA_VLAN_PRIO_TO_RXQ 0x2440 +#define MVNETA_VLAN_PRIO_RXQ_MAP(prio, rxq) ((rxq) << ((prio) * 3)) #define MVNETA_PORT_STATUS 0x2444 -#define MVNETA_TX_IN_PRGRS BIT(1) +#define MVNETA_TX_IN_PRGRS BIT(0) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c +/* Only exists on Armada XP and Armada 370 */ +#define MVNETA_SERDES_CFG 0x24A0 +#define MVNETA_SGMII_SERDES_PROTO 0x0cc7 +#define MVNETA_QSGMII_SERDES_PROTO 0x0667 +#define MVNETA_HSGMII_SERDES_PROTO 0x1107 #define MVNETA_TYPE_PRIO 0x24bc #define MVNETA_FORCE_UNI BIT(21) #define MVNETA_TXQ_CMD_1 0x24e4 #define MVNETA_TXQ_CMD 0x2448 #define MVNETA_TXQ_DISABLE_SHIFT 8 #define MVNETA_TXQ_ENABLE_MASK 0x000000ff +#define MVNETA_RX_DISCARD_FRAME_COUNT 0x2484 +#define MVNETA_OVERRUN_FRAME_COUNT 0x2488 +#define MVNETA_GMAC_CLOCK_DIVIDER 0x24f4 +#define MVNETA_GMAC_1MS_CLOCK_ENABLE BIT(31) #define MVNETA_ACC_MODE 0x2500 +#define MVNETA_BM_ADDRESS 0x2504 #define MVNETA_CPU_MAP(cpu) (0x2540 + ((cpu) << 2)) #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00 +#define MVNETA_CPU_RXQ_ACCESS(rxq) BIT(rxq) +#define MVNETA_CPU_TXQ_ACCESS(txq) BIT(txq + 8) #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2)) + +/* Exception Interrupt Port/Queue Cause register + * + * Their behavior depend of the mapping done using the PCPX2Q + * registers. For a given CPU if the bit associated to a queue is not + * set, then for the register a read from this CPU will always return + * 0 and a write won't do anything + */ + #define MVNETA_INTR_NEW_CAUSE 0x25a0 -#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8) #define MVNETA_INTR_NEW_MASK 0x25a4 + +/* bits 0..7 = TXQ SENT, one bit per queue. + * bits 8..15 = RXQ OCCUP, one bit per queue. + * bits 16..23 = RXQ FREE, one bit per queue. + * bit 29 = OLD_REG_SUM, see old reg ? + * bit 30 = TX_ERR_SUM, one bit for 4 ports + * bit 31 = MISC_SUM, one bit for 4 ports + */ +#define MVNETA_TX_INTR_MASK(nr_txqs) (((1 << nr_txqs) - 1) << 0) +#define MVNETA_TX_INTR_MASK_ALL (0xff << 0) +#define MVNETA_RX_INTR_MASK(nr_rxqs) (((1 << nr_rxqs) - 1) << 8) +#define MVNETA_RX_INTR_MASK_ALL (0xff << 8) +#define MVNETA_MISCINTR_INTR_MASK BIT(31) + #define MVNETA_INTR_OLD_CAUSE 0x25a8 #define MVNETA_INTR_OLD_MASK 0x25ac + +/* Data Path Port/Queue Cause Register */ #define MVNETA_INTR_MISC_CAUSE 0x25b0 #define MVNETA_INTR_MISC_MASK 0x25b4 + +#define MVNETA_CAUSE_PHY_STATUS_CHANGE BIT(0) +#define MVNETA_CAUSE_LINK_CHANGE BIT(1) +#define MVNETA_CAUSE_PTP BIT(4) + +#define MVNETA_CAUSE_INTERNAL_ADDR_ERR BIT(7) +#define MVNETA_CAUSE_RX_OVERRUN BIT(8) +#define MVNETA_CAUSE_RX_CRC_ERROR BIT(9) +#define MVNETA_CAUSE_RX_LARGE_PKT BIT(10) +#define MVNETA_CAUSE_TX_UNDERUN BIT(11) +#define MVNETA_CAUSE_PRBS_ERR BIT(12) +#define MVNETA_CAUSE_PSC_SYNC_CHANGE BIT(13) +#define MVNETA_CAUSE_SERDES_SYNC_ERR BIT(14) + +#define MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT 16 +#define MVNETA_CAUSE_BMU_ALLOC_ERR_ALL_MASK (0xF << MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT) +#define MVNETA_CAUSE_BMU_ALLOC_ERR_MASK(pool) (1 << (MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT + (pool))) + +#define MVNETA_CAUSE_TXQ_ERROR_SHIFT 24 +#define MVNETA_CAUSE_TXQ_ERROR_ALL_MASK (0xFF << MVNETA_CAUSE_TXQ_ERROR_SHIFT) +#define MVNETA_CAUSE_TXQ_ERROR_MASK(q) (1 << (MVNETA_CAUSE_TXQ_ERROR_SHIFT + (q))) + #define MVNETA_INTR_ENABLE 0x25b8 #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00 -#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 +#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0x000000ff + #define MVNETA_RXQ_CMD 0x2680 #define MVNETA_RXQ_DISABLE_SHIFT 8 #define MVNETA_RXQ_ENABLE_MASK 0x000000ff @@ -117,9 +199,11 @@ #define MVNETA_GMAC_CTRL_0 0x2c00 #define MVNETA_GMAC_MAX_RX_SIZE_SHIFT 2 #define MVNETA_GMAC_MAX_RX_SIZE_MASK 0x7ffc +#define MVNETA_GMAC0_PORT_1000BASE_X BIT(1) #define MVNETA_GMAC0_PORT_ENABLE BIT(0) #define MVNETA_GMAC_CTRL_2 0x2c08 -#define MVNETA_GMAC2_PSC_ENABLE BIT(3) +#define MVNETA_GMAC2_INBAND_AN_ENABLE BIT(0) +#define MVNETA_GMAC2_PCS_ENABLE BIT(3) #define MVNETA_GMAC2_PORT_RGMII BIT(4) #define MVNETA_GMAC2_PORT_RESET BIT(6) #define MVNETA_GMAC_STATUS 0x2c10 @@ -131,13 +215,25 @@ #define MVNETA_GMAC_TX_FLOW_CTRL_ENABLE BIT(5) #define MVNETA_GMAC_RX_FLOW_CTRL_ACTIVE BIT(6) #define MVNETA_GMAC_TX_FLOW_CTRL_ACTIVE BIT(7) +#define MVNETA_GMAC_AN_COMPLETE BIT(11) +#define MVNETA_GMAC_SYNC_OK BIT(14) #define MVNETA_GMAC_AUTONEG_CONFIG 0x2c0c #define MVNETA_GMAC_FORCE_LINK_DOWN BIT(0) #define MVNETA_GMAC_FORCE_LINK_PASS BIT(1) +#define MVNETA_GMAC_INBAND_AN_ENABLE BIT(2) +#define MVNETA_GMAC_AN_BYPASS_ENABLE BIT(3) +#define MVNETA_GMAC_INBAND_RESTART_AN BIT(4) #define MVNETA_GMAC_CONFIG_MII_SPEED BIT(5) #define MVNETA_GMAC_CONFIG_GMII_SPEED BIT(6) +#define MVNETA_GMAC_AN_SPEED_EN BIT(7) +#define MVNETA_GMAC_CONFIG_FLOW_CTRL BIT(8) +#define MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL BIT(9) +#define MVNETA_GMAC_AN_FLOW_CTRL_EN BIT(11) #define MVNETA_GMAC_CONFIG_FULL_DUPLEX BIT(12) -#define MVNETA_MIB_COUNTERS_BASE 0x3080 +#define MVNETA_GMAC_AN_DUPLEX_EN BIT(13) +#define MVNETA_GMAC_CTRL_4 0x2c90 +#define MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE BIT(1) +#define MVNETA_MIB_COUNTERS_BASE 0x3000 #define MVNETA_MIB_LATE_COLLISION 0x7c #define MVNETA_DA_FILT_SPEC_MCAST 0x3400 #define MVNETA_DA_FILT_OTH_MCAST 0x3500 @@ -148,17 +244,55 @@ #define MVNETA_TXQ_SENT_THRESH_MASK(coal) ((coal) << 16) #define MVNETA_TXQ_UPDATE_REG(q) (0x3c60 + ((q) << 2)) #define MVNETA_TXQ_DEC_SENT_SHIFT 16 +#define MVNETA_TXQ_DEC_SENT_MASK 0xff #define MVNETA_TXQ_STATUS_REG(q) (0x3c40 + ((q) << 2)) #define MVNETA_TXQ_SENT_DESC_SHIFT 16 #define MVNETA_TXQ_SENT_DESC_MASK 0x3fff0000 #define MVNETA_PORT_TX_RESET 0x3cf0 #define MVNETA_PORT_TX_DMA_RESET BIT(0) +#define MVNETA_TXQ_CMD1_REG 0x3e00 +#define MVNETA_TXQ_CMD1_BW_LIM_SEL_V1 BIT(3) +#define MVNETA_TXQ_CMD1_BW_LIM_EN BIT(0) +#define MVNETA_REFILL_NUM_CLK_REG 0x3e08 +#define MVNETA_REFILL_MAX_NUM_CLK 0x0000ffff #define MVNETA_TX_MTU 0x3e0c #define MVNETA_TX_TOKEN_SIZE 0x3e14 #define MVNETA_TX_TOKEN_SIZE_MAX 0xffffffff +#define MVNETA_TXQ_BUCKET_REFILL_REG(q) (0x3e20 + ((q) << 2)) +#define MVNETA_TXQ_BUCKET_REFILL_PERIOD_MASK 0x3ff00000 +#define MVNETA_TXQ_BUCKET_REFILL_PERIOD_SHIFT 20 +#define MVNETA_TXQ_BUCKET_REFILL_VALUE_MAX 0x0007ffff #define MVNETA_TXQ_TOKEN_SIZE_REG(q) (0x3e40 + ((q) << 2)) #define MVNETA_TXQ_TOKEN_SIZE_MAX 0x7fffffff +/* The values of the bucket refill base period and refill period are taken from + * the reference manual, and adds up to a base resolution of 10Kbps. This allows + * to cover all rate-limit values from 10Kbps up to 5Gbps + */ + +/* Base period for the rate limit algorithm */ +#define MVNETA_TXQ_BUCKET_REFILL_BASE_PERIOD_NS 100 + +/* Number of Base Period to wait between each bucket refill */ +#define MVNETA_TXQ_BUCKET_REFILL_PERIOD 1000 + +/* The base resolution for rate limiting, in bps. Any max_rate value should be + * a multiple of that value. + */ +#define MVNETA_TXQ_RATE_LIMIT_RESOLUTION (NSEC_PER_SEC / \ + (MVNETA_TXQ_BUCKET_REFILL_BASE_PERIOD_NS * \ + MVNETA_TXQ_BUCKET_REFILL_PERIOD)) + +#define MVNETA_LPI_CTRL_0 0x2cc0 +#define MVNETA_LPI_CTRL_0_TS (0xff << 8) +#define MVNETA_LPI_CTRL_1 0x2cc4 +#define MVNETA_LPI_CTRL_1_REQUEST_ENABLE BIT(0) +#define MVNETA_LPI_CTRL_1_REQUEST_FORCE BIT(1) +#define MVNETA_LPI_CTRL_1_MANUAL_MODE BIT(2) +#define MVNETA_LPI_CTRL_1_TW (0xfff << 4) +#define MVNETA_LPI_CTRL_2 0x2cc8 +#define MVNETA_LPI_STATUS 0x2ccc + #define MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK 0xff /* Descriptor ring Macros */ @@ -168,16 +302,10 @@ /* Various constants */ /* Coalescing */ -#define MVNETA_TXDONE_COAL_PKTS 16 +#define MVNETA_TXDONE_COAL_PKTS 0 /* interrupt per packet */ #define MVNETA_RX_COAL_PKTS 32 #define MVNETA_RX_COAL_USEC 100 -/* Timer */ -#define MVNETA_TX_DONE_TIMER_PERIOD 10 - -/* Napi polling weight */ -#define MVNETA_RX_POLL_WEIGHT 64 - /* The two bytes Marvell header. Either contains a special value used * by Marvell switches when a specific hardware mode is enabled (not * supported by this driver) or is filled automatically by zeroes on @@ -190,9 +318,12 @@ #define MVNETA_VLAN_TAG_LEN 4 -#define MVNETA_CPU_D_CACHE_LINE_SIZE 32 +#define MVNETA_TX_CSUM_DEF_SIZE 1600 #define MVNETA_TX_CSUM_MAX_SIZE 9800 -#define MVNETA_ACC_MODE_EXT 1 +#define MVNETA_ACC_MODE_EXT1 1 +#define MVNETA_ACC_MODE_EXT2 2 + +#define MVNETA_MAX_DECODE_WIN 6 /* Timeout constants */ #define MVNETA_TX_DISABLE_TIMEOUT_MSEC 1000 @@ -201,69 +332,235 @@ #define MVNETA_TX_MTU_MAX 0x3ffff +/* The RSS lookup table actually has 256 entries but we do not use + * them yet + */ +#define MVNETA_RSS_LU_TABLE_SIZE 1 + /* Max number of Rx descriptors */ -#define MVNETA_MAX_RXD 128 +#define MVNETA_MAX_RXD 512 /* Max number of Tx descriptors */ -#define MVNETA_MAX_TXD 532 +#define MVNETA_MAX_TXD 1024 + +/* Max number of allowed TCP segments for software TSO */ +#define MVNETA_MAX_TSO_SEGS 100 + +#define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) + +/* The size of a TSO header page */ +#define MVNETA_TSO_PAGE_SIZE (2 * PAGE_SIZE) + +/* Number of TSO headers per page. This should be a power of 2 */ +#define MVNETA_TSO_PER_PAGE (MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE) + +/* Maximum number of TSO header pages */ +#define MVNETA_MAX_TSO_PAGES (MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE) /* descriptor aligned size */ #define MVNETA_DESC_ALIGNED_SIZE 32 +/* Number of bytes to be taken into account by HW when putting incoming data + * to the buffers. It is needed in case NET_SKB_PAD exceeds maximum packet + * offset supported in MVNETA_RXQ_CONFIG_REG(q) registers. + */ +#define MVNETA_RX_PKT_OFFSET_CORRECTION 64 + #define MVNETA_RX_PKT_SIZE(mtu) \ ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \ ETH_HLEN + ETH_FCS_LEN, \ - MVNETA_CPU_D_CACHE_LINE_SIZE) + cache_line_size()) + +/* Driver assumes that the last 3 bits are 0 */ +#define MVNETA_SKB_HEADROOM ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) +#define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \ + MVNETA_SKB_HEADROOM)) +#define MVNETA_MAX_RX_BUF_SIZE (PAGE_SIZE - MVNETA_SKB_PAD) + +#define MVNETA_RX_GET_BM_POOL_ID(rxd) \ + (((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT) + +enum { + ETHTOOL_STAT_EEE_WAKEUP, + ETHTOOL_STAT_SKB_ALLOC_ERR, + ETHTOOL_STAT_REFILL_ERR, + ETHTOOL_XDP_REDIRECT, + ETHTOOL_XDP_PASS, + ETHTOOL_XDP_DROP, + ETHTOOL_XDP_TX, + ETHTOOL_XDP_TX_ERR, + ETHTOOL_XDP_XMIT, + ETHTOOL_XDP_XMIT_ERR, + ETHTOOL_MAX_STATS, +}; + +struct mvneta_statistic { + unsigned short offset; + unsigned short type; + const char name[ETH_GSTRING_LEN]; +}; -#define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) +#define T_REG_32 32 +#define T_REG_64 64 +#define T_SW 1 + +#define MVNETA_XDP_PASS 0 +#define MVNETA_XDP_DROPPED BIT(0) +#define MVNETA_XDP_TX BIT(1) +#define MVNETA_XDP_REDIR BIT(2) + +static const struct mvneta_statistic mvneta_statistics[] = { + { 0x3000, T_REG_64, "good_octets_received", }, + { 0x3010, T_REG_32, "good_frames_received", }, + { 0x3008, T_REG_32, "bad_octets_received", }, + { 0x3014, T_REG_32, "bad_frames_received", }, + { 0x3018, T_REG_32, "broadcast_frames_received", }, + { 0x301c, T_REG_32, "multicast_frames_received", }, + { 0x3050, T_REG_32, "unrec_mac_control_received", }, + { 0x3058, T_REG_32, "good_fc_received", }, + { 0x305c, T_REG_32, "bad_fc_received", }, + { 0x3060, T_REG_32, "undersize_received", }, + { 0x3064, T_REG_32, "fragments_received", }, + { 0x3068, T_REG_32, "oversize_received", }, + { 0x306c, T_REG_32, "jabber_received", }, + { 0x3070, T_REG_32, "mac_receive_error", }, + { 0x3074, T_REG_32, "bad_crc_event", }, + { 0x3078, T_REG_32, "collision", }, + { 0x307c, T_REG_32, "late_collision", }, + { 0x2484, T_REG_32, "rx_discard", }, + { 0x2488, T_REG_32, "rx_overrun", }, + { 0x3020, T_REG_32, "frames_64_octets", }, + { 0x3024, T_REG_32, "frames_65_to_127_octets", }, + { 0x3028, T_REG_32, "frames_128_to_255_octets", }, + { 0x302c, T_REG_32, "frames_256_to_511_octets", }, + { 0x3030, T_REG_32, "frames_512_to_1023_octets", }, + { 0x3034, T_REG_32, "frames_1024_to_max_octets", }, + { 0x3038, T_REG_64, "good_octets_sent", }, + { 0x3040, T_REG_32, "good_frames_sent", }, + { 0x3044, T_REG_32, "excessive_collision", }, + { 0x3048, T_REG_32, "multicast_frames_sent", }, + { 0x304c, T_REG_32, "broadcast_frames_sent", }, + { 0x3054, T_REG_32, "fc_sent", }, + { 0x300c, T_REG_32, "internal_mac_transmit_err", }, + { ETHTOOL_STAT_EEE_WAKEUP, T_SW, "eee_wakeup_errors", }, + { ETHTOOL_STAT_SKB_ALLOC_ERR, T_SW, "skb_alloc_errors", }, + { ETHTOOL_STAT_REFILL_ERR, T_SW, "refill_errors", }, + { ETHTOOL_XDP_REDIRECT, T_SW, "rx_xdp_redirect", }, + { ETHTOOL_XDP_PASS, T_SW, "rx_xdp_pass", }, + { ETHTOOL_XDP_DROP, T_SW, "rx_xdp_drop", }, + { ETHTOOL_XDP_TX, T_SW, "rx_xdp_tx", }, + { ETHTOOL_XDP_TX_ERR, T_SW, "rx_xdp_tx_errors", }, + { ETHTOOL_XDP_XMIT, T_SW, "tx_xdp_xmit", }, + { ETHTOOL_XDP_XMIT_ERR, T_SW, "tx_xdp_xmit_errors", }, +}; struct mvneta_stats { - struct u64_stats_sync syncp; - u64 packets; - u64 bytes; + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + /* xdp */ + u64 xdp_redirect; + u64 xdp_pass; + u64 xdp_drop; + u64 xdp_xmit; + u64 xdp_xmit_err; + u64 xdp_tx; + u64 xdp_tx_err; +}; + +struct mvneta_ethtool_stats { + struct mvneta_stats ps; + u64 skb_alloc_error; + u64 refill_error; +}; + +struct mvneta_pcpu_stats { + struct u64_stats_sync syncp; + + struct mvneta_ethtool_stats es; + u64 rx_dropped; + u64 rx_errors; +}; + +struct mvneta_pcpu_port { + /* Pointer to the shared port */ + struct mvneta_port *pp; + + /* Pointer to the CPU-local NAPI struct */ + struct napi_struct napi; + + /* Cause of the previous interrupt */ + u32 cause_rx_tx; +}; + +enum { + __MVNETA_DOWN, }; struct mvneta_port { + u8 id; + struct mvneta_pcpu_port __percpu *ports; + struct mvneta_pcpu_stats __percpu *stats; + + unsigned long state; + int pkt_size; void __iomem *base; struct mvneta_rx_queue *rxqs; struct mvneta_tx_queue *txqs; - struct timer_list tx_done_timer; struct net_device *dev; + struct hlist_node node_online; + struct hlist_node node_dead; + int rxq_def; + /* Protect the access to the percpu interrupt registers, + * ensuring that the configuration remains coherent. + */ + spinlock_t lock; + bool is_stopped; u32 cause_rx_tx; struct napi_struct napi; - /* Flags */ - unsigned long flags; -#define MVNETA_F_TX_DONE_TIMER_BIT 0 - - /* Napi weight */ - int weight; + struct bpf_prog *xdp_prog; /* Core clock */ struct clk *clk; + /* AXI clock */ + struct clk *clk_bus; u8 mcast_count[256]; u16 tx_ring_size; u16 rx_ring_size; - struct mvneta_stats tx_stats; - struct mvneta_stats rx_stats; - struct mii_bus *mii_bus; - struct phy_device *phy_dev; phy_interface_t phy_interface; - struct device_node *phy_node; - unsigned int link; - unsigned int duplex; - unsigned int speed; + struct device_node *dn; + unsigned int tx_csum_limit; + struct phylink *phylink; + struct phylink_config phylink_config; + struct phylink_pcs phylink_pcs; + struct phy *comphy; + + struct mvneta_bm *bm_priv; + struct mvneta_bm_pool *pool_long; + struct mvneta_bm_pool *pool_short; + int bm_win_id; + + u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; + + u32 indir[MVNETA_RSS_LU_TABLE_SIZE]; + + /* Flags for special SoC configurations */ + bool neta_armada3700; + bool neta_ac5; + u16 rx_offset_correction; + const struct mbus_dram_target_info *dram_target_info; }; /* The mvneta_tx_desc and mvneta_rx_desc structures describe the * layout of the transmit and reception DMA descriptors, and their * layout is therefore defined by the hardware design */ -struct mvneta_tx_desc { - u32 command; /* Options used by HW for packet transmitting.*/ + #define MVNETA_TX_L3_OFF_SHIFT 0 #define MVNETA_TX_IP_HLEN_SHIFT 8 #define MVNETA_TX_L4_UDP BIT(16) @@ -278,35 +575,87 @@ struct mvneta_tx_desc { #define MVNETA_TX_L4_CSUM_FULL BIT(30) #define MVNETA_TX_L4_CSUM_NOT BIT(31) - u16 reserverd1; /* csum_l4 (for future use) */ - u16 data_size; /* Data size of transmitted packet in bytes */ - u32 buf_phys_addr; /* Physical addr of transmitted buffer */ - u32 reserved2; /* hw_cmd - (for future use, PMT) */ - u32 reserved3[4]; /* Reserved - (for future use) */ -}; - -struct mvneta_rx_desc { - u32 status; /* Info about received packet */ #define MVNETA_RXD_ERR_CRC 0x0 +#define MVNETA_RXD_BM_POOL_SHIFT 13 +#define MVNETA_RXD_BM_POOL_MASK (BIT(13) | BIT(14)) #define MVNETA_RXD_ERR_SUMMARY BIT(16) #define MVNETA_RXD_ERR_OVERRUN BIT(17) #define MVNETA_RXD_ERR_LEN BIT(18) #define MVNETA_RXD_ERR_RESOURCE (BIT(17) | BIT(18)) #define MVNETA_RXD_ERR_CODE_MASK (BIT(17) | BIT(18)) #define MVNETA_RXD_L3_IP4 BIT(25) -#define MVNETA_RXD_FIRST_LAST_DESC (BIT(26) | BIT(27)) +#define MVNETA_RXD_LAST_DESC BIT(26) +#define MVNETA_RXD_FIRST_DESC BIT(27) +#define MVNETA_RXD_FIRST_LAST_DESC (MVNETA_RXD_FIRST_DESC | \ + MVNETA_RXD_LAST_DESC) #define MVNETA_RXD_L4_CSUM_OK BIT(30) +#if defined(__LITTLE_ENDIAN) +struct mvneta_tx_desc { + u32 command; /* Options used by HW for packet transmitting.*/ + u16 reserved1; /* csum_l4 (for future use) */ + u16 data_size; /* Data size of transmitted packet in bytes */ + u32 buf_phys_addr; /* Physical addr of transmitted buffer */ + u32 reserved2; /* hw_cmd - (for future use, PMT) */ + u32 reserved3[4]; /* Reserved - (for future use) */ +}; + +struct mvneta_rx_desc { + u32 status; /* Info about received packet */ u16 reserved1; /* pnc_info - (for future use, PnC) */ u16 data_size; /* Size of received packet in bytes */ + u32 buf_phys_addr; /* Physical address of the buffer */ u32 reserved2; /* pnc_flow_id (for future use, PnC) */ + u32 buf_cookie; /* cookie for access to RX buffer in rx path */ u16 reserved3; /* prefetch_cmd, for future use */ u16 reserved4; /* csum_l4 - (for future use, PnC) */ + + u32 reserved5; /* pnc_extra PnC (for future use, PnC) */ + u32 reserved6; /* hw_cmd (for future use, PnC and HWF) */ +}; +#else +struct mvneta_tx_desc { + u16 data_size; /* Data size of transmitted packet in bytes */ + u16 reserved1; /* csum_l4 (for future use) */ + u32 command; /* Options used by HW for packet transmitting.*/ + u32 reserved2; /* hw_cmd - (for future use, PMT) */ + u32 buf_phys_addr; /* Physical addr of transmitted buffer */ + u32 reserved3[4]; /* Reserved - (for future use) */ +}; + +struct mvneta_rx_desc { + u16 data_size; /* Size of received packet in bytes */ + u16 reserved1; /* pnc_info - (for future use, PnC) */ + u32 status; /* Info about received packet */ + + u32 reserved2; /* pnc_flow_id (for future use, PnC) */ + u32 buf_phys_addr; /* Physical address of the buffer */ + + u16 reserved4; /* csum_l4 - (for future use, PnC) */ + u16 reserved3; /* prefetch_cmd, for future use */ + u32 buf_cookie; /* cookie for access to RX buffer in rx path */ + u32 reserved5; /* pnc_extra PnC (for future use, PnC) */ u32 reserved6; /* hw_cmd (for future use, PnC and HWF) */ }; +#endif + +enum mvneta_tx_buf_type { + MVNETA_TYPE_TSO, + MVNETA_TYPE_SKB, + MVNETA_TYPE_XDP_TX, + MVNETA_TYPE_XDP_NDO, +}; + +struct mvneta_tx_buf { + enum mvneta_tx_buf_type type; + union { + struct xdp_frame *xdpf; + struct sk_buff *skb; + }; +}; struct mvneta_tx_queue { /* Number of this TX queue, in the range 0-7 */ @@ -319,9 +668,12 @@ struct mvneta_tx_queue { * descriptor ring */ int count; + int pending; + int tx_stop_threshold; + int tx_wake_threshold; - /* Array of transmitted skb */ - struct sk_buff **tx_skb; + /* Array of transmitted buffers */ + struct mvneta_tx_buf *buf; /* Index of last TX DMA descriptor that was inserted */ int txq_put_index; @@ -342,6 +694,15 @@ struct mvneta_tx_queue { /* Index of the next TX DMA descriptor to process */ int next_desc_to_proc; + + /* DMA buffers for TSO headers */ + char *tso_hdrs[MVNETA_MAX_TSO_PAGES]; + + /* DMA address of TSO headers */ + dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES]; + + /* Affinity mask for CPUs*/ + cpumask_t affinity_mask; }; struct mvneta_rx_queue { @@ -351,12 +712,16 @@ struct mvneta_rx_queue { /* num of rx descriptors in the rx descriptor ring */ int size; - /* counter of times when mvneta_refill() failed */ - int missed; - u32 pkts_coal; u32 time_coal; + /* page_pool */ + struct page_pool *page_pool; + struct xdp_rxq_info xdp_rxq; + + /* Virtual address of the RX buffer */ + void **buf_virt_addr; + /* Virtual address of the RX DMA descriptors array */ struct mvneta_rx_desc *descs; @@ -368,13 +733,26 @@ struct mvneta_rx_queue { /* Index of the next RX DMA descriptor to process */ int next_desc_to_proc; + + /* Index of first RX DMA descriptor to refill */ + int first_to_refill; + u32 refill_num; }; +static enum cpuhp_state online_hpstate; +/* The hardware supports eight (8) rx queues, but we are only allowing + * the first one to be used. Therefore, let's just allocate one queue. + */ static int rxq_number = 8; static int txq_number = 8; static int rxq_def; +static int rx_copybreak __read_mostly = 256; + +/* HW BM need that each port be identify by a unique ID */ +static int global_port_id; + #define MVNETA_DRIVER_NAME "mvneta" #define MVNETA_DRIVER_VERSION "1.0" @@ -413,53 +791,64 @@ static void mvneta_txq_inc_put(struct mvneta_tx_queue *txq) static void mvneta_mib_counters_clear(struct mvneta_port *pp) { int i; - u32 dummy; /* Perform dummy reads from MIB counters */ for (i = 0; i < MVNETA_MIB_LATE_COLLISION; i += 4) - dummy = mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i)); + mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i)); + mvreg_read(pp, MVNETA_RX_DISCARD_FRAME_COUNT); + mvreg_read(pp, MVNETA_OVERRUN_FRAME_COUNT); } /* Get System Network Statistics */ -struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void +mvneta_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mvneta_port *pp = netdev_priv(dev); unsigned int start; + int cpu; - memset(stats, 0, sizeof(struct rtnl_link_stats64)); - - do { - start = u64_stats_fetch_begin_bh(&pp->rx_stats.syncp); - stats->rx_packets = pp->rx_stats.packets; - stats->rx_bytes = pp->rx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&pp->rx_stats.syncp, start)); - - - do { - start = u64_stats_fetch_begin_bh(&pp->tx_stats.syncp); - stats->tx_packets = pp->tx_stats.packets; - stats->tx_bytes = pp->tx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&pp->tx_stats.syncp, start)); - - stats->rx_errors = dev->stats.rx_errors; - stats->rx_dropped = dev->stats.rx_dropped; + for_each_possible_cpu(cpu) { + struct mvneta_pcpu_stats *cpu_stats; + u64 rx_packets; + u64 rx_bytes; + u64 rx_dropped; + u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + + cpu_stats = per_cpu_ptr(pp->stats, cpu); + do { + start = u64_stats_fetch_begin(&cpu_stats->syncp); + rx_packets = cpu_stats->es.ps.rx_packets; + rx_bytes = cpu_stats->es.ps.rx_bytes; + rx_dropped = cpu_stats->rx_dropped; + rx_errors = cpu_stats->rx_errors; + tx_packets = cpu_stats->es.ps.tx_packets; + tx_bytes = cpu_stats->es.ps.tx_bytes; + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->rx_dropped += rx_dropped; + stats->rx_errors += rx_errors; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + } stats->tx_dropped = dev->stats.tx_dropped; - - return stats; } /* Rx descriptors helper methods */ -/* Checks whether the given RX descriptor is both the first and the - * last descriptor for the RX packet. Each RX packet is currently +/* Checks whether the RX descriptor having this status is both the first + * and the last descriptor for the RX packet. Each RX packet is currently * received through a single RX descriptor, so not having each RX * descriptor with its first and last bits set is an error */ -static int mvneta_rxq_desc_is_first_last(struct mvneta_rx_desc *desc) +static int mvneta_rxq_desc_is_first_last(u32 status) { - return (desc->status & MVNETA_RXD_FIRST_LAST_DESC) == + return (status & MVNETA_RXD_FIRST_LAST_DESC) == MVNETA_RXD_FIRST_LAST_DESC; } @@ -535,6 +924,7 @@ mvneta_rxq_next_desc_get(struct mvneta_rx_queue *rxq) int rx_desc = rxq->next_desc_to_proc; rxq->next_desc_to_proc = MVNETA_QUEUE_NEXT_DESC(rxq, rx_desc); + prefetch(rxq->descs + rxq->next_desc_to_proc); return rxq->descs + rx_desc; } @@ -576,11 +966,15 @@ static void mvneta_txq_pend_desc_add(struct mvneta_port *pp, { u32 val; - /* Only 255 descriptors can be added at once ; Assume caller - * process TX desriptors in quanta less than 256 - */ - val = pend_desc; - mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val); + pend_desc += txq->pending; + + /* Only 255 Tx descriptors can be added at once */ + do { + val = min(pend_desc, 255); + mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val); + pend_desc -= val; + } while (pend_desc > 0); + txq->pending = 0; } /* Get pointer to next TX descriptor to be processed (send) by HW */ @@ -630,31 +1024,228 @@ static void mvneta_rxq_bm_disable(struct mvneta_port *pp, mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); } +/* Enable buffer management (BM) */ +static void mvneta_rxq_bm_enable(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq) +{ + u32 val; + val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id)); + val |= MVNETA_RXQ_HW_BUF_ALLOC; + mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); +} -/* Sets the RGMII Enable bit (RGMIIEn) in port MAC control register */ -static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable) +/* Notify HW about port's assignment of pool for bigger packets */ +static void mvneta_rxq_long_pool_set(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq) { - u32 val; + u32 val; - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id)); + val &= ~MVNETA_RXQ_LONG_POOL_ID_MASK; + val |= (pp->pool_long->id << MVNETA_RXQ_LONG_POOL_ID_SHIFT); - if (enable) - val |= MVNETA_GMAC2_PORT_RGMII; - else - val &= ~MVNETA_GMAC2_PORT_RGMII; + mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); +} + +/* Notify HW about port's assignment of pool for smaller packets */ +static void mvneta_rxq_short_pool_set(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq) +{ + u32 val; + + val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id)); + val &= ~MVNETA_RXQ_SHORT_POOL_ID_MASK; + val |= (pp->pool_short->id << MVNETA_RXQ_SHORT_POOL_ID_SHIFT); - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); } -/* Config SGMII port */ -static void mvneta_port_sgmii_config(struct mvneta_port *pp) +/* Set port's receive buffer size for assigned BM pool */ +static inline void mvneta_bm_pool_bufsize_set(struct mvneta_port *pp, + int buf_size, + u8 pool_id) { u32 val; - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - val |= MVNETA_GMAC2_PSC_ENABLE; - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + if (!IS_ALIGNED(buf_size, 8)) { + dev_warn(pp->dev->dev.parent, + "illegal buf_size value %d, round to %d\n", + buf_size, ALIGN(buf_size, 8)); + buf_size = ALIGN(buf_size, 8); + } + + val = mvreg_read(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id)); + val |= buf_size & MVNETA_PORT_POOL_BUFFER_SZ_MASK; + mvreg_write(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id), val); +} + +/* Configure MBUS window in order to enable access BM internal SRAM */ +static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize, + u8 target, u8 attr) +{ + u32 win_enable, win_protect; + int i; + + win_enable = mvreg_read(pp, MVNETA_BASE_ADDR_ENABLE); + + if (pp->bm_win_id < 0) { + /* Find first not occupied window */ + for (i = 0; i < MVNETA_MAX_DECODE_WIN; i++) { + if (win_enable & (1 << i)) { + pp->bm_win_id = i; + break; + } + } + if (i == MVNETA_MAX_DECODE_WIN) + return -ENOMEM; + } else { + i = pp->bm_win_id; + } + + mvreg_write(pp, MVNETA_WIN_BASE(i), 0); + mvreg_write(pp, MVNETA_WIN_SIZE(i), 0); + + if (i < 4) + mvreg_write(pp, MVNETA_WIN_REMAP(i), 0); + + mvreg_write(pp, MVNETA_WIN_BASE(i), (base & 0xffff0000) | + (attr << 8) | target); + + mvreg_write(pp, MVNETA_WIN_SIZE(i), (wsize - 1) & 0xffff0000); + + win_protect = mvreg_read(pp, MVNETA_ACCESS_PROTECT_ENABLE); + win_protect |= 3 << (2 * i); + mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); + + win_enable &= ~(1 << i); + mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); + + return 0; +} + +static int mvneta_bm_port_mbus_init(struct mvneta_port *pp) +{ + u32 wsize; + u8 target, attr; + int err; + + /* Get BM window information */ + err = mvebu_mbus_get_io_win_info(pp->bm_priv->bppi_phys_addr, &wsize, + &target, &attr); + if (err < 0) + return err; + + pp->bm_win_id = -1; + + /* Open NETA -> BM window */ + err = mvneta_mbus_io_win_set(pp, pp->bm_priv->bppi_phys_addr, wsize, + target, attr); + if (err < 0) { + netdev_info(pp->dev, "fail to configure mbus window to BM\n"); + return err; + } + return 0; +} + +/* Assign and initialize pools for port. In case of fail + * buffer manager will remain disabled for current port. + */ +static int mvneta_bm_port_init(struct platform_device *pdev, + struct mvneta_port *pp) +{ + struct device_node *dn = pdev->dev.of_node; + u32 long_pool_id, short_pool_id; + + if (!pp->neta_armada3700) { + int ret; + + ret = mvneta_bm_port_mbus_init(pp); + if (ret) + return ret; + } + + if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) { + netdev_info(pp->dev, "missing long pool id\n"); + return -EINVAL; + } + + /* Create port's long pool depending on mtu */ + pp->pool_long = mvneta_bm_pool_use(pp->bm_priv, long_pool_id, + MVNETA_BM_LONG, pp->id, + MVNETA_RX_PKT_SIZE(pp->dev->mtu)); + if (!pp->pool_long) { + netdev_info(pp->dev, "fail to obtain long pool for port\n"); + return -ENOMEM; + } + + pp->pool_long->port_map |= 1 << pp->id; + + mvneta_bm_pool_bufsize_set(pp, pp->pool_long->buf_size, + pp->pool_long->id); + + /* If short pool id is not defined, assume using single pool */ + if (of_property_read_u32(dn, "bm,pool-short", &short_pool_id)) + short_pool_id = long_pool_id; + + /* Create port's short pool */ + pp->pool_short = mvneta_bm_pool_use(pp->bm_priv, short_pool_id, + MVNETA_BM_SHORT, pp->id, + MVNETA_BM_SHORT_PKT_SIZE); + if (!pp->pool_short) { + netdev_info(pp->dev, "fail to obtain short pool for port\n"); + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); + return -ENOMEM; + } + + if (short_pool_id != long_pool_id) { + pp->pool_short->port_map |= 1 << pp->id; + mvneta_bm_pool_bufsize_set(pp, pp->pool_short->buf_size, + pp->pool_short->id); + } + + return 0; +} + +/* Update settings of a pool for bigger packets */ +static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu) +{ + struct mvneta_bm_pool *bm_pool = pp->pool_long; + struct hwbm_pool *hwbm_pool = &bm_pool->hwbm_pool; + int num; + + /* Release all buffers from long pool */ + mvneta_bm_bufs_free(pp->bm_priv, bm_pool, 1 << pp->id); + if (hwbm_pool->buf_num) { + WARN(1, "cannot free all buffers in pool %d\n", + bm_pool->id); + goto bm_mtu_err; + } + + bm_pool->pkt_size = MVNETA_RX_PKT_SIZE(mtu); + bm_pool->buf_size = MVNETA_RX_BUF_SIZE(bm_pool->pkt_size); + hwbm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size)); + + /* Fill entire long pool */ + num = hwbm_pool_add(hwbm_pool, hwbm_pool->size); + if (num != hwbm_pool->size) { + WARN(1, "pool %d: %d of %d allocated\n", + bm_pool->id, num, hwbm_pool->size); + goto bm_mtu_err; + } + mvneta_bm_pool_bufsize_set(pp, bm_pool->buf_size, bm_pool->id); + + return; + +bm_mtu_err: + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); + + pp->bm_priv = NULL; + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1); + netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n"); } /* Start the Ethernet port RX and TX activity */ @@ -664,23 +1255,22 @@ static void mvneta_port_up(struct mvneta_port *pp) u32 q_map; /* Enable all initialized TXs. */ - mvneta_mib_counters_clear(pp); q_map = 0; for (queue = 0; queue < txq_number; queue++) { struct mvneta_tx_queue *txq = &pp->txqs[queue]; - if (txq->descs != NULL) + if (txq->descs) q_map |= (1 << queue); } mvreg_write(pp, MVNETA_TXQ_CMD, q_map); - /* Enable all initialized RXQs. */ q_map = 0; + /* Enable all initialized RXQs. */ for (queue = 0; queue < rxq_number; queue++) { struct mvneta_rx_queue *rxq = &pp->rxqs[queue]; - if (rxq->descs != NULL) + + if (rxq->descs) q_map |= (1 << queue); } - mvreg_write(pp, MVNETA_RXQ_CMD, q_map); } @@ -703,14 +1293,14 @@ static void mvneta_port_down(struct mvneta_port *pp) do { if (count++ >= MVNETA_RX_DISABLE_TIMEOUT_MSEC) { netdev_warn(pp->dev, - "TIMEOUT for RX stopped ! rx_queue_cmd: 0x08%x\n", + "TIMEOUT for RX stopped ! rx_queue_cmd: 0x%08x\n", val); break; } mdelay(1); val = mvreg_read(pp, MVNETA_RXQ_CMD); - } while (val & 0xff); + } while (val & MVNETA_RXQ_ENABLE_MASK); /* Stop Tx port activity. Check port Tx activity. Issue stop * command for active channels only @@ -735,14 +1325,14 @@ static void mvneta_port_down(struct mvneta_port *pp) /* Check TX Command reg that all Txqs are stopped */ val = mvreg_read(pp, MVNETA_TXQ_CMD); - } while (val & 0xff); + } while (val & MVNETA_TXQ_ENABLE_MASK); /* Double check to verify that TX FIFO is empty */ count = 0; do { if (count++ >= MVNETA_TX_FIFO_EMPTY_TIMEOUT) { netdev_warn(pp->dev, - "TX FIFO empty timeout status=0x08%x\n", + "TX FIFO empty timeout status=0x%08x\n", val); break; } @@ -835,6 +1425,43 @@ static void mvneta_set_other_mcast_table(struct mvneta_port *pp, int queue) mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val); } +static void mvneta_percpu_unmask_interrupt(void *arg) +{ + struct mvneta_port *pp = arg; + + /* All the queue are unmasked, but actually only the ones + * mapped to this CPU will be unmasked + */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, + MVNETA_RX_INTR_MASK_ALL | + MVNETA_TX_INTR_MASK_ALL | + MVNETA_MISCINTR_INTR_MASK); +} + +static void mvneta_percpu_mask_interrupt(void *arg) +{ + struct mvneta_port *pp = arg; + + /* All the queue are masked, but actually only the ones + * mapped to this CPU will be masked + */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); + mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); +} + +static void mvneta_percpu_clear_intr_cause(void *arg) +{ + struct mvneta_port *pp = arg; + + /* All the queue are cleared, but actually only the ones + * mapped to this CPU will be cleared + */ + mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0); + mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); + mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0); +} + /* This method sets defaults to the NETA port: * Clears interrupt Cause and Mask registers. * Clears all MAC tables. @@ -849,28 +1476,50 @@ static void mvneta_defaults_set(struct mvneta_port *pp) int cpu; int queue; u32 val; + int max_cpu = num_present_cpus(); /* Clear all Cause registers */ - mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0); - mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0); - mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); + on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true); /* Mask all interrupts */ - mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); - mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); - mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); + on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); mvreg_write(pp, MVNETA_INTR_ENABLE, 0); /* Enable MBUS Retry bit16 */ mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20); - /* Set CPU queue access map - all CPUs have access to all RX - * queues and to all TX queues + /* Set CPU queue access map. CPUs are assigned to the RX and + * TX queues modulo their number. If there is only one TX + * queue then it is assigned to the CPU associated to the + * default RX queue. */ - for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++) - mvreg_write(pp, MVNETA_CPU_MAP(cpu), - (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | - MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); + for_each_present_cpu(cpu) { + int rxq_map = 0, txq_map = 0; + int rxq, txq; + if (!pp->neta_armada3700) { + for (rxq = 0; rxq < rxq_number; rxq++) + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + + for (txq = 0; txq < txq_number; txq++) + if ((txq % max_cpu) == cpu) + txq_map |= MVNETA_CPU_TXQ_ACCESS(txq); + + /* With only one TX queue we configure a special case + * which will allow to get all the irq on a single + * CPU + */ + if (txq_number == 1) + txq_map = (cpu == pp->rxq_def) ? + MVNETA_CPU_TXQ_ACCESS(0) : 0; + + } else { + txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; + rxq_map = MVNETA_CPU_RXQ_ACCESS_ALL_MASK; + } + + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); + } /* Reset RX and TX DMAs */ mvreg_write(pp, MVNETA_PORT_RX_RESET, MVNETA_PORT_RX_DMA_RESET); @@ -887,11 +1536,19 @@ static void mvneta_defaults_set(struct mvneta_port *pp) mvreg_write(pp, MVNETA_PORT_RX_RESET, 0); /* Set Port Acceleration Mode */ - val = MVNETA_ACC_MODE_EXT; + if (pp->bm_priv) + /* HW buffer management + legacy parser */ + val = MVNETA_ACC_MODE_EXT2; + else + /* SW buffer management + legacy parser */ + val = MVNETA_ACC_MODE_EXT1; mvreg_write(pp, MVNETA_ACC_MODE, val); + if (pp->bm_priv) + mvreg_write(pp, MVNETA_BM_ADDRESS, pp->bm_priv->bppi_phys_addr); + /* Update val of portCfg register accordingly with all RxQueue types */ - val = MVNETA_PORT_CONFIG_DEFL_VALUE(rxq_def); + val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); mvreg_write(pp, MVNETA_PORT_CONFIG, val); val = 0; @@ -904,13 +1561,22 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Default burst size */ val |= MVNETA_TX_BRST_SZ_MASK(MVNETA_SDMA_BRST_SIZE_16); val |= MVNETA_RX_BRST_SZ_MASK(MVNETA_SDMA_BRST_SIZE_16); + val |= MVNETA_RX_NO_DATA_SWAP | MVNETA_TX_NO_DATA_SWAP; - val |= (MVNETA_RX_NO_DATA_SWAP | MVNETA_TX_NO_DATA_SWAP | - MVNETA_NO_DESC_SWAP); +#if defined(__BIG_ENDIAN) + val |= MVNETA_DESC_SWAP; +#endif /* Assign port SDMA configuration */ mvreg_write(pp, MVNETA_SDMA_CONFIG, val); + /* Disable PHY polling in hardware, since we're using the + * kernel phylib to do this. + */ + val = mvreg_read(pp, MVNETA_UNIT_CONTROL); + val &= ~MVNETA_PHY_POLLING_ENABLE; + mvreg_write(pp, MVNETA_UNIT_CONTROL, val); + mvneta_set_ucast_table(pp, -1); mvneta_set_special_mcast_table(pp, -1); mvneta_set_other_mcast_table(pp, -1); @@ -919,6 +1585,8 @@ static void mvneta_defaults_set(struct mvneta_port *pp) mvreg_write(pp, MVNETA_INTR_ENABLE, (MVNETA_RXQ_INTR_ENABLE_ALL_MASK | MVNETA_TXQ_INTR_ENABLE_ALL_MASK)); + + mvneta_mib_counters_clear(pp); } /* Set max sizes for tx queues */ @@ -992,8 +1660,8 @@ static void mvneta_set_ucast_addr(struct mvneta_port *pp, u8 last_nibble, } /* Set mac address */ -static void mvneta_mac_addr_set(struct mvneta_port *pp, unsigned char *addr, - int queue) +static void mvneta_mac_addr_set(struct mvneta_port *pp, + const unsigned char *addr, int queue) { unsigned int mac_h; unsigned int mac_l; @@ -1019,7 +1687,6 @@ static void mvneta_rx_pkts_coal_set(struct mvneta_port *pp, { mvreg_write(pp, MVNETA_RXQ_THRESHOLD_REG(rxq->id), value | MVNETA_RXQ_NON_OCCUPIED(0)); - rxq->pkts_coal = value; } /* Set the time delay in usec before RX interrupt will be generated by @@ -1035,7 +1702,6 @@ static void mvneta_rx_time_coal_set(struct mvneta_port *pp, val = (clk_rate / 1000000) * value; mvreg_write(pp, MVNETA_RXQ_TIME_COAL_REG(rxq->id), val); - rxq->time_coal = value; } /* Set threshold for TX_DONE pkts coalescing */ @@ -1050,27 +1716,18 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp, val |= MVNETA_TXQ_SENT_THRESH_MASK(value); mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), val); - - txq->done_pkts_coal = value; } -/* Trigger tx done timer in MVNETA_TX_DONE_TIMER_PERIOD msecs */ -static void mvneta_add_tx_done_timer(struct mvneta_port *pp) -{ - if (test_and_set_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags) == 0) { - pp->tx_done_timer.expires = jiffies + - msecs_to_jiffies(MVNETA_TX_DONE_TIMER_PERIOD); - add_timer(&pp->tx_done_timer); - } -} - - /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */ static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc, - u32 phys_addr, u32 cookie) + u32 phys_addr, void *virt_addr, + struct mvneta_rx_queue *rxq) { - rx_desc->buf_cookie = cookie; + int i; + rx_desc->buf_phys_addr = phys_addr; + i = rx_desc - rxq->descs; + rxq->buf_virt_addr[i] = virt_addr; } /* Decrement sent descriptors counter */ @@ -1124,7 +1781,7 @@ static int mvneta_txq_sent_desc_proc(struct mvneta_port *pp, } /* Set TXQ descriptors fields relevant for CSUM calculation */ -static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, +static u32 mvneta_txq_desc_csum(int l3_offs, __be16 l3_proto, int ip_hdr_len, int l4_proto) { u32 command; @@ -1136,7 +1793,7 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, command = l3_offs << MVNETA_TX_L3_OFF_SHIFT; command |= ip_hdr_len << MVNETA_TX_IP_HLEN_SHIFT; - if (l3_proto == swab16(ETH_P_IP)) + if (l3_proto == htons(ETH_P_IP)) command |= MVNETA_TXD_IP_CSUM; else command |= MVNETA_TX_L3_IP6; @@ -1156,14 +1813,13 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, static void mvneta_rx_error(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc) { + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); u32 status = rx_desc->status; - if (!mvneta_rxq_desc_is_first_last(rx_desc)) { - netdev_err(pp->dev, - "bad rx status %08x (buffer oversize), size=%d\n", - rx_desc->status, rx_desc->data_size); - return; - } + /* update per-cpu counter */ + u64_stats_update_begin(&stats->syncp); + stats->rx_errors++; + u64_stats_update_end(&stats->syncp); switch (status & MVNETA_RXD_ERR_CODE_MASK) { case MVNETA_RXD_ERR_CRC: @@ -1185,113 +1841,131 @@ static void mvneta_rx_error(struct mvneta_port *pp, } } -/* Handle RX checksum offload */ -static void mvneta_rx_csum(struct mvneta_port *pp, - struct mvneta_rx_desc *rx_desc, - struct sk_buff *skb) +/* Handle RX checksum offload based on the descriptor's status */ +static int mvneta_rx_csum(struct mvneta_port *pp, u32 status) { - if ((rx_desc->status & MVNETA_RXD_L3_IP4) && - (rx_desc->status & MVNETA_RXD_L4_CSUM_OK)) { - skb->csum = 0; - skb->ip_summed = CHECKSUM_UNNECESSARY; - return; - } + if ((pp->dev->features & NETIF_F_RXCSUM) && + (status & MVNETA_RXD_L3_IP4) && + (status & MVNETA_RXD_L4_CSUM_OK)) + return CHECKSUM_UNNECESSARY; - skb->ip_summed = CHECKSUM_NONE; + return CHECKSUM_NONE; } -/* Return tx queue pointer (find last set bit) according to causeTxDone reg */ +/* Return tx queue pointer (find last set bit) according to <cause> returned + * form tx_done reg. <cause> must not be null. The return value is always a + * valid queue for matching the first one found in <cause>. + */ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp, u32 cause) { int queue = fls(cause) - 1; - return (queue < 0 || queue >= txq_number) ? NULL : &pp->txqs[queue]; + return &pp->txqs[queue]; } /* Free tx queue skbuffs */ static void mvneta_txq_bufs_free(struct mvneta_port *pp, - struct mvneta_tx_queue *txq, int num) + struct mvneta_tx_queue *txq, int num, + struct netdev_queue *nq, bool napi) { + unsigned int bytes_compl = 0, pkts_compl = 0; + struct xdp_frame_bulk bq; int i; + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + for (i = 0; i < num; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index]; struct mvneta_tx_desc *tx_desc = txq->descs + txq->txq_get_index; - struct sk_buff *skb = txq->tx_skb[txq->txq_get_index]; mvneta_txq_inc_get(txq); - if (!skb) - continue; - - dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr, - tx_desc->data_size, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); + if (buf->type == MVNETA_TYPE_XDP_NDO || + buf->type == MVNETA_TYPE_SKB) + dma_unmap_single(pp->dev->dev.parent, + tx_desc->buf_phys_addr, + tx_desc->data_size, DMA_TO_DEVICE); + if ((buf->type == MVNETA_TYPE_TSO || + buf->type == MVNETA_TYPE_SKB) && buf->skb) { + bytes_compl += buf->skb->len; + pkts_compl++; + dev_kfree_skb_any(buf->skb); + } else if ((buf->type == MVNETA_TYPE_XDP_TX || + buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) { + if (napi && buf->type == MVNETA_TYPE_XDP_TX) + xdp_return_frame_rx_napi(buf->xdpf); + else + xdp_return_frame_bulk(buf->xdpf, &bq); + } } + xdp_flush_frame_bulk(&bq); + + rcu_read_unlock(); + + netdev_tx_completed_queue(nq, pkts_compl, bytes_compl); } /* Handle end of transmission */ -static int mvneta_txq_done(struct mvneta_port *pp, +static void mvneta_txq_done(struct mvneta_port *pp, struct mvneta_tx_queue *txq) { struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id); int tx_done; tx_done = mvneta_txq_sent_desc_proc(pp, txq); - if (tx_done == 0) - return tx_done; - mvneta_txq_bufs_free(pp, txq, tx_done); + if (!tx_done) + return; + + mvneta_txq_bufs_free(pp, txq, tx_done, nq, true); txq->count -= tx_done; if (netif_tx_queue_stopped(nq)) { - if (txq->size - txq->count >= MAX_SKB_FRAGS + 1) + if (txq->count <= txq->tx_wake_threshold) netif_tx_wake_queue(nq); } - - return tx_done; } -/* Refill processing */ +/* Refill processing for SW buffer management */ +/* Allocate page per descriptor */ static int mvneta_rx_refill(struct mvneta_port *pp, - struct mvneta_rx_desc *rx_desc) - + struct mvneta_rx_desc *rx_desc, + struct mvneta_rx_queue *rxq, + gfp_t gfp_mask) { dma_addr_t phys_addr; - struct sk_buff *skb; + struct page *page; - skb = netdev_alloc_skb(pp->dev, pp->pkt_size); - if (!skb) - return -ENOMEM; - - phys_addr = dma_map_single(pp->dev->dev.parent, skb->head, - MVNETA_RX_BUF_SIZE(pp->pkt_size), - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) { - dev_kfree_skb(skb); + page = page_pool_alloc_pages(rxq->page_pool, + gfp_mask | __GFP_NOWARN); + if (!page) return -ENOMEM; - } - mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)skb); + phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction; + mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq); return 0; } /* Handle tx checksum */ -static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb) +static u32 mvneta_skb_tx_csum(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_PARTIAL) { int ip_hdr_len = 0; + __be16 l3_proto = vlan_get_protocol(skb); u8 l4_proto; - if (skb->protocol == htons(ETH_P_IP)) { + if (l3_proto == htons(ETH_P_IP)) { struct iphdr *ip4h = ip_hdr(skb); /* Calculate IPv4 checksum and L4 checksum */ ip_hdr_len = ip4h->ihl; l4_proto = ip4h->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (l3_proto == htons(ETH_P_IPV6)) { struct ipv6hdr *ip6h = ipv6_hdr(skb); /* Read l4_protocol from one of IPv6 extra headers */ @@ -1302,23 +1976,12 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb) return MVNETA_TX_L4_CSUM_NOT; return mvneta_txq_desc_csum(skb_network_offset(skb), - skb->protocol, ip_hdr_len, l4_proto); + l3_proto, ip_hdr_len, l4_proto); } return MVNETA_TX_L4_CSUM_NOT; } -/* Returns rx queue pointer (find last set bit) according to causeRxTx - * value - */ -static struct mvneta_rx_queue *mvneta_rx_policy(struct mvneta_port *pp, - u32 cause) -{ - int queue = fls(cause >> 8) - 1; - - return (queue < 0 || queue >= rxq_number) ? NULL : &pp->rxqs[queue]; -} - /* Drop packets received by the RXQ and free buffers */ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) @@ -1326,25 +1989,562 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, int rx_done, i; rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq); + if (rx_done) + mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); + + if (pp->bm_priv) { + for (i = 0; i < rx_done; i++) { + struct mvneta_rx_desc *rx_desc = + mvneta_rxq_next_desc_get(rxq); + u8 pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc); + struct mvneta_bm_pool *bm_pool; + + bm_pool = &pp->bm_priv->bm_pools[pool_id]; + /* Return dropped buffer to the pool */ + mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, + rx_desc->buf_phys_addr); + } + return; + } + for (i = 0; i < rxq->size; i++) { struct mvneta_rx_desc *rx_desc = rxq->descs + i; - struct sk_buff *skb = (struct sk_buff *)rx_desc->buf_cookie; + void *data = rxq->buf_virt_addr[i]; + if (!data || !(rx_desc->buf_phys_addr)) + continue; - dev_kfree_skb_any(skb); - dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, - rx_desc->data_size, DMA_FROM_DEVICE); + page_pool_put_full_page(rxq->page_pool, data, false); } + if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) + xdp_rxq_info_unreg(&rxq->xdp_rxq); + page_pool_destroy(rxq->page_pool); + rxq->page_pool = NULL; +} - if (rx_done) - mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); +static void +mvneta_update_stats(struct mvneta_port *pp, + struct mvneta_stats *ps) +{ + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + + u64_stats_update_begin(&stats->syncp); + stats->es.ps.rx_packets += ps->rx_packets; + stats->es.ps.rx_bytes += ps->rx_bytes; + /* xdp */ + stats->es.ps.xdp_redirect += ps->xdp_redirect; + stats->es.ps.xdp_pass += ps->xdp_pass; + stats->es.ps.xdp_drop += ps->xdp_drop; + u64_stats_update_end(&stats->syncp); } -/* Main rx processing */ -static int mvneta_rx(struct mvneta_port *pp, int rx_todo, - struct mvneta_rx_queue *rxq) +static inline +int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) { + struct mvneta_rx_desc *rx_desc; + int curr_desc = rxq->first_to_refill; + int i; + + for (i = 0; (i < rxq->refill_num) && (i < 64); i++) { + rx_desc = rxq->descs + curr_desc; + if (!(rx_desc->buf_phys_addr)) { + if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) { + struct mvneta_pcpu_stats *stats; + + pr_err("Can't refill queue %d. Done %d from %d\n", + rxq->id, i, rxq->refill_num); + + stats = this_cpu_ptr(pp->stats); + u64_stats_update_begin(&stats->syncp); + stats->es.refill_error++; + u64_stats_update_end(&stats->syncp); + break; + } + } + curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc); + } + rxq->refill_num -= i; + rxq->first_to_refill = curr_desc; + + return i; +} + +static void +mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, + struct xdp_buff *xdp, int sync_len) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + int i; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + for (i = 0; i < sinfo->nr_frags; i++) + page_pool_put_full_page(rxq->page_pool, + skb_frag_page(&sinfo->frags[i]), true); + +out: + page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), + sync_len, true); +} + +static int +mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, + struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + struct device *dev = pp->dev->dev.parent; + struct mvneta_tx_desc *tx_desc; + int i, num_frames = 1; + struct page *page; + + if (unlikely(xdp_frame_has_frags(xdpf))) + num_frames += sinfo->nr_frags; + + if (txq->count + num_frames >= txq->size) + return MVNETA_XDP_DROPPED; + + for (i = 0; i < num_frames; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + skb_frag_t *frag = NULL; + int len = xdpf->len; + dma_addr_t dma_addr; + + if (unlikely(i)) { /* paged area */ + frag = &sinfo->frags[i - 1]; + len = skb_frag_size(frag); + } + + tx_desc = mvneta_txq_next_desc_get(txq); + if (dma_map) { + /* ndo_xdp_xmit */ + void *data; + + data = unlikely(frag) ? skb_frag_address(frag) + : xdpf->data; + dma_addr = dma_map_single(dev, data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + mvneta_txq_desc_put(txq); + goto unmap; + } + + buf->type = MVNETA_TYPE_XDP_NDO; + } else { + page = unlikely(frag) ? skb_frag_page(frag) + : virt_to_page(xdpf->data); + dma_addr = page_pool_get_dma_addr(page); + if (unlikely(frag)) + dma_addr += skb_frag_off(frag); + else + dma_addr += sizeof(*xdpf) + xdpf->headroom; + dma_sync_single_for_device(dev, dma_addr, len, + DMA_BIDIRECTIONAL); + buf->type = MVNETA_TYPE_XDP_TX; + } + buf->xdpf = unlikely(i) ? NULL : xdpf; + + tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC; + tx_desc->buf_phys_addr = dma_addr; + tx_desc->data_size = len; + *nxmit_byte += len; + + mvneta_txq_inc_put(txq); + } + /*last descriptor */ + tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; + + txq->pending += num_frames; + txq->count += num_frames; + + return MVNETA_XDP_TX; + +unmap: + for (i--; i >= 0; i--) { + mvneta_txq_desc_put(txq); + tx_desc = txq->descs + txq->next_desc_to_proc; + dma_unmap_single(dev, tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + } + + return MVNETA_XDP_DROPPED; +} + +static int +mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) +{ + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + struct mvneta_tx_queue *txq; + struct netdev_queue *nq; + int cpu, nxmit_byte = 0; + struct xdp_frame *xdpf; + u32 ret; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return MVNETA_XDP_DROPPED; + + cpu = smp_processor_id(); + txq = &pp->txqs[cpu % txq_number]; + nq = netdev_get_tx_queue(pp->dev, txq->id); + + __netif_tx_lock(nq, cpu); + ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false); + if (ret == MVNETA_XDP_TX) { + u64_stats_update_begin(&stats->syncp); + stats->es.ps.tx_bytes += nxmit_byte; + stats->es.ps.tx_packets++; + stats->es.ps.xdp_tx++; + u64_stats_update_end(&stats->syncp); + + mvneta_txq_pend_desc_add(pp, txq, 0); + } else { + u64_stats_update_begin(&stats->syncp); + stats->es.ps.xdp_tx_err++; + u64_stats_update_end(&stats->syncp); + } + __netif_tx_unlock(nq); + + return ret; +} + +static int +mvneta_xdp_xmit(struct net_device *dev, int num_frame, + struct xdp_frame **frames, u32 flags) +{ + struct mvneta_port *pp = netdev_priv(dev); + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + int i, nxmit_byte = 0, nxmit = 0; + int cpu = smp_processor_id(); + struct mvneta_tx_queue *txq; + struct netdev_queue *nq; + u32 ret; + + if (unlikely(test_bit(__MVNETA_DOWN, &pp->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + txq = &pp->txqs[cpu % txq_number]; + nq = netdev_get_tx_queue(pp->dev, txq->id); + + __netif_tx_lock(nq, cpu); + for (i = 0; i < num_frame; i++) { + ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte, + true); + if (ret != MVNETA_XDP_TX) + break; + + nxmit++; + } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + mvneta_txq_pend_desc_add(pp, txq, 0); + __netif_tx_unlock(nq); + + u64_stats_update_begin(&stats->syncp); + stats->es.ps.tx_bytes += nxmit_byte; + stats->es.ps.tx_packets += nxmit; + stats->es.ps.xdp_xmit += nxmit; + stats->es.ps.xdp_xmit_err += num_frame - nxmit; + u64_stats_update_end(&stats->syncp); + + return nxmit; +} + +static int +mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, + struct bpf_prog *prog, struct xdp_buff *xdp, + u32 frame_sz, struct mvneta_stats *stats) +{ + unsigned int len, data_len, sync; + u32 ret, act; + + len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction; + data_len = xdp->data_end - xdp->data; + act = bpf_prog_run_xdp(prog, xdp); + + /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */ + sync = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction; + sync = max(sync, len); + + switch (act) { + case XDP_PASS: + stats->xdp_pass++; + return MVNETA_XDP_PASS; + case XDP_REDIRECT: { + int err; + + err = xdp_do_redirect(pp->dev, xdp, prog); + if (unlikely(err)) { + mvneta_xdp_put_buff(pp, rxq, xdp, sync); + ret = MVNETA_XDP_DROPPED; + } else { + ret = MVNETA_XDP_REDIR; + stats->xdp_redirect++; + } + break; + } + case XDP_TX: + ret = mvneta_xdp_xmit_back(pp, xdp); + if (ret != MVNETA_XDP_TX) + mvneta_xdp_put_buff(pp, rxq, xdp, sync); + break; + default: + bpf_warn_invalid_xdp_action(pp->dev, prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(pp->dev, prog, act); + fallthrough; + case XDP_DROP: + mvneta_xdp_put_buff(pp, rxq, xdp, sync); + ret = MVNETA_XDP_DROPPED; + stats->xdp_drop++; + break; + } + + stats->rx_bytes += frame_sz + xdp->data_end - xdp->data - data_len; + stats->rx_packets++; + + return ret; +} + +static void +mvneta_swbm_rx_frame(struct mvneta_port *pp, + struct mvneta_rx_desc *rx_desc, + struct mvneta_rx_queue *rxq, + struct xdp_buff *xdp, int *size, + struct page *page) +{ + unsigned char *data = page_address(page); + int data_len = -MVNETA_MH_SIZE, len; + struct net_device *dev = pp->dev; + enum dma_data_direction dma_dir; + + if (*size > MVNETA_MAX_RX_BUF_SIZE) { + len = MVNETA_MAX_RX_BUF_SIZE; + data_len += len; + } else { + len = *size; + data_len += len - ETH_FCS_LEN; + } + *size = *size - len; + + dma_dir = page_pool_get_dma_dir(rxq->page_pool); + dma_sync_single_for_cpu(dev->dev.parent, + rx_desc->buf_phys_addr, + len, dma_dir); + + rx_desc->buf_phys_addr = 0; + + /* Prefetch header */ + prefetch(data); + xdp_buff_clear_frags_flag(xdp); + xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE, + data_len, true); +} + +static void +mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, + struct mvneta_rx_desc *rx_desc, + struct mvneta_rx_queue *rxq, + struct xdp_buff *xdp, int *size, + struct page *page) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + struct net_device *dev = pp->dev; + enum dma_data_direction dma_dir; + int data_len, len; + + if (*size > MVNETA_MAX_RX_BUF_SIZE) { + len = MVNETA_MAX_RX_BUF_SIZE; + data_len = len; + } else { + len = *size; + data_len = len - ETH_FCS_LEN; + } + dma_dir = page_pool_get_dma_dir(rxq->page_pool); + dma_sync_single_for_cpu(dev->dev.parent, + rx_desc->buf_phys_addr, + len, dma_dir); + rx_desc->buf_phys_addr = 0; + + if (!xdp_buff_has_frags(xdp)) + sinfo->nr_frags = 0; + + if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; + + skb_frag_fill_page_desc(frag, page, + pp->rx_offset_correction, data_len); + + if (!xdp_buff_has_frags(xdp)) { + sinfo->xdp_frags_size = *size; + xdp_buff_set_frags_flag(xdp); + } + if (page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); + } else { + page_pool_put_full_page(rxq->page_pool, page, true); + } + *size -= len; +} + +static struct sk_buff * +mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, + struct xdp_buff *xdp, u32 desc_status) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + u32 metasize = xdp->data - xdp->data_meta; + struct sk_buff *skb; + u8 num_frags; + + if (unlikely(xdp_buff_has_frags(xdp))) + num_frags = sinfo->nr_frags; + + skb = build_skb(xdp->data_hard_start, PAGE_SIZE); + if (!skb) + return ERR_PTR(-ENOMEM); + + skb_mark_for_recycle(skb); + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); + skb->ip_summed = mvneta_rx_csum(pp, desc_status); + + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size, + num_frags * xdp->frame_sz, + xdp_buff_get_skb_flags(xdp)); + + return skb; +} + +/* Main rx processing when using software buffer management */ +static int mvneta_rx_swbm(struct napi_struct *napi, + struct mvneta_port *pp, int budget, + struct mvneta_rx_queue *rxq) +{ + int rx_proc = 0, rx_todo, refill, size = 0; struct net_device *dev = pp->dev; - int rx_done, rx_filled; + struct mvneta_stats ps = {}; + struct bpf_prog *xdp_prog; + u32 desc_status, frame_sz; + struct xdp_buff xdp_buf; + + xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq); + xdp_buf.data_hard_start = NULL; + + /* Get number of received packets */ + rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq); + + xdp_prog = READ_ONCE(pp->xdp_prog); + + /* Fairness NAPI loop */ + while (rx_proc < budget && rx_proc < rx_todo) { + struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); + u32 rx_status, index; + struct sk_buff *skb; + struct page *page; + + index = rx_desc - rxq->descs; + page = (struct page *)rxq->buf_virt_addr[index]; + + rx_status = rx_desc->status; + rx_proc++; + rxq->refill_num++; + + if (rx_status & MVNETA_RXD_FIRST_DESC) { + /* Check errors only for FIRST descriptor */ + if (rx_status & MVNETA_RXD_ERR_SUMMARY) { + mvneta_rx_error(pp, rx_desc); + goto next; + } + + size = rx_desc->data_size; + frame_sz = size - ETH_FCS_LEN; + desc_status = rx_status; + + mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf, + &size, page); + } else { + if (unlikely(!xdp_buf.data_hard_start)) { + rx_desc->buf_phys_addr = 0; + page_pool_put_full_page(rxq->page_pool, page, + true); + goto next; + } + + mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, + &size, page); + } /* Middle or Last descriptor */ + + if (!(rx_status & MVNETA_RXD_LAST_DESC)) + /* no last descriptor this time */ + continue; + + if (size) { + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); + goto next; + } + + if (xdp_prog && + mvneta_run_xdp(pp, rxq, xdp_prog, &xdp_buf, frame_sz, &ps)) + goto next; + + skb = mvneta_swbm_build_skb(pp, rxq->page_pool, &xdp_buf, desc_status); + if (IS_ERR(skb)) { + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); + + u64_stats_update_begin(&stats->syncp); + stats->es.skb_alloc_error++; + stats->rx_dropped++; + u64_stats_update_end(&stats->syncp); + + goto next; + } + + ps.rx_bytes += skb->len; + ps.rx_packets++; + + skb->protocol = eth_type_trans(skb, dev); + napi_gro_receive(napi, skb); +next: + xdp_buf.data_hard_start = NULL; + } + + if (xdp_buf.data_hard_start) + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); + + if (ps.xdp_redirect) + xdp_do_flush(); + + if (ps.rx_packets) + mvneta_update_stats(pp, &ps); + + /* return some buffers to hardware queue, one at a time is too slow */ + refill = mvneta_rx_refill_queue(pp, rxq); + + /* Update rxq management counters */ + mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill); + + return ps.rx_packets; +} + +/* Main rx processing when using hardware buffer management */ +static int mvneta_rx_hwbm(struct napi_struct *napi, + struct mvneta_port *pp, int rx_todo, + struct mvneta_rx_queue *rxq) +{ + struct net_device *dev = pp->dev; + int rx_done; + u32 rcvd_pkts = 0; + u32 rcvd_bytes = 0; /* Get number of received packets */ rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq); @@ -1353,78 +2553,328 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, rx_todo = rx_done; rx_done = 0; - rx_filled = 0; /* Fairness NAPI loop */ while (rx_done < rx_todo) { struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); + struct mvneta_bm_pool *bm_pool = NULL; struct sk_buff *skb; - u32 rx_status; + unsigned char *data; + dma_addr_t phys_addr; + u32 rx_status, frag_size; int rx_bytes, err; + u8 pool_id; - prefetch(rx_desc); rx_done++; - rx_filled++; rx_status = rx_desc->status; - skb = (struct sk_buff *)rx_desc->buf_cookie; + rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); + data = (u8 *)(uintptr_t)rx_desc->buf_cookie; + phys_addr = rx_desc->buf_phys_addr; + pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc); + bm_pool = &pp->bm_priv->bm_pools[pool_id]; - if (!mvneta_rxq_desc_is_first_last(rx_desc) || + if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { - dev->stats.rx_errors++; +err_drop_frame_ret_pool: + /* Return the buffer to the pool */ + mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, + rx_desc->buf_phys_addr); +err_drop_frame: mvneta_rx_error(pp, rx_desc); - mvneta_rx_desc_fill(rx_desc, rx_desc->buf_phys_addr, - (u32)skb); + /* leave the descriptor untouched */ continue; } - dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, - rx_desc->data_size, DMA_FROM_DEVICE); + if (rx_bytes <= rx_copybreak) { + /* better copy a small frame and not unmap the DMA region */ + skb = netdev_alloc_skb_ip_align(dev, rx_bytes); + if (unlikely(!skb)) + goto err_drop_frame_ret_pool; - rx_bytes = rx_desc->data_size - - (ETH_FCS_LEN + MVNETA_MH_SIZE); - u64_stats_update_begin(&pp->rx_stats.syncp); - pp->rx_stats.packets++; - pp->rx_stats.bytes += rx_bytes; - u64_stats_update_end(&pp->rx_stats.syncp); + dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev, + rx_desc->buf_phys_addr, + MVNETA_MH_SIZE + NET_SKB_PAD, + rx_bytes, + DMA_FROM_DEVICE); + skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD, + rx_bytes); - /* Linux processing */ - skb_reserve(skb, MVNETA_MH_SIZE); - skb_put(skb, rx_bytes); + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = mvneta_rx_csum(pp, rx_status); + napi_gro_receive(napi, skb); - skb->protocol = eth_type_trans(skb, dev); + rcvd_pkts++; + rcvd_bytes += rx_bytes; - mvneta_rx_csum(pp, rx_desc, skb); + /* Return the buffer to the pool */ + mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, + rx_desc->buf_phys_addr); - napi_gro_receive(&pp->napi, skb); + /* leave the descriptor and buffer untouched */ + continue; + } /* Refill processing */ - err = mvneta_rx_refill(pp, rx_desc); + err = hwbm_pool_refill(&bm_pool->hwbm_pool, GFP_ATOMIC); if (err) { - netdev_err(pp->dev, "Linux processing - Can't refill\n"); - rxq->missed++; - rx_filled--; + struct mvneta_pcpu_stats *stats; + + netdev_err(dev, "Linux processing - Can't refill\n"); + + stats = this_cpu_ptr(pp->stats); + u64_stats_update_begin(&stats->syncp); + stats->es.refill_error++; + u64_stats_update_end(&stats->syncp); + + goto err_drop_frame_ret_pool; } + + frag_size = bm_pool->hwbm_pool.frag_size; + + skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size); + + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. + */ + dma_unmap_single(&pp->bm_priv->pdev->dev, phys_addr, + bm_pool->buf_size, DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + + rcvd_pkts++; + rcvd_bytes += rx_bytes; + + /* Linux processing */ + skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD); + skb_put(skb, rx_bytes); + + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = mvneta_rx_csum(pp, rx_status); + + napi_gro_receive(napi, skb); + } + + if (rcvd_pkts) { + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + + u64_stats_update_begin(&stats->syncp); + stats->es.ps.rx_packets += rcvd_pkts; + stats->es.ps.rx_bytes += rcvd_bytes; + u64_stats_update_end(&stats->syncp); } /* Update rxq management counters */ - mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled); + mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); return rx_done; } +static void mvneta_free_tso_hdrs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + struct device *dev = pp->dev->dev.parent; + int i; + + for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) { + if (txq->tso_hdrs[i]) { + dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE, + txq->tso_hdrs[i], + txq->tso_hdrs_phys[i]); + txq->tso_hdrs[i] = NULL; + } + } +} + +static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + struct device *dev = pp->dev->dev.parent; + int i, num; + + num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE); + for (i = 0; i < num; i++) { + txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE, + &txq->tso_hdrs_phys[i], + GFP_KERNEL); + if (!txq->tso_hdrs[i]) { + mvneta_free_tso_hdrs(pp, txq); + return -ENOMEM; + } + } + + return 0; +} + +static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma) +{ + int index, offset; + + index = txq->txq_put_index / MVNETA_TSO_PER_PAGE; + offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE; + + *dma = txq->tso_hdrs_phys[index] + offset; + + return txq->tso_hdrs[index] + offset; +} + +static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq, + struct tso_t *tso, int size, bool is_last) +{ + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + int hdr_len = skb_tcp_all_headers(skb); + struct mvneta_tx_desc *tx_desc; + dma_addr_t hdr_phys; + char *hdr; + + hdr = mvneta_get_tso_hdr(txq, &hdr_phys); + tso_build_hdr(skb, hdr, tso, size, is_last); + + tx_desc = mvneta_txq_next_desc_get(txq); + tx_desc->data_size = hdr_len; + tx_desc->command = mvneta_skb_tx_csum(skb); + tx_desc->command |= MVNETA_TXD_F_DESC; + tx_desc->buf_phys_addr = hdr_phys; + buf->type = MVNETA_TYPE_TSO; + buf->skb = NULL; + + mvneta_txq_inc_put(txq); +} + +static inline int +mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq, + struct sk_buff *skb, char *data, int size, + bool last_tcp, bool is_last) +{ + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + struct mvneta_tx_desc *tx_desc; + + tx_desc = mvneta_txq_next_desc_get(txq); + tx_desc->data_size = size; + tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, data, + size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev->dev.parent, + tx_desc->buf_phys_addr))) { + mvneta_txq_desc_put(txq); + return -ENOMEM; + } + + tx_desc->command = 0; + buf->type = MVNETA_TYPE_SKB; + buf->skb = NULL; + + if (last_tcp) { + /* last descriptor in the TCP packet */ + tx_desc->command = MVNETA_TXD_L_DESC; + + /* last descriptor in SKB */ + if (is_last) + buf->skb = skb; + } + mvneta_txq_inc_put(txq); + return 0; +} + +static void mvneta_release_descs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq, + int first, int num) +{ + int desc_idx, i; + + desc_idx = first + num; + if (desc_idx >= txq->size) + desc_idx -= txq->size; + + for (i = num; i >= 0; i--) { + struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx; + struct mvneta_tx_buf *buf = &txq->buf[desc_idx]; + + if (buf->type == MVNETA_TYPE_SKB) + dma_unmap_single(pp->dev->dev.parent, + tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + + mvneta_txq_desc_put(txq); + + if (desc_idx == 0) + desc_idx = txq->size; + desc_idx -= 1; + } +} + +static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, + struct mvneta_tx_queue *txq) +{ + int hdr_len, total_len, data_left; + int first_desc, desc_count = 0; + struct mvneta_port *pp = netdev_priv(dev); + struct tso_t tso; + + /* Count needed descriptors */ + if ((txq->count + tso_count_descs(skb)) >= txq->size) + return 0; + + if (skb_headlen(skb) < skb_tcp_all_headers(skb)) { + pr_info("*** Is this even possible?\n"); + return 0; + } + + first_desc = txq->txq_put_index; + + /* Initialize the TSO handler, and prepare the first payload */ + hdr_len = tso_start(skb, &tso); + + total_len = skb->len - hdr_len; + while (total_len > 0) { + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + desc_count++; + + /* prepare packet headers: MAC + IP + TCP */ + mvneta_tso_put_hdr(skb, txq, &tso, data_left, total_len == 0); + + while (data_left > 0) { + int size; + desc_count++; + + size = min_t(int, tso.size, data_left); + + if (mvneta_tso_put_data(dev, txq, skb, + tso.data, size, + size == data_left, + total_len == 0)) + goto err_release; + data_left -= size; + + tso_build_data(skb, &tso, size); + } + } + + return desc_count; + +err_release: + /* Release all used data descriptors; header descriptors must not + * be DMA-unmapped. + */ + mvneta_release_descs(pp, txq, first_desc, desc_count - 1); + return 0; +} + /* Handle tx fragmentation processing */ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb, struct mvneta_tx_queue *txq) { struct mvneta_tx_desc *tx_desc; - int i; + int i, nr_frags = skb_shinfo(skb)->nr_frags; + int first_desc = txq->txq_put_index; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + for (i = 0; i < nr_frags; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - void *addr = page_address(frag->page.p) + frag->page_offset; + void *addr = skb_frag_address(frag); tx_desc = mvneta_txq_next_desc_get(txq); - tx_desc->data_size = frag->size; + tx_desc->data_size = skb_frag_size(frag); tx_desc->buf_phys_addr = dma_map_single(pp->dev->dev.parent, addr, @@ -1436,20 +2886,17 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb, goto error; } - if (i == (skb_shinfo(skb)->nr_frags - 1)) { + if (i == nr_frags - 1) { /* Last descriptor */ tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; - - txq->tx_skb[txq->txq_put_index] = skb; - - mvneta_txq_inc_put(txq); + buf->skb = skb; } else { /* Descriptor in the middle: Not First, Not Last */ tx_desc->command = 0; - - txq->tx_skb[txq->txq_put_index] = NULL; - mvneta_txq_inc_put(txq); + buf->skb = NULL; } + buf->type = MVNETA_TYPE_SKB; + mvneta_txq_inc_put(txq); } return 0; @@ -1458,39 +2905,36 @@ error: /* Release all descriptors that were used to map fragments of * this packet, as well as the corresponding DMA mappings */ - for (i = i - 1; i >= 0; i--) { - tx_desc = txq->descs + i; - dma_unmap_single(pp->dev->dev.parent, - tx_desc->buf_phys_addr, - tx_desc->data_size, - DMA_TO_DEVICE); - mvneta_txq_desc_put(txq); - } - + mvneta_release_descs(pp, txq, first_desc, i - 1); return -ENOMEM; } /* Main tx processing */ -static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); u16 txq_id = skb_get_queue_mapping(skb); struct mvneta_tx_queue *txq = &pp->txqs[txq_id]; + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; struct mvneta_tx_desc *tx_desc; - struct netdev_queue *nq; + int len = skb->len; int frags = 0; u32 tx_cmd; if (!netif_running(dev)) goto out; + if (skb_is_gso(skb)) { + frags = mvneta_tx_tso(skb, dev, txq); + goto out; + } + frags = skb_shinfo(skb)->nr_frags + 1; - nq = netdev_get_tx_queue(dev, txq_id); /* Get a descriptor for the first part of the packet */ tx_desc = mvneta_txq_next_desc_get(txq); - tx_cmd = mvneta_skb_tx_csum(pp, skb); + tx_cmd = mvneta_skb_tx_csum(skb); tx_desc->data_size = skb_headlen(skb); @@ -1504,16 +2948,17 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) goto out; } + buf->type = MVNETA_TYPE_SKB; if (frags == 1) { /* First and Last descriptor */ tx_cmd |= MVNETA_TXD_FLZ_DESC; tx_desc->command = tx_cmd; - txq->tx_skb[txq->txq_put_index] = skb; + buf->skb = skb; mvneta_txq_inc_put(txq); } else { /* First but not Last */ tx_cmd |= MVNETA_TXD_F_DESC; - txq->tx_skb[txq->txq_put_index] = NULL; + buf->skb = NULL; mvneta_txq_inc_put(txq); tx_desc->command = tx_cmd; /* Continue with other skb fragments */ @@ -1528,33 +2973,39 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) } } - txq->count += frags; - mvneta_txq_pend_desc_add(pp, txq, frags); - - if (txq->size - txq->count < MAX_SKB_FRAGS + 1) - netif_tx_stop_queue(nq); - out: if (frags > 0) { - u64_stats_update_begin(&pp->tx_stats.syncp); - pp->tx_stats.packets++; - pp->tx_stats.bytes += skb->len; - u64_stats_update_end(&pp->tx_stats.syncp); - + struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id); + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + + netdev_tx_sent_queue(nq, len); + + txq->count += frags; + if (txq->count >= txq->tx_stop_threshold) + netif_tx_stop_queue(nq); + + /* This is not really the true transmit point, since we batch + * up several before hitting the hardware, but is the best we + * can do without more complexity to walk the packets in the + * pending section of the transmit queue. + */ + skb_tx_timestamp(skb); + + if (!netdev_xmit_more() || netif_xmit_stopped(nq) || + txq->pending + frags > MVNETA_TXQ_DEC_SENT_MASK) + mvneta_txq_pend_desc_add(pp, txq, frags); + else + txq->pending += frags; + + u64_stats_update_begin(&stats->syncp); + stats->es.ps.tx_bytes += len; + stats->es.ps.tx_packets++; + u64_stats_update_end(&stats->syncp); } else { dev->stats.tx_dropped++; dev_kfree_skb_any(skb); } - if (txq->count >= MVNETA_TXDONE_COAL_PKTS) - mvneta_txq_done(pp, txq); - - /* If after calling mvneta_txq_done, count equals - * frags, we need to set the timer - */ - if (txq->count == frags && frags > 0) - mvneta_add_tx_done_timer(pp); - return NETDEV_TX_OK; } @@ -1564,9 +3015,10 @@ static void mvneta_txq_done_force(struct mvneta_port *pp, struct mvneta_tx_queue *txq) { + struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id); int tx_done = txq->count; - mvneta_txq_bufs_free(pp, txq, tx_done); + mvneta_txq_bufs_free(pp, txq, tx_done, nq, false); /* reset txq */ txq->count = 0; @@ -1574,33 +3026,27 @@ static void mvneta_txq_done_force(struct mvneta_port *pp, txq->txq_get_index = 0; } -/* handle tx done - called from tx done timer callback */ -static u32 mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done, - int *tx_todo) +/* Handle tx done - called in softirq context. The <cause_tx_done> argument + * must be a valid cause according to MVNETA_TXQ_INTR_MASK_ALL. + */ +static void mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done) { struct mvneta_tx_queue *txq; - u32 tx_done = 0; struct netdev_queue *nq; + int cpu = smp_processor_id(); - *tx_todo = 0; - while (cause_tx_done != 0) { + while (cause_tx_done) { txq = mvneta_tx_done_policy(pp, cause_tx_done); - if (!txq) - break; nq = netdev_get_tx_queue(pp->dev, txq->id); - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq, cpu); - if (txq->count) { - tx_done += mvneta_txq_done(pp, txq); - *tx_todo += txq->count; - } + if (txq->count) + mvneta_txq_done(pp, txq); __netif_tx_unlock(nq); cause_tx_done &= ~((1 << txq->id)); } - - return tx_done; } /* Compute crc8 of the specified address, using a unique algorithm , @@ -1768,19 +3214,19 @@ static void mvneta_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_PROMISC) { /* Accept all: Multicast + Unicast */ mvneta_rx_unicast_promisc_set(pp, 1); - mvneta_set_ucast_table(pp, rxq_def); - mvneta_set_special_mcast_table(pp, rxq_def); - mvneta_set_other_mcast_table(pp, rxq_def); + mvneta_set_ucast_table(pp, pp->rxq_def); + mvneta_set_special_mcast_table(pp, pp->rxq_def); + mvneta_set_other_mcast_table(pp, pp->rxq_def); } else { /* Accept single Unicast */ mvneta_rx_unicast_promisc_set(pp, 0); mvneta_set_ucast_table(pp, -1); - mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def); + mvneta_mac_addr_set(pp, dev->dev_addr, pp->rxq_def); if (dev->flags & IFF_ALLMULTI) { /* Accept all multicast */ - mvneta_set_special_mcast_table(pp, rxq_def); - mvneta_set_other_mcast_table(pp, rxq_def); + mvneta_set_special_mcast_table(pp, pp->rxq_def); + mvneta_set_other_mcast_table(pp, pp->rxq_def); } else { /* Accept only initialized multicast */ mvneta_set_special_mcast_table(pp, -1); @@ -1789,7 +3235,7 @@ static void mvneta_set_rx_mode(struct net_device *dev) if (!netdev_mc_empty(dev)) { netdev_for_each_mc_addr(ha, dev) { mvneta_mcast_addr_set(pp, ha->addr, - rxq_def); + pp->rxq_def); } } } @@ -1801,14 +3247,31 @@ static irqreturn_t mvneta_isr(int irq, void *dev_id) { struct mvneta_port *pp = (struct mvneta_port *)dev_id; - /* Mask all interrupts */ mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); - napi_schedule(&pp->napi); return IRQ_HANDLED; } +/* Interrupt handling - the callback for request_percpu_irq() */ +static irqreturn_t mvneta_percpu_isr(int irq, void *dev_id) +{ + struct mvneta_pcpu_port *port = (struct mvneta_pcpu_port *)dev_id; + + disable_percpu_irq(port->pp->dev->irq); + napi_schedule(&port->napi); + + return IRQ_HANDLED; +} + +static void mvneta_link_change(struct mvneta_port *pp) +{ + u32 gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS); + + phylink_pcs_change(&pp->phylink_pcs, + !!(gmac_stat & MVNETA_GMAC_LINK_UP)); +} + /* NAPI handler * Bits 0 - 7 of the causeRxTx register indicate that are transmitted * packets on the corresponding TXQ (Bit 0 is for TX queue 1). @@ -1820,8 +3283,9 @@ static int mvneta_poll(struct napi_struct *napi, int budget) { int rx_done = 0; u32 cause_rx_tx; - unsigned long flags; + int rx_queue; struct mvneta_port *pp = netdev_priv(napi->dev); + struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); if (!netif_running(pp->dev)) { napi_complete(napi); @@ -1829,104 +3293,128 @@ static int mvneta_poll(struct napi_struct *napi, int budget) } /* Read cause register */ - cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE) & - MVNETA_RX_INTR_MASK(rxq_number); + cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE); + if (cause_rx_tx & MVNETA_MISCINTR_INTR_MASK) { + u32 cause_misc = mvreg_read(pp, MVNETA_INTR_MISC_CAUSE); + + mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); + + if (cause_misc & (MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE)) + mvneta_link_change(pp); + } + + /* Release Tx descriptors */ + if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) { + mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL)); + cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL; + } /* For the case where the last mvneta_poll did not process all * RX packets */ - cause_rx_tx |= pp->cause_rx_tx; - if (rxq_number > 1) { - while ((cause_rx_tx != 0) && (budget > 0)) { - int count; - struct mvneta_rx_queue *rxq; - /* get rx queue number from cause_rx_tx */ - rxq = mvneta_rx_policy(pp, cause_rx_tx); - if (!rxq) - break; - - /* process the packet in that rx queue */ - count = mvneta_rx(pp, budget, rxq); - rx_done += count; - budget -= count; - if (budget > 0) { - /* set off the rx bit of the - * corresponding bit in the cause rx - * tx register, so that next iteration - * will find the next rx queue where - * packets are received on - */ - cause_rx_tx &= ~((1 << rxq->id) << 8); - } - } - } else { - rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]); - budget -= rx_done; + cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx : + port->cause_rx_tx; + + rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); + if (rx_queue) { + rx_queue = rx_queue - 1; + if (pp->bm_priv) + rx_done = mvneta_rx_hwbm(napi, pp, budget, + &pp->rxqs[rx_queue]); + else + rx_done = mvneta_rx_swbm(napi, pp, budget, + &pp->rxqs[rx_queue]); } - if (budget > 0) { + if (rx_done < budget) { cause_rx_tx = 0; - napi_complete(napi); - local_irq_save(flags); - mvreg_write(pp, MVNETA_INTR_NEW_MASK, - MVNETA_RX_INTR_MASK(rxq_number)); - local_irq_restore(flags); + napi_complete_done(napi, rx_done); + + if (pp->neta_armada3700) { + unsigned long flags; + + local_irq_save(flags); + mvreg_write(pp, MVNETA_INTR_NEW_MASK, + MVNETA_RX_INTR_MASK(rxq_number) | + MVNETA_TX_INTR_MASK(txq_number) | + MVNETA_MISCINTR_INTR_MASK); + local_irq_restore(flags); + } else { + enable_percpu_irq(pp->dev->irq, 0); + } } - pp->cause_rx_tx = cause_rx_tx; + if (pp->neta_armada3700) + pp->cause_rx_tx = cause_rx_tx; + else + port->cause_rx_tx = cause_rx_tx; + return rx_done; } -/* tx done timer callback */ -static void mvneta_tx_done_timer_callback(unsigned long data) +static int mvneta_create_page_pool(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq, int size) { - struct net_device *dev = (struct net_device *)data; - struct mvneta_port *pp = netdev_priv(dev); - int tx_done = 0, tx_todo = 0; + struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog); + struct page_pool_params pp_params = { + .order = 0, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .pool_size = size, + .nid = NUMA_NO_NODE, + .dev = pp->dev->dev.parent, + .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = pp->rx_offset_correction, + .max_len = MVNETA_MAX_RX_BUF_SIZE, + }; + int err; - if (!netif_running(dev)) - return ; + rxq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rxq->page_pool)) { + err = PTR_ERR(rxq->page_pool); + rxq->page_pool = NULL; + return err; + } + + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0, + PAGE_SIZE); + if (err < 0) + goto err_free_pp; + + err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, + rxq->page_pool); + if (err) + goto err_unregister_rxq; - clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags); + return 0; - tx_done = mvneta_tx_done_gbe(pp, - (((1 << txq_number) - 1) & - MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK), - &tx_todo); - if (tx_todo > 0) - mvneta_add_tx_done_timer(pp); +err_unregister_rxq: + xdp_rxq_info_unreg(&rxq->xdp_rxq); +err_free_pp: + page_pool_destroy(rxq->page_pool); + rxq->page_pool = NULL; + return err; } /* Handle rxq fill: allocates rxq skbs; called when initializing a port */ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, int num) { - struct net_device *dev = pp->dev; - int i; + int i, err; - for (i = 0; i < num; i++) { - struct sk_buff *skb; - struct mvneta_rx_desc *rx_desc; - unsigned long phys_addr; - - skb = dev_alloc_skb(pp->pkt_size); - if (!skb) { - netdev_err(dev, "%s:rxq %d, %d of %d buffs filled\n", - __func__, rxq->id, i, num); - break; - } + err = mvneta_create_page_pool(pp, rxq, num); + if (err < 0) + return err; - rx_desc = rxq->descs + i; - memset(rx_desc, 0, sizeof(struct mvneta_rx_desc)); - phys_addr = dma_map_single(dev->dev.parent, skb->head, - MVNETA_RX_BUF_SIZE(pp->pkt_size), - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev->dev.parent, phys_addr))) { - dev_kfree_skb(skb); + for (i = 0; i < num; i++) { + memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc)); + if (mvneta_rx_refill(pp, rxq->descs + i, rxq, + GFP_KERNEL) != 0) { + netdev_err(pp->dev, + "%s:rxq %d, %d of %d buffs filled\n", + __func__, rxq->id, i, num); break; } - - mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)skb); } /* Add this number of RX descriptors as non occupied (ready to @@ -1942,7 +3430,7 @@ static void mvneta_tx_reset(struct mvneta_port *pp) { int queue; - /* free the skb's in the hal tx ring */ + /* free the skb's in the tx ring */ for (queue = 0; queue < txq_number; queue++) mvneta_txq_done_force(pp, &pp->txqs[queue]); @@ -1958,10 +3446,8 @@ static void mvneta_rx_reset(struct mvneta_port *pp) /* Rx/Tx queue initialization/cleanup methods */ -/* Create a specified RX queue */ -static int mvneta_rxq_init(struct mvneta_port *pp, - struct mvneta_rx_queue *rxq) - +static int mvneta_rxq_sw_init(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq) { rxq->size = pp->rx_ring_size; @@ -1969,29 +3455,58 @@ static int mvneta_rxq_init(struct mvneta_port *pp, rxq->descs = dma_alloc_coherent(pp->dev->dev.parent, rxq->size * MVNETA_DESC_ALIGNED_SIZE, &rxq->descs_phys, GFP_KERNEL); - if (rxq->descs == NULL) + if (!rxq->descs) return -ENOMEM; - BUG_ON(rxq->descs != - PTR_ALIGN(rxq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE)); - rxq->last_desc = rxq->size - 1; + return 0; +} + +static void mvneta_rxq_hw_init(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq) +{ /* Set Rx descriptors queue starting address */ mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys); mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size); - /* Set Offset */ - mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD); - /* Set coalescing pkts and time */ mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal); mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal); - /* Fill RXQ with buffers from RX pool */ - mvneta_rxq_buf_size_set(pp, rxq, MVNETA_RX_BUF_SIZE(pp->pkt_size)); - mvneta_rxq_bm_disable(pp, rxq); - mvneta_rxq_fill(pp, rxq, rxq->size); + if (!pp->bm_priv) { + /* Set Offset */ + mvneta_rxq_offset_set(pp, rxq, 0); + mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ? + MVNETA_MAX_RX_BUF_SIZE : + MVNETA_RX_BUF_SIZE(pp->pkt_size)); + mvneta_rxq_bm_disable(pp, rxq); + mvneta_rxq_fill(pp, rxq, rxq->size); + } else { + /* Set Offset */ + mvneta_rxq_offset_set(pp, rxq, + NET_SKB_PAD - pp->rx_offset_correction); + + mvneta_rxq_bm_enable(pp, rxq); + /* Fill RXQ with buffers from RX pool */ + mvneta_rxq_long_pool_set(pp, rxq); + mvneta_rxq_short_pool_set(pp, rxq); + mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size); + } +} + +/* Create a specified RX queue */ +static int mvneta_rxq_init(struct mvneta_port *pp, + struct mvneta_rx_queue *rxq) + +{ + int ret; + + ret = mvneta_rxq_sw_init(pp, rxq); + if (ret < 0) + return ret; + + mvneta_rxq_hw_init(pp, rxq); return 0; } @@ -2012,27 +3527,58 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, rxq->last_desc = 0; rxq->next_desc_to_proc = 0; rxq->descs_phys = 0; + rxq->first_to_refill = 0; + rxq->refill_num = 0; } -/* Create and initialize a tx queue */ -static int mvneta_txq_init(struct mvneta_port *pp, - struct mvneta_tx_queue *txq) +static int mvneta_txq_sw_init(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) { + int cpu, err; + txq->size = pp->tx_ring_size; + /* A queue must always have room for at least one skb. + * Therefore, stop the queue when the free entries reaches + * the maximum number of descriptors per skb. + */ + txq->tx_stop_threshold = txq->size - MVNETA_MAX_SKB_DESCS; + txq->tx_wake_threshold = txq->tx_stop_threshold / 2; + /* Allocate memory for TX descriptors */ txq->descs = dma_alloc_coherent(pp->dev->dev.parent, txq->size * MVNETA_DESC_ALIGNED_SIZE, &txq->descs_phys, GFP_KERNEL); - if (txq->descs == NULL) + if (!txq->descs) return -ENOMEM; - /* Make sure descriptor address is cache line size aligned */ - BUG_ON(txq->descs != - PTR_ALIGN(txq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE)); - txq->last_desc = txq->size - 1; + txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL); + if (!txq->buf) + return -ENOMEM; + + /* Allocate DMA buffers for TSO MAC/IP/TCP headers */ + err = mvneta_alloc_tso_hdrs(pp, txq); + if (err) + return err; + + /* Setup XPS mapping */ + if (pp->neta_armada3700) + cpu = 0; + else if (txq_number > 1) + cpu = txq->id % num_present_cpus(); + else + cpu = pp->rxq_def % num_present_cpus(); + cpumask_set_cpu(cpu, &txq->affinity_mask); + netif_set_xps_queue(pp->dev, &txq->affinity_mask, txq->id); + + return 0; +} + +static void mvneta_txq_hw_init(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ /* Set maximum bandwidth for enabled TXQs */ mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0x03ffffff); mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0x3fffffff); @@ -2041,34 +3587,50 @@ static int mvneta_txq_init(struct mvneta_port *pp, mvreg_write(pp, MVNETA_TXQ_BASE_ADDR_REG(txq->id), txq->descs_phys); mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), txq->size); - txq->tx_skb = kmalloc(txq->size * sizeof(*txq->tx_skb), GFP_KERNEL); - if (txq->tx_skb == NULL) { - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); - return -ENOMEM; - } mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal); +} + +/* Create and initialize a tx queue */ +static int mvneta_txq_init(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + int ret; + + ret = mvneta_txq_sw_init(pp, txq); + if (ret < 0) + return ret; + + mvneta_txq_hw_init(pp, txq); return 0; } /* Free allocated resources when mvneta_txq_init() fails to allocate memory*/ -static void mvneta_txq_deinit(struct mvneta_port *pp, - struct mvneta_tx_queue *txq) +static void mvneta_txq_sw_deinit(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) { - kfree(txq->tx_skb); + struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id); + + kfree(txq->buf); + mvneta_free_tso_hdrs(pp, txq); if (txq->descs) dma_free_coherent(pp->dev->dev.parent, txq->size * MVNETA_DESC_ALIGNED_SIZE, txq->descs, txq->descs_phys); + netdev_tx_reset_queue(nq); + + txq->buf = NULL; txq->descs = NULL; txq->last_desc = 0; txq->next_desc_to_proc = 0; txq->descs_phys = 0; +} +static void mvneta_txq_hw_deinit(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ /* Set minimum bandwidth for disabled TXQs */ mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0); mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0); @@ -2078,6 +3640,13 @@ static void mvneta_txq_deinit(struct mvneta_port *pp, mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), 0); } +static void mvneta_txq_deinit(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + mvneta_txq_sw_deinit(pp, txq); + mvneta_txq_hw_deinit(pp, txq); +} + /* Cleanup all Tx queues */ static void mvneta_cleanup_txqs(struct mvneta_port *pp) { @@ -2104,6 +3673,7 @@ static int mvneta_setup_rxqs(struct mvneta_port *pp) for (queue = 0; queue < rxq_number; queue++) { int err = mvneta_rxq_init(pp, &pp->rxqs[queue]); + if (err) { netdev_err(pp->dev, "%s: can't create rxq=%d\n", __func__, queue); @@ -2133,30 +3703,117 @@ static int mvneta_setup_txqs(struct mvneta_port *pp) return 0; } +static int mvneta_comphy_init(struct mvneta_port *pp, phy_interface_t interface) +{ + int ret; + + ret = phy_set_mode_ext(pp->comphy, PHY_MODE_ETHERNET, interface); + if (ret) + return ret; + + return phy_power_on(pp->comphy); +} + +static int mvneta_config_interface(struct mvneta_port *pp, + phy_interface_t interface) +{ + int ret = 0; + + if (pp->comphy) { + if (interface == PHY_INTERFACE_MODE_SGMII || + interface == PHY_INTERFACE_MODE_1000BASEX || + interface == PHY_INTERFACE_MODE_2500BASEX) { + ret = mvneta_comphy_init(pp, interface); + } + } else { + switch (interface) { + case PHY_INTERFACE_MODE_QSGMII: + mvreg_write(pp, MVNETA_SERDES_CFG, + MVNETA_QSGMII_SERDES_PROTO); + break; + + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + mvreg_write(pp, MVNETA_SERDES_CFG, + MVNETA_SGMII_SERDES_PROTO); + break; + + case PHY_INTERFACE_MODE_2500BASEX: + mvreg_write(pp, MVNETA_SERDES_CFG, + MVNETA_HSGMII_SERDES_PROTO); + break; + default: + break; + } + } + + pp->phy_interface = interface; + + return ret; +} + static void mvneta_start_dev(struct mvneta_port *pp) { + int cpu; + + WARN_ON(mvneta_config_interface(pp, pp->phy_interface)); + mvneta_max_rx_size_set(pp, pp->pkt_size); mvneta_txq_max_tx_size_set(pp, pp->pkt_size); /* start the Rx/Tx activity */ mvneta_port_enable(pp); - /* Enable polling on the port */ - napi_enable(&pp->napi); + if (!pp->neta_armada3700) { + /* Enable polling on the port */ + for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *port = + per_cpu_ptr(pp->ports, cpu); - /* Unmask interrupts */ - mvreg_write(pp, MVNETA_INTR_NEW_MASK, - MVNETA_RX_INTR_MASK(rxq_number)); + napi_enable(&port->napi); + } + } else { + napi_enable(&pp->napi); + } + + /* Unmask interrupts. It has to be done from each CPU */ + on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); + + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE); + + phylink_start(pp->phylink); + + /* We may have called phylink_speed_down before */ + phylink_speed_up(pp->phylink); - phy_start(pp->phy_dev); netif_tx_start_all_queues(pp->dev); + + clear_bit(__MVNETA_DOWN, &pp->state); } static void mvneta_stop_dev(struct mvneta_port *pp) { - phy_stop(pp->phy_dev); + unsigned int cpu; - napi_disable(&pp->napi); + set_bit(__MVNETA_DOWN, &pp->state); + + if (device_may_wakeup(&pp->dev->dev)) + phylink_speed_down(pp->phylink, false); + + phylink_stop(pp->phylink); + + if (!pp->neta_armada3700) { + for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *port = + per_cpu_ptr(pp->ports, cpu); + + napi_disable(&port->napi); + } + } else { + napi_disable(&pp->napi); + } netif_carrier_off(pp->dev); @@ -2167,90 +3824,111 @@ static void mvneta_stop_dev(struct mvneta_port *pp) mvneta_port_disable(pp); /* Clear all ethernet port interrupts */ - mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); - mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0); + on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true); /* Mask all ethernet port interrupts */ - mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); - mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); - mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); + on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); mvneta_tx_reset(pp); mvneta_rx_reset(pp); + + WARN_ON(phy_power_off(pp->comphy)); } -/* tx timeout callback - display a message and stop/start the network device */ -static void mvneta_tx_timeout(struct net_device *dev) +static void mvneta_percpu_enable(void *arg) { - struct mvneta_port *pp = netdev_priv(dev); + struct mvneta_port *pp = arg; - netdev_info(dev, "tx timeout\n"); - mvneta_stop_dev(pp); - mvneta_start_dev(pp); + enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE); } -/* Return positive if MTU is valid */ -static int mvneta_check_mtu_valid(struct net_device *dev, int mtu) +static void mvneta_percpu_disable(void *arg) { - if (mtu < 68) { - netdev_err(dev, "cannot change mtu to less than 68\n"); - return -EINVAL; - } - - /* 9676 == 9700 - 20 and rounding to 8 */ - if (mtu > 9676) { - netdev_info(dev, "Illegal MTU value %d, round to 9676\n", mtu); - mtu = 9676; - } + struct mvneta_port *pp = arg; - if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { - netdev_info(dev, "Illegal MTU value %d, rounding to %d\n", - mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8)); - mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); - } - - return mtu; + disable_percpu_irq(pp->dev->irq); } /* Change the device mtu */ static int mvneta_change_mtu(struct net_device *dev, int mtu) { struct mvneta_port *pp = netdev_priv(dev); + struct bpf_prog *prog = pp->xdp_prog; int ret; - mtu = mvneta_check_mtu_valid(dev, mtu); - if (mtu < 0) + if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { + netdev_info(dev, "Illegal MTU value %d, rounding to %d\n", + mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8)); + mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); + } + + if (prog && !prog->aux->xdp_has_frags && + mtu > MVNETA_MAX_RX_BUF_SIZE) { + netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n", + mtu); + return -EINVAL; + } - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); - if (!netif_running(dev)) + if (!netif_running(dev)) { + if (pp->bm_priv) + mvneta_bm_update_mtu(pp, mtu); + + netdev_update_features(dev); return 0; + } /* The interface is running, so we have to force a - * reallocation of the RXQs + * reallocation of the queues */ mvneta_stop_dev(pp); + on_each_cpu(mvneta_percpu_disable, pp, true); mvneta_cleanup_txqs(pp); mvneta_cleanup_rxqs(pp); - pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu); + if (pp->bm_priv) + mvneta_bm_update_mtu(pp, mtu); + + pp->pkt_size = MVNETA_RX_PKT_SIZE(dev->mtu); ret = mvneta_setup_rxqs(pp); if (ret) { - netdev_err(pp->dev, "unable to setup rxqs after MTU change\n"); + netdev_err(dev, "unable to setup rxqs after MTU change\n"); return ret; } - mvneta_setup_txqs(pp); + ret = mvneta_setup_txqs(pp); + if (ret) { + netdev_err(dev, "unable to setup txqs after MTU change\n"); + return ret; + } + on_each_cpu(mvneta_percpu_enable, pp, true); mvneta_start_dev(pp); - mvneta_port_up(pp); + + netdev_update_features(dev); return 0; } +static netdev_features_t mvneta_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct mvneta_port *pp = netdev_priv(dev); + + if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) { + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + netdev_info(dev, + "Disable IP checksum for MTU greater than %dB\n", + pp->tx_csum_limit); + } + + return features; +} + /* Get mac address */ static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr) { @@ -2270,107 +3948,583 @@ static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr) static int mvneta_set_mac_addr(struct net_device *dev, void *addr) { struct mvneta_port *pp = netdev_priv(dev); - u8 *mac = addr + 2; - int i; - - if (netif_running(dev)) - return -EBUSY; + struct sockaddr *sockaddr = addr; + int ret; + ret = eth_prepare_mac_addr_change(dev, addr); + if (ret < 0) + return ret; /* Remove previous address table entry */ mvneta_mac_addr_set(pp, dev->dev_addr, -1); /* Set new addr in hw */ - mvneta_mac_addr_set(pp, mac, rxq_def); + mvneta_mac_addr_set(pp, sockaddr->sa_data, pp->rxq_def); + + eth_commit_mac_addr_change(dev, addr); + return 0; +} + +static struct mvneta_port *mvneta_pcs_to_port(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct mvneta_port, phylink_pcs); +} + +static unsigned int mvneta_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + /* When operating in an 802.3z mode, we must have AN enabled: + * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... + * When <PortType> = 1 (1000BASE-X) this field must be set to 1." + * Therefore, inband is "required". + */ + if (phy_interface_mode_is_8023z(interface)) + return LINK_INBAND_ENABLE; + + /* QSGMII, SGMII and RGMII can be configured to use inband + * signalling of the AN result. Indicate these as "possible". + */ + if (interface == PHY_INTERFACE_MODE_SGMII || + interface == PHY_INTERFACE_MODE_QSGMII || + phy_interface_mode_is_rgmii(interface)) + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + + /* For any other modes, indicate that inband is not supported. */ + return LINK_INBAND_DISABLE; +} + +static void mvneta_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, + struct phylink_link_state *state) +{ + struct mvneta_port *pp = mvneta_pcs_to_port(pcs); + u32 gmac_stat; + + gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS); + + if (gmac_stat & MVNETA_GMAC_SPEED_1000) + state->speed = + state->interface == PHY_INTERFACE_MODE_2500BASEX ? + SPEED_2500 : SPEED_1000; + else if (gmac_stat & MVNETA_GMAC_SPEED_100) + state->speed = SPEED_100; + else + state->speed = SPEED_10; + + state->an_complete = !!(gmac_stat & MVNETA_GMAC_AN_COMPLETE); + state->link = !!(gmac_stat & MVNETA_GMAC_LINK_UP); + state->duplex = !!(gmac_stat & MVNETA_GMAC_FULL_DUPLEX); + + if (gmac_stat & MVNETA_GMAC_RX_FLOW_CTRL_ENABLE) + state->pause |= MLO_PAUSE_RX; + if (gmac_stat & MVNETA_GMAC_TX_FLOW_CTRL_ENABLE) + state->pause |= MLO_PAUSE_TX; +} + +static int mvneta_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + struct mvneta_port *pp = mvneta_pcs_to_port(pcs); + u32 mask, val, an, old_an, changed; + + mask = MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_INBAND_RESTART_AN | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_FLOW_CTRL_EN | + MVNETA_GMAC_AN_DUPLEX_EN; + + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { + mask |= MVNETA_GMAC_CONFIG_MII_SPEED | + MVNETA_GMAC_CONFIG_GMII_SPEED | + MVNETA_GMAC_CONFIG_FULL_DUPLEX; + val = MVNETA_GMAC_INBAND_AN_ENABLE; + + if (interface == PHY_INTERFACE_MODE_SGMII) { + /* SGMII mode receives the speed and duplex from PHY */ + val |= MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN; + } else { + /* 802.3z mode has fixed speed and duplex */ + val |= MVNETA_GMAC_CONFIG_GMII_SPEED | + MVNETA_GMAC_CONFIG_FULL_DUPLEX; + + /* The FLOW_CTRL_EN bit selects either the hardware + * automatically or the CONFIG_FLOW_CTRL manually + * controls the GMAC pause mode. + */ + if (permit_pause_to_mac) + val |= MVNETA_GMAC_AN_FLOW_CTRL_EN; + + /* Update the advertisement bits */ + mask |= MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL; + if (phylink_test(advertising, Pause)) + val |= MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL; + } + } else { + /* Phy or fixed speed - disable in-band AN modes */ + val = 0; + } + + old_an = an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + an = (an & ~mask) | val; + changed = old_an ^ an; + if (changed) + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, an); + + /* We are only interested in the advertisement bits changing */ + return !!(changed & MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL); +} + +static void mvneta_pcs_an_restart(struct phylink_pcs *pcs) +{ + struct mvneta_port *pp = mvneta_pcs_to_port(pcs); + u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, + gmac_an | MVNETA_GMAC_INBAND_RESTART_AN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, + gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN); +} + +static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = { + .pcs_inband_caps = mvneta_pcs_inband_caps, + .pcs_get_state = mvneta_pcs_get_state, + .pcs_config = mvneta_pcs_config, + .pcs_an_restart = mvneta_pcs_an_restart, +}; + +static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + + return &pp->phylink_pcs; +} - /* Set addr in the device */ - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = mac[i]; +static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + u32 val; + + if (pp->phy_interface != interface || + phylink_autoneg_inband(mode)) { + /* Force the link down when changing the interface or if in + * in-band mode. According to Armada 370 documentation, we + * can only change the port mode and in-band enable when the + * link is down. + */ + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~MVNETA_GMAC_FORCE_LINK_PASS; + val |= MVNETA_GMAC_FORCE_LINK_DOWN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + } + + if (pp->phy_interface != interface) + WARN_ON(phy_power_off(pp->comphy)); + + /* Enable the 1ms clock */ + if (phylink_autoneg_inband(mode)) { + unsigned long rate = clk_get_rate(pp->clk); + + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, + MVNETA_GMAC_1MS_CLOCK_ENABLE | (rate / 1000)); + } return 0; } -static void mvneta_adjust_link(struct net_device *ndev) +static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); - struct phy_device *phydev = pp->phy_dev; - int status_change = 0; + u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0); + u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + u32 new_ctrl4, gmac_ctrl4 = mvreg_read(pp, MVNETA_GMAC_CTRL_4); - if (phydev->link) { - if ((pp->speed != phydev->speed) || - (pp->duplex != phydev->duplex)) { - u32 val; + new_ctrl0 = gmac_ctrl0 & ~MVNETA_GMAC0_PORT_1000BASE_X; + new_ctrl2 = gmac_ctrl2 & ~(MVNETA_GMAC2_INBAND_AN_ENABLE | + MVNETA_GMAC2_PORT_RESET); + new_ctrl4 = gmac_ctrl4 & ~(MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE); + + /* Even though it might look weird, when we're configured in + * SGMII or QSGMII mode, the RGMII bit needs to be set. + */ + new_ctrl2 |= MVNETA_GMAC2_PORT_RGMII; + + if (state->interface == PHY_INTERFACE_MODE_QSGMII || + state->interface == PHY_INTERFACE_MODE_SGMII || + phy_interface_mode_is_8023z(state->interface)) + new_ctrl2 |= MVNETA_GMAC2_PCS_ENABLE; + + if (!phylink_autoneg_inband(mode)) { + /* Phy or fixed speed - nothing to do, leave the + * configured speed, duplex and flow control as-is. + */ + } else if (state->interface == PHY_INTERFACE_MODE_SGMII) { + /* SGMII mode receives the state from the PHY */ + new_ctrl2 |= MVNETA_GMAC2_INBAND_AN_ENABLE; + } else { + /* 802.3z negotiation - only 1000base-X */ + new_ctrl0 |= MVNETA_GMAC0_PORT_1000BASE_X; + } - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | - MVNETA_GMAC_CONFIG_GMII_SPEED | - MVNETA_GMAC_CONFIG_FULL_DUPLEX); + /* When at 2.5G, the link partner can send frames with shortened + * preambles. + */ + if (state->interface == PHY_INTERFACE_MODE_2500BASEX) + new_ctrl4 |= MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE; + + if (new_ctrl0 != gmac_ctrl0) + mvreg_write(pp, MVNETA_GMAC_CTRL_0, new_ctrl0); + if (new_ctrl2 != gmac_ctrl2) + mvreg_write(pp, MVNETA_GMAC_CTRL_2, new_ctrl2); + if (new_ctrl4 != gmac_ctrl4) + mvreg_write(pp, MVNETA_GMAC_CTRL_4, new_ctrl4); + + if (gmac_ctrl2 & MVNETA_GMAC2_PORT_RESET) { + while ((mvreg_read(pp, MVNETA_GMAC_CTRL_2) & + MVNETA_GMAC2_PORT_RESET) != 0) + continue; + } +} - if (phydev->duplex) - val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; +static int mvneta_mac_finish(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + u32 val, clk; - if (phydev->speed == SPEED_1000) - val |= MVNETA_GMAC_CONFIG_GMII_SPEED; - else - val |= MVNETA_GMAC_CONFIG_MII_SPEED; + /* Disable 1ms clock if not in in-band mode */ + if (!phylink_autoneg_inband(mode)) { + clk = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + clk &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, clk); + } - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + if (pp->phy_interface != interface) + /* Enable the Serdes PHY */ + WARN_ON(mvneta_config_interface(pp, interface)); - pp->duplex = phydev->duplex; - pp->speed = phydev->speed; - } + /* Allow the link to come up if in in-band mode, otherwise the + * link is forced via mac_link_down()/mac_link_up() + */ + if (phylink_autoneg_inband(mode)) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~MVNETA_GMAC_FORCE_LINK_DOWN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } - if (phydev->link != pp->link) { - if (!phydev->link) { - pp->duplex = -1; - pp->speed = 0; - } + return 0; +} - pp->link = phydev->link; - status_change = 1; +static void mvneta_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + u32 val; + + mvneta_port_down(pp); + + if (!phylink_autoneg_inband(mode)) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~MVNETA_GMAC_FORCE_LINK_PASS; + val |= MVNETA_GMAC_FORCE_LINK_DOWN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } +} - if (status_change) { - if (phydev->link) { - u32 val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val |= (MVNETA_GMAC_FORCE_LINK_PASS | - MVNETA_GMAC_FORCE_LINK_DOWN); - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - mvneta_port_up(pp); - netdev_info(pp->dev, "link up\n"); - } else { - mvneta_port_down(pp); - netdev_info(pp->dev, "link down\n"); - } +static void mvneta_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + u32 val; + + if (!phylink_autoneg_inband(mode)) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_FORCE_LINK_DOWN | + MVNETA_GMAC_CONFIG_MII_SPEED | + MVNETA_GMAC_CONFIG_GMII_SPEED | + MVNETA_GMAC_CONFIG_FLOW_CTRL | + MVNETA_GMAC_CONFIG_FULL_DUPLEX); + val |= MVNETA_GMAC_FORCE_LINK_PASS; + + if (speed == SPEED_1000 || speed == SPEED_2500) + val |= MVNETA_GMAC_CONFIG_GMII_SPEED; + else if (speed == SPEED_100) + val |= MVNETA_GMAC_CONFIG_MII_SPEED; + + if (duplex == DUPLEX_FULL) + val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; + + if (tx_pause || rx_pause) + val |= MVNETA_GMAC_CONFIG_FLOW_CTRL; + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + } else { + /* When inband doesn't cover flow control or flow control is + * disabled, we need to manually configure it. This bit will + * only have effect if MVNETA_GMAC_AN_FLOW_CTRL_EN is unset. + */ + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~MVNETA_GMAC_CONFIG_FLOW_CTRL; + + if (tx_pause || rx_pause) + val |= MVNETA_GMAC_CONFIG_FLOW_CTRL; + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } + + mvneta_port_up(pp); } -static int mvneta_mdio_probe(struct mvneta_port *pp) +static void mvneta_mac_disable_tx_lpi(struct phylink_config *config) { - struct phy_device *phy_dev; + struct mvneta_port *pp = netdev_priv(to_net_dev(config->dev)); + u32 lpi1; + + lpi1 = mvreg_read(pp, MVNETA_LPI_CTRL_1); + lpi1 &= ~(MVNETA_LPI_CTRL_1_REQUEST_ENABLE | + MVNETA_LPI_CTRL_1_REQUEST_FORCE | + MVNETA_LPI_CTRL_1_MANUAL_MODE); + mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi1); +} - phy_dev = of_phy_connect(pp->dev, pp->phy_node, mvneta_adjust_link, 0, - pp->phy_interface); - if (!phy_dev) { - netdev_err(pp->dev, "could not find the PHY\n"); - return -ENODEV; +static int mvneta_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, + bool tx_clk_stop) +{ + struct mvneta_port *pp = netdev_priv(to_net_dev(config->dev)); + u32 ts, tw, lpi0, lpi1, status; + + status = mvreg_read(pp, MVNETA_GMAC_STATUS); + if (status & MVNETA_GMAC_SPEED_1000) { + /* At 1G speeds, the timer resolution are 1us, and + * 802.3 says tw is 16.5us. Round up to 17us. + */ + tw = 17; + ts = timer; + } else { + /* At 100M speeds, the timer resolutions are 10us, and + * 802.3 says tw is 30us. + */ + tw = 3; + ts = DIV_ROUND_UP(timer, 10); } - phy_dev->supported &= PHY_GBIT_FEATURES; - phy_dev->advertising = phy_dev->supported; + if (ts > 255) + ts = 255; + + /* Configure ts */ + lpi0 = mvreg_read(pp, MVNETA_LPI_CTRL_0); + lpi0 = u32_replace_bits(lpi0, ts, MVNETA_LPI_CTRL_0_TS); + mvreg_write(pp, MVNETA_LPI_CTRL_0, lpi0); - pp->phy_dev = phy_dev; - pp->link = 0; - pp->duplex = 0; - pp->speed = 0; + /* Configure tw and enable LPI generation */ + lpi1 = mvreg_read(pp, MVNETA_LPI_CTRL_1); + lpi1 = u32_replace_bits(lpi1, tw, MVNETA_LPI_CTRL_1_TW); + lpi1 |= MVNETA_LPI_CTRL_1_REQUEST_ENABLE; + mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi1); return 0; } +static const struct phylink_mac_ops mvneta_phylink_ops = { + .mac_select_pcs = mvneta_mac_select_pcs, + .mac_prepare = mvneta_mac_prepare, + .mac_config = mvneta_mac_config, + .mac_finish = mvneta_mac_finish, + .mac_link_down = mvneta_mac_link_down, + .mac_link_up = mvneta_mac_link_up, + .mac_disable_tx_lpi = mvneta_mac_disable_tx_lpi, + .mac_enable_tx_lpi = mvneta_mac_enable_tx_lpi, +}; + +static int mvneta_mdio_probe(struct mvneta_port *pp) +{ + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + int err = phylink_of_phy_connect(pp->phylink, pp->dn, 0); + + if (err) + netdev_err(pp->dev, "could not attach PHY: %d\n", err); + + phylink_ethtool_get_wol(pp->phylink, &wol); + device_set_wakeup_capable(&pp->dev->dev, !!wol.supported); + + /* PHY WoL may be enabled but device wakeup disabled */ + if (wol.supported) + device_set_wakeup_enable(&pp->dev->dev, !!wol.wolopts); + + return err; +} + static void mvneta_mdio_remove(struct mvneta_port *pp) { - phy_disconnect(pp->phy_dev); - pp->phy_dev = NULL; + phylink_disconnect_phy(pp->phylink); +} + +/* Electing a CPU must be done in an atomic way: it should be done + * after or before the removal/insertion of a CPU and this function is + * not reentrant. + */ +static void mvneta_percpu_elect(struct mvneta_port *pp) +{ + int elected_cpu = 0, max_cpu, cpu; + + /* Use the cpu associated to the rxq when it is online, in all + * the other cases, use the cpu 0 which can't be offline. + */ + if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) + elected_cpu = pp->rxq_def; + + max_cpu = num_present_cpus(); + + for_each_online_cpu(cpu) { + int rxq_map = 0, txq_map = 0; + int rxq; + + for (rxq = 0; rxq < rxq_number; rxq++) + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + + if (cpu == elected_cpu) + /* Map the default receive queue to the elected CPU */ + rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def); + + /* We update the TX queue map only if we have one + * queue. In this case we associate the TX queue to + * the CPU bound to the default RX queue + */ + if (txq_number == 1) + txq_map = (cpu == elected_cpu) ? + MVNETA_CPU_TXQ_ACCESS(0) : 0; + else + txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & + MVNETA_CPU_TXQ_ACCESS_ALL_MASK; + + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); + + /* Update the interrupt mask on each CPU according the + * new mapping + */ + smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt, + pp, true); + } +}; + +static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + int other_cpu; + struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port, + node_online); + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + /* Armada 3700's per-cpu interrupt for mvneta is broken, all interrupts + * are routed to CPU 0, so we don't need all the cpu-hotplug support + */ + if (pp->neta_armada3700) + return 0; + + netdev_lock(port->napi.dev); + spin_lock(&pp->lock); + /* + * Configuring the driver for a new CPU while the driver is + * stopping is racy, so just avoid it. + */ + if (pp->is_stopped) { + spin_unlock(&pp->lock); + netdev_unlock(port->napi.dev); + return 0; + } + netif_tx_stop_all_queues(pp->dev); + + /* + * We have to synchronise on tha napi of each CPU except the one + * just being woken up + */ + for_each_online_cpu(other_cpu) { + if (other_cpu != cpu) { + struct mvneta_pcpu_port *other_port = + per_cpu_ptr(pp->ports, other_cpu); + + napi_synchronize(&other_port->napi); + } + } + + /* Mask all ethernet port interrupts */ + on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + napi_enable_locked(&port->napi); + + /* + * Enable per-CPU interrupts on the CPU that is + * brought up. + */ + mvneta_percpu_enable(pp); + + /* + * Enable per-CPU interrupt on the one CPU we care + * about. + */ + mvneta_percpu_elect(pp); + + /* Unmask all ethernet port interrupts */ + on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE); + netif_tx_start_all_queues(pp->dev); + spin_unlock(&pp->lock); + netdev_unlock(port->napi.dev); + + return 0; +} + +static int mvneta_cpu_down_prepare(unsigned int cpu, struct hlist_node *node) +{ + struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port, + node_online); + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + /* + * Thanks to this lock we are sure that any pending cpu election is + * done. + */ + spin_lock(&pp->lock); + /* Mask all ethernet port interrupts */ + on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + spin_unlock(&pp->lock); + + napi_synchronize(&port->napi); + napi_disable(&port->napi); + /* Disable per-CPU interrupts on the CPU that is brought down. */ + mvneta_percpu_disable(pp); + return 0; +} + +static int mvneta_cpu_dead(unsigned int cpu, struct hlist_node *node) +{ + struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port, + node_dead); + + /* Check if a new CPU must be elected now this on is down */ + spin_lock(&pp->lock); + mvneta_percpu_elect(pp); + spin_unlock(&pp->lock); + /* Unmask all ethernet port interrupts */ + on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE); + netif_tx_start_all_queues(pp->dev); + return 0; } static int mvneta_open(struct net_device *dev) @@ -2378,8 +4532,6 @@ static int mvneta_open(struct net_device *dev) struct mvneta_port *pp = netdev_priv(dev); int ret; - mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def); - pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu); ret = mvneta_setup_rxqs(pp); @@ -2391,28 +4543,63 @@ static int mvneta_open(struct net_device *dev) goto err_cleanup_rxqs; /* Connect to port interrupt line */ - ret = request_irq(pp->dev->irq, mvneta_isr, 0, - MVNETA_DRIVER_NAME, pp); + if (pp->neta_armada3700) + ret = request_irq(pp->dev->irq, mvneta_isr, 0, + dev->name, pp); + else + ret = request_percpu_irq(pp->dev->irq, mvneta_percpu_isr, + dev->name, pp->ports); if (ret) { netdev_err(pp->dev, "cannot request irq %d\n", pp->dev->irq); goto err_cleanup_txqs; } - /* In default link is down */ - netif_carrier_off(pp->dev); + if (!pp->neta_armada3700) { + /* Enable per-CPU interrupt on all the CPU to handle our RX + * queue interrupts + */ + on_each_cpu(mvneta_percpu_enable, pp, true); + + pp->is_stopped = false; + /* Register a CPU notifier to handle the case where our CPU + * might be taken offline. + */ + ret = cpuhp_state_add_instance_nocalls(online_hpstate, + &pp->node_online); + if (ret) + goto err_free_irq; + + ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD, + &pp->node_dead); + if (ret) + goto err_free_online_hp; + } ret = mvneta_mdio_probe(pp); if (ret < 0) { netdev_err(dev, "cannot probe MDIO bus\n"); - goto err_free_irq; + goto err_free_dead_hp; } mvneta_start_dev(pp); return 0; +err_free_dead_hp: + if (!pp->neta_armada3700) + cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD, + &pp->node_dead); +err_free_online_hp: + if (!pp->neta_armada3700) + cpuhp_state_remove_instance_nocalls(online_hpstate, + &pp->node_online); err_free_irq: - free_irq(pp->dev->irq, pp); + if (pp->neta_armada3700) { + free_irq(pp->dev->irq, pp); + } else { + on_each_cpu(mvneta_percpu_disable, pp, true); + free_percpu_irq(pp->dev->irq, pp->ports); + } err_cleanup_txqs: mvneta_cleanup_txqs(pp); err_cleanup_rxqs: @@ -2425,44 +4612,122 @@ static int mvneta_stop(struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); - mvneta_stop_dev(pp); - mvneta_mdio_remove(pp); - free_irq(dev->irq, pp); + if (!pp->neta_armada3700) { + /* Inform that we are stopping so we don't want to setup the + * driver for new CPUs in the notifiers. The code of the + * notifier for CPU online is protected by the same spinlock, + * so when we get the lock, the notifier work is done. + */ + spin_lock(&pp->lock); + pp->is_stopped = true; + spin_unlock(&pp->lock); + + mvneta_stop_dev(pp); + mvneta_mdio_remove(pp); + + cpuhp_state_remove_instance_nocalls(online_hpstate, + &pp->node_online); + cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD, + &pp->node_dead); + on_each_cpu(mvneta_percpu_disable, pp, true); + free_percpu_irq(dev->irq, pp->ports); + } else { + mvneta_stop_dev(pp); + mvneta_mdio_remove(pp); + free_irq(dev->irq, pp); + } + mvneta_cleanup_rxqs(pp); mvneta_cleanup_txqs(pp); - del_timer(&pp->tx_done_timer); - clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags); return 0; } -/* Ethtool methods */ +static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mvneta_port *pp = netdev_priv(dev); + + return phylink_mii_ioctl(pp->phylink, ifr, cmd); +} -/* Get settings (phy address, speed) for ethtools */ -int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { + bool need_update, running = netif_running(dev); struct mvneta_port *pp = netdev_priv(dev); + struct bpf_prog *old_prog; - if (!pp->phy_dev) - return -ENODEV; + if (prog && !prog->aux->xdp_has_frags && + dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { + NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags"); + return -EOPNOTSUPP; + } + + if (pp->bm_priv) { + NL_SET_ERR_MSG_MOD(extack, + "Hardware Buffer Management not supported on XDP"); + return -EOPNOTSUPP; + } + + need_update = !!pp->xdp_prog != !!prog; + if (running && need_update) + mvneta_stop(dev); - return phy_ethtool_gset(pp->phy_dev, cmd); + old_prog = xchg(&pp->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (running && need_update) + return mvneta_open(dev); + + return 0; } -/* Set settings (phy address, speed) for ethtools */ -int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp) { - struct mvneta_port *pp = netdev_priv(dev); + switch (xdp->command) { + case XDP_SETUP_PROG: + return mvneta_xdp_setup(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + +/* Ethtool methods */ + +/* Set link ksettings (phy address, speed) for ethtools */ +static int +mvneta_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) +{ + struct mvneta_port *pp = netdev_priv(ndev); + + return phylink_ethtool_ksettings_set(pp->phylink, cmd); +} - if (!pp->phy_dev) - return -ENODEV; +/* Get link ksettings for ethtools */ +static int +mvneta_ethtool_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) +{ + struct mvneta_port *pp = netdev_priv(ndev); - return phy_ethtool_sset(pp->phy_dev, cmd); + return phylink_ethtool_ksettings_get(pp->phylink, cmd); +} + +static int mvneta_ethtool_nway_reset(struct net_device *dev) +{ + struct mvneta_port *pp = netdev_priv(dev); + + return phylink_ethtool_nway_reset(pp->phylink); } /* Set interrupt coalescing for ethtools */ -static int mvneta_ethtool_set_coalesce(struct net_device *dev, - struct ethtool_coalesce *c) +static int +mvneta_ethtool_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *c, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct mvneta_port *pp = netdev_priv(dev); int queue; @@ -2485,8 +4750,11 @@ static int mvneta_ethtool_set_coalesce(struct net_device *dev, } /* get coalescing for ethtools */ -static int mvneta_ethtool_get_coalesce(struct net_device *dev, - struct ethtool_coalesce *c) +static int +mvneta_ethtool_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *c, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct mvneta_port *pp = netdev_priv(dev); @@ -2501,17 +4769,20 @@ static int mvneta_ethtool_get_coalesce(struct net_device *dev, static void mvneta_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { - strlcpy(drvinfo->driver, MVNETA_DRIVER_NAME, + strscpy(drvinfo->driver, MVNETA_DRIVER_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, MVNETA_DRIVER_VERSION, + strscpy(drvinfo->version, MVNETA_DRIVER_VERSION, sizeof(drvinfo->version)); - strlcpy(drvinfo->bus_info, dev_name(&dev->dev), + strscpy(drvinfo->bus_info, dev_name(&dev->dev), sizeof(drvinfo->bus_info)); } -static void mvneta_ethtool_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) +static void +mvneta_ethtool_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { struct mvneta_port *pp = netdev_priv(netdev); @@ -2521,8 +4792,11 @@ static void mvneta_ethtool_get_ringparam(struct net_device *netdev, ring->tx_pending = pp->tx_ring_size; } -static int mvneta_ethtool_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *ring) +static int +mvneta_ethtool_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { struct mvneta_port *pp = netdev_priv(dev); @@ -2530,8 +4804,12 @@ static int mvneta_ethtool_set_ringparam(struct net_device *dev, return -EINVAL; pp->rx_ring_size = ring->rx_pending < MVNETA_MAX_RXD ? ring->rx_pending : MVNETA_MAX_RXD; - pp->tx_ring_size = ring->tx_pending < MVNETA_MAX_TXD ? - ring->tx_pending : MVNETA_MAX_TXD; + + pp->tx_ring_size = clamp_t(u16, ring->tx_pending, + MVNETA_MAX_SKB_DESCS * 2, MVNETA_MAX_TXD); + if (pp->tx_ring_size != ring->tx_pending) + netdev_warn(dev, "TX queue size set to %u (requested %u)\n", + pp->tx_ring_size, ring->tx_pending); if (netif_running(dev)) { mvneta_stop(dev); @@ -2545,6 +4823,500 @@ static int mvneta_ethtool_set_ringparam(struct net_device *dev, return 0; } +static void mvneta_ethtool_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mvneta_port *pp = netdev_priv(dev); + + phylink_ethtool_get_pauseparam(pp->phylink, pause); +} + +static int mvneta_ethtool_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mvneta_port *pp = netdev_priv(dev); + + return phylink_ethtool_set_pauseparam(pp->phylink, pause); +} + +static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + if (sset == ETH_SS_STATS) { + struct mvneta_port *pp = netdev_priv(netdev); + int i; + + for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) + ethtool_puts(&data, mvneta_statistics[i].name); + + if (!pp->bm_priv) { + page_pool_ethtool_stats_get_strings(data); + } + } +} + +static void +mvneta_ethtool_update_pcpu_stats(struct mvneta_port *pp, + struct mvneta_ethtool_stats *es) +{ + unsigned int start; + int cpu; + + for_each_possible_cpu(cpu) { + struct mvneta_pcpu_stats *stats; + u64 skb_alloc_error; + u64 refill_error; + u64 xdp_redirect; + u64 xdp_xmit_err; + u64 xdp_tx_err; + u64 xdp_pass; + u64 xdp_drop; + u64 xdp_xmit; + u64 xdp_tx; + + stats = per_cpu_ptr(pp->stats, cpu); + do { + start = u64_stats_fetch_begin(&stats->syncp); + skb_alloc_error = stats->es.skb_alloc_error; + refill_error = stats->es.refill_error; + xdp_redirect = stats->es.ps.xdp_redirect; + xdp_pass = stats->es.ps.xdp_pass; + xdp_drop = stats->es.ps.xdp_drop; + xdp_xmit = stats->es.ps.xdp_xmit; + xdp_xmit_err = stats->es.ps.xdp_xmit_err; + xdp_tx = stats->es.ps.xdp_tx; + xdp_tx_err = stats->es.ps.xdp_tx_err; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + es->skb_alloc_error += skb_alloc_error; + es->refill_error += refill_error; + es->ps.xdp_redirect += xdp_redirect; + es->ps.xdp_pass += xdp_pass; + es->ps.xdp_drop += xdp_drop; + es->ps.xdp_xmit += xdp_xmit; + es->ps.xdp_xmit_err += xdp_xmit_err; + es->ps.xdp_tx += xdp_tx; + es->ps.xdp_tx_err += xdp_tx_err; + } +} + +static void mvneta_ethtool_update_stats(struct mvneta_port *pp) +{ + struct mvneta_ethtool_stats stats = {}; + const struct mvneta_statistic *s; + void __iomem *base = pp->base; + u32 high, low; + u64 val; + int i; + + mvneta_ethtool_update_pcpu_stats(pp, &stats); + for (i = 0, s = mvneta_statistics; + s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics); + s++, i++) { + switch (s->type) { + case T_REG_32: + val = readl_relaxed(base + s->offset); + pp->ethtool_stats[i] += val; + break; + case T_REG_64: + /* Docs say to read low 32-bit then high */ + low = readl_relaxed(base + s->offset); + high = readl_relaxed(base + s->offset + 4); + val = (u64)high << 32 | low; + pp->ethtool_stats[i] += val; + break; + case T_SW: + switch (s->offset) { + case ETHTOOL_STAT_EEE_WAKEUP: + val = phylink_get_eee_err(pp->phylink); + pp->ethtool_stats[i] += val; + break; + case ETHTOOL_STAT_SKB_ALLOC_ERR: + pp->ethtool_stats[i] = stats.skb_alloc_error; + break; + case ETHTOOL_STAT_REFILL_ERR: + pp->ethtool_stats[i] = stats.refill_error; + break; + case ETHTOOL_XDP_REDIRECT: + pp->ethtool_stats[i] = stats.ps.xdp_redirect; + break; + case ETHTOOL_XDP_PASS: + pp->ethtool_stats[i] = stats.ps.xdp_pass; + break; + case ETHTOOL_XDP_DROP: + pp->ethtool_stats[i] = stats.ps.xdp_drop; + break; + case ETHTOOL_XDP_TX: + pp->ethtool_stats[i] = stats.ps.xdp_tx; + break; + case ETHTOOL_XDP_TX_ERR: + pp->ethtool_stats[i] = stats.ps.xdp_tx_err; + break; + case ETHTOOL_XDP_XMIT: + pp->ethtool_stats[i] = stats.ps.xdp_xmit; + break; + case ETHTOOL_XDP_XMIT_ERR: + pp->ethtool_stats[i] = stats.ps.xdp_xmit_err; + break; + } + break; + } + } +} + +static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data) +{ + struct page_pool_stats stats = {}; + int i; + + for (i = 0; i < rxq_number; i++) { + if (pp->rxqs[i].page_pool) + page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + } + + page_pool_ethtool_stats_get(data, &stats); +} + +static void mvneta_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mvneta_port *pp = netdev_priv(dev); + int i; + + mvneta_ethtool_update_stats(pp); + + for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) + *data++ = pp->ethtool_stats[i]; + + if (!pp->bm_priv) + mvneta_ethtool_pp_stats(pp, data); +} + +static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) { + int count = ARRAY_SIZE(mvneta_statistics); + struct mvneta_port *pp = netdev_priv(dev); + + if (!pp->bm_priv) + count += page_pool_ethtool_stats_get_count(); + + return count; + } + + return -EOPNOTSUPP; +} + +static u32 mvneta_ethtool_get_rxfh_indir_size(struct net_device *dev) +{ + return MVNETA_RSS_LU_TABLE_SIZE; +} + +static u32 mvneta_ethtool_get_rx_ring_count(struct net_device *dev) +{ + return rxq_number; +} + +static int mvneta_config_rss(struct mvneta_port *pp) +{ + int cpu; + u32 val; + + netif_tx_stop_all_queues(pp->dev); + + on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + + if (!pp->neta_armada3700) { + /* We have to synchronise on the napi of each CPU */ + for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *pcpu_port = + per_cpu_ptr(pp->ports, cpu); + + napi_synchronize(&pcpu_port->napi); + napi_disable(&pcpu_port->napi); + } + } else { + napi_synchronize(&pp->napi); + napi_disable(&pp->napi); + } + + pp->rxq_def = pp->indir[0]; + + /* Update unicast mapping */ + mvneta_set_rx_mode(pp->dev); + + /* Update val of portCfg register accordingly with all RxQueue types */ + val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); + mvreg_write(pp, MVNETA_PORT_CONFIG, val); + + /* Update the elected CPU matching the new rxq_def */ + spin_lock(&pp->lock); + mvneta_percpu_elect(pp); + spin_unlock(&pp->lock); + + if (!pp->neta_armada3700) { + /* We have to synchronise on the napi of each CPU */ + for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *pcpu_port = + per_cpu_ptr(pp->ports, cpu); + + napi_enable(&pcpu_port->napi); + } + } else { + napi_enable(&pp->napi); + } + + netif_tx_start_all_queues(pp->dev); + + return 0; +} + +static int mvneta_ethtool_set_rxfh(struct net_device *dev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct mvneta_port *pp = netdev_priv(dev); + + /* Current code for Armada 3700 doesn't support RSS features yet */ + if (pp->neta_armada3700) + return -EOPNOTSUPP; + + /* We require at least one supported parameter to be changed + * and no change in any of the unsupported parameters + */ + if (rxfh->key || + (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!rxfh->indir) + return 0; + + memcpy(pp->indir, rxfh->indir, MVNETA_RSS_LU_TABLE_SIZE); + + return mvneta_config_rss(pp); +} + +static int mvneta_ethtool_get_rxfh(struct net_device *dev, + struct ethtool_rxfh_param *rxfh) +{ + struct mvneta_port *pp = netdev_priv(dev); + + /* Current code for Armada 3700 doesn't support RSS features yet */ + if (pp->neta_armada3700) + return -EOPNOTSUPP; + + rxfh->hfunc = ETH_RSS_HASH_TOP; + + if (!rxfh->indir) + return 0; + + memcpy(rxfh->indir, pp->indir, MVNETA_RSS_LU_TABLE_SIZE); + + return 0; +} + +static void mvneta_ethtool_get_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct mvneta_port *pp = netdev_priv(dev); + + phylink_ethtool_get_wol(pp->phylink, wol); +} + +static int mvneta_ethtool_set_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct mvneta_port *pp = netdev_priv(dev); + int ret; + + ret = phylink_ethtool_set_wol(pp->phylink, wol); + if (!ret) + device_set_wakeup_enable(&dev->dev, !!wol->wolopts); + + return ret; +} + +static int mvneta_ethtool_get_eee(struct net_device *dev, + struct ethtool_keee *eee) +{ + struct mvneta_port *pp = netdev_priv(dev); + + return phylink_ethtool_get_eee(pp->phylink, eee); +} + +static int mvneta_ethtool_set_eee(struct net_device *dev, + struct ethtool_keee *eee) +{ + struct mvneta_port *pp = netdev_priv(dev); + + /* The Armada 37x documents do not give limits for this other than + * it being an 8-bit register. + */ + if (eee->tx_lpi_enabled && eee->tx_lpi_timer > 255) + return -EINVAL; + + return phylink_ethtool_set_eee(pp->phylink, eee); +} + +static void mvneta_clear_rx_prio_map(struct mvneta_port *pp) +{ + mvreg_write(pp, MVNETA_VLAN_PRIO_TO_RXQ, 0); +} + +static void mvneta_map_vlan_prio_to_rxq(struct mvneta_port *pp, u8 pri, u8 rxq) +{ + u32 val = mvreg_read(pp, MVNETA_VLAN_PRIO_TO_RXQ); + + val &= ~MVNETA_VLAN_PRIO_RXQ_MAP(pri, 0x7); + val |= MVNETA_VLAN_PRIO_RXQ_MAP(pri, rxq); + + mvreg_write(pp, MVNETA_VLAN_PRIO_TO_RXQ, val); +} + +static int mvneta_enable_per_queue_rate_limit(struct mvneta_port *pp) +{ + unsigned long core_clk_rate; + u32 refill_cycles; + u32 val; + + core_clk_rate = clk_get_rate(pp->clk); + if (!core_clk_rate) + return -EINVAL; + + refill_cycles = MVNETA_TXQ_BUCKET_REFILL_BASE_PERIOD_NS / + (NSEC_PER_SEC / core_clk_rate); + + if (refill_cycles > MVNETA_REFILL_MAX_NUM_CLK) + return -EINVAL; + + /* Enable bw limit algorithm version 3 */ + val = mvreg_read(pp, MVNETA_TXQ_CMD1_REG); + val &= ~(MVNETA_TXQ_CMD1_BW_LIM_SEL_V1 | MVNETA_TXQ_CMD1_BW_LIM_EN); + mvreg_write(pp, MVNETA_TXQ_CMD1_REG, val); + + /* Set the base refill rate */ + mvreg_write(pp, MVNETA_REFILL_NUM_CLK_REG, refill_cycles); + + return 0; +} + +static void mvneta_disable_per_queue_rate_limit(struct mvneta_port *pp) +{ + u32 val = mvreg_read(pp, MVNETA_TXQ_CMD1_REG); + + val |= (MVNETA_TXQ_CMD1_BW_LIM_SEL_V1 | MVNETA_TXQ_CMD1_BW_LIM_EN); + mvreg_write(pp, MVNETA_TXQ_CMD1_REG, val); +} + +static int mvneta_setup_queue_rates(struct mvneta_port *pp, int queue, + u64 min_rate, u64 max_rate) +{ + u32 refill_val, rem; + u32 val = 0; + + /* Convert to from Bps to bps */ + max_rate *= 8; + + if (min_rate) + return -EINVAL; + + refill_val = div_u64_rem(max_rate, MVNETA_TXQ_RATE_LIMIT_RESOLUTION, + &rem); + + if (rem || !refill_val || + refill_val > MVNETA_TXQ_BUCKET_REFILL_VALUE_MAX) + return -EINVAL; + + val = refill_val; + val |= (MVNETA_TXQ_BUCKET_REFILL_PERIOD << + MVNETA_TXQ_BUCKET_REFILL_PERIOD_SHIFT); + + mvreg_write(pp, MVNETA_TXQ_BUCKET_REFILL_REG(queue), val); + + return 0; +} + +static int mvneta_setup_mqprio(struct net_device *dev, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct mvneta_port *pp = netdev_priv(dev); + int rxq, txq, tc, ret; + u8 num_tc; + + if (mqprio->qopt.hw != TC_MQPRIO_HW_OFFLOAD_TCS) + return 0; + + num_tc = mqprio->qopt.num_tc; + + if (num_tc > rxq_number) + return -EINVAL; + + mvneta_clear_rx_prio_map(pp); + + if (!num_tc) { + mvneta_disable_per_queue_rate_limit(pp); + netdev_reset_tc(dev); + return 0; + } + + netdev_set_num_tc(dev, mqprio->qopt.num_tc); + + for (tc = 0; tc < mqprio->qopt.num_tc; tc++) { + netdev_set_tc_queue(dev, tc, mqprio->qopt.count[tc], + mqprio->qopt.offset[tc]); + + for (rxq = mqprio->qopt.offset[tc]; + rxq < mqprio->qopt.count[tc] + mqprio->qopt.offset[tc]; + rxq++) { + if (rxq >= rxq_number) + return -EINVAL; + + mvneta_map_vlan_prio_to_rxq(pp, tc, rxq); + } + } + + if (mqprio->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + mvneta_disable_per_queue_rate_limit(pp); + return 0; + } + + if (mqprio->qopt.num_tc > txq_number) + return -EINVAL; + + ret = mvneta_enable_per_queue_rate_limit(pp); + if (ret) + return ret; + + for (tc = 0; tc < mqprio->qopt.num_tc; tc++) { + for (txq = mqprio->qopt.offset[tc]; + txq < mqprio->qopt.count[tc] + mqprio->qopt.offset[tc]; + txq++) { + if (txq >= txq_number) + return -EINVAL; + + ret = mvneta_setup_queue_rates(pp, txq, + mqprio->min_rate[tc], + mqprio->max_rate[tc]); + if (ret) + return ret; + } + } + + return 0; +} + +static int mvneta_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return mvneta_setup_mqprio(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops mvneta_netdev_ops = { .ndo_open = mvneta_open, .ndo_stop = mvneta_stop, @@ -2552,23 +5324,44 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_set_rx_mode = mvneta_set_rx_mode, .ndo_set_mac_address = mvneta_set_mac_addr, .ndo_change_mtu = mvneta_change_mtu, - .ndo_tx_timeout = mvneta_tx_timeout, + .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, + .ndo_eth_ioctl = mvneta_ioctl, + .ndo_bpf = mvneta_xdp, + .ndo_xdp_xmit = mvneta_xdp_xmit, + .ndo_setup_tc = mvneta_setup_tc, }; -const struct ethtool_ops mvneta_eth_tool_ops = { +static const struct ethtool_ops mvneta_eth_tool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, + .nway_reset = mvneta_ethtool_nway_reset, .get_link = ethtool_op_get_link, - .get_settings = mvneta_ethtool_get_settings, - .set_settings = mvneta_ethtool_set_settings, .set_coalesce = mvneta_ethtool_set_coalesce, .get_coalesce = mvneta_ethtool_get_coalesce, .get_drvinfo = mvneta_ethtool_get_drvinfo, .get_ringparam = mvneta_ethtool_get_ringparam, .set_ringparam = mvneta_ethtool_set_ringparam, + .get_pauseparam = mvneta_ethtool_get_pauseparam, + .set_pauseparam = mvneta_ethtool_set_pauseparam, + .get_strings = mvneta_ethtool_get_strings, + .get_ethtool_stats = mvneta_ethtool_get_stats, + .get_sset_count = mvneta_ethtool_get_sset_count, + .get_rxfh_indir_size = mvneta_ethtool_get_rxfh_indir_size, + .get_rx_ring_count = mvneta_ethtool_get_rx_ring_count, + .get_rxfh = mvneta_ethtool_get_rxfh, + .set_rxfh = mvneta_ethtool_set_rxfh, + .get_link_ksettings = mvneta_ethtool_get_link_ksettings, + .set_link_ksettings = mvneta_ethtool_set_link_ksettings, + .get_wol = mvneta_ethtool_get_wol, + .set_wol = mvneta_ethtool_set_wol, + .get_ts_info = ethtool_op_get_ts_info, + .get_eee = mvneta_ethtool_get_eee, + .set_eee = mvneta_ethtool_set_eee, }; /* Initialize hw */ -static int mvneta_init(struct mvneta_port *pp, int phy_addr) +static int mvneta_init(struct device *dev, struct mvneta_port *pp) { int queue; @@ -2578,8 +5371,7 @@ static int mvneta_init(struct mvneta_port *pp, int phy_addr) /* Set port default values */ mvneta_defaults_set(pp); - pp->txqs = kzalloc(txq_number * sizeof(struct mvneta_tx_queue), - GFP_KERNEL); + pp->txqs = devm_kcalloc(dev, txq_number, sizeof(*pp->txqs), GFP_KERNEL); if (!pp->txqs) return -ENOMEM; @@ -2591,12 +5383,9 @@ static int mvneta_init(struct mvneta_port *pp, int phy_addr) txq->done_pkts_coal = MVNETA_TXDONE_COAL_PKTS; } - pp->rxqs = kzalloc(rxq_number * sizeof(struct mvneta_rx_queue), - GFP_KERNEL); - if (!pp->rxqs) { - kfree(pp->txqs); + pp->rxqs = devm_kcalloc(dev, rxq_number, sizeof(*pp->rxqs), GFP_KERNEL); + if (!pp->rxqs) return -ENOMEM; - } /* Create Rx descriptor rings */ for (queue = 0; queue < rxq_number; queue++) { @@ -2605,17 +5394,18 @@ static int mvneta_init(struct mvneta_port *pp, int phy_addr) rxq->size = pp->rx_ring_size; rxq->pkts_coal = MVNETA_RX_COAL_PKTS; rxq->time_coal = MVNETA_RX_COAL_USEC; + rxq->buf_virt_addr + = devm_kmalloc_array(pp->dev->dev.parent, + rxq->size, + sizeof(*rxq->buf_virt_addr), + GFP_KERNEL); + if (!rxq->buf_virt_addr) + return -ENOMEM; } return 0; } -static void mvneta_deinit(struct mvneta_port *pp) -{ - kfree(pp->txqs); - kfree(pp->rxqs); -} - /* platform glue : initialize decoding windows */ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, const struct mbus_dram_target_info *dram) @@ -2635,137 +5425,237 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, win_enable = 0x3f; win_protect = 0; - for (i = 0; i < dram->num_cs; i++) { - const struct mbus_dram_window *cs = dram->cs + i; - mvreg_write(pp, MVNETA_WIN_BASE(i), (cs->base & 0xffff0000) | - (cs->mbus_attr << 8) | dram->mbus_dram_target_id); + if (dram) { + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; - mvreg_write(pp, MVNETA_WIN_SIZE(i), - (cs->size - 1) & 0xffff0000); + mvreg_write(pp, MVNETA_WIN_BASE(i), + (cs->base & 0xffff0000) | + (cs->mbus_attr << 8) | + dram->mbus_dram_target_id); - win_enable &= ~(1 << i); - win_protect |= 3 << (2 * i); + mvreg_write(pp, MVNETA_WIN_SIZE(i), + (cs->size - 1) & 0xffff0000); + + win_enable &= ~(1 << i); + win_protect |= 3 << (2 * i); + } + } else { + if (pp->neta_ac5) + mvreg_write(pp, MVNETA_WIN_BASE(0), + (MVNETA_AC5_CNM_DDR_ATTR << 8) | + MVNETA_AC5_CNM_DDR_TARGET); + /* For Armada3700 open default 4GB Mbus window, leaving + * arbitration of target/attribute to a different layer + * of configuration. + */ + mvreg_write(pp, MVNETA_WIN_SIZE(0), 0xffff0000); + win_enable &= ~BIT(0); + win_protect = 3; } mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); + mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); } /* Power up the port */ -static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) +static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) { - u32 val; - /* MAC Cause register should be cleared */ mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); - if (phy_mode == PHY_INTERFACE_MODE_SGMII) - mvneta_port_sgmii_config(pp); - - mvneta_gmac_rgmii_set(pp, 1); + if (phy_mode != PHY_INTERFACE_MODE_QSGMII && + phy_mode != PHY_INTERFACE_MODE_SGMII && + !phy_interface_mode_is_8023z(phy_mode) && + !phy_interface_mode_is_rgmii(phy_mode)) + return -EINVAL; - /* Cancel Port Reset */ - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - val &= ~MVNETA_GMAC2_PORT_RESET; - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + /* Ensure LPI is disabled */ + mvneta_mac_disable_tx_lpi(&pp->phylink_config); - while ((mvreg_read(pp, MVNETA_GMAC_CTRL_2) & - MVNETA_GMAC2_PORT_RESET) != 0) - continue; + return 0; } /* Device initialization routine */ static int mvneta_probe(struct platform_device *pdev) { - const struct mbus_dram_target_info *dram_target_info; struct device_node *dn = pdev->dev.of_node; - struct device_node *phy_node; - u32 phy_addr; + struct device_node *bm_node; struct mvneta_port *pp; struct net_device *dev; - const char *dt_mac_addr; + struct phylink *phylink; + struct phy *comphy; char hw_mac_addr[ETH_ALEN]; + phy_interface_t phy_mode; const char *mac_from; - int phy_mode; + int tx_csum_limit; int err; + int cpu; - /* Our multiqueue support is not complete, so for now, only - * allow the usage of the first RX queue - */ - if (rxq_def != 0) { - dev_err(&pdev->dev, "Invalid rxq_def argument: %d\n", rxq_def); - return -EINVAL; - } - - dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number); + dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(struct mvneta_port), + txq_number, rxq_number); if (!dev) return -ENOMEM; - dev->irq = irq_of_parse_and_map(dn, 0); - if (dev->irq == 0) { - err = -EINVAL; - goto err_free_netdev; - } + dev->tx_queue_len = MVNETA_MAX_TXD; + dev->watchdog_timeo = 5 * HZ; + dev->netdev_ops = &mvneta_netdev_ops; + dev->ethtool_ops = &mvneta_eth_tool_ops; - phy_node = of_parse_phandle(dn, "phy", 0); - if (!phy_node) { - dev_err(&pdev->dev, "no associated PHY\n"); - err = -ENODEV; - goto err_free_irq; - } + pp = netdev_priv(dev); + spin_lock_init(&pp->lock); + pp->dn = dn; - phy_mode = of_get_phy_mode(dn); - if (phy_mode < 0) { + pp->rxq_def = rxq_def; + pp->indir[0] = rxq_def; + + err = of_get_phy_mode(dn, &phy_mode); + if (err) { dev_err(&pdev->dev, "incorrect phy-mode\n"); - err = -EINVAL; - goto err_free_irq; + return err; } - dev->tx_queue_len = MVNETA_MAX_TXD; - dev->watchdog_timeo = 5 * HZ; - dev->netdev_ops = &mvneta_netdev_ops; + pp->phy_interface = phy_mode; - SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops); + comphy = devm_of_phy_get(&pdev->dev, dn, NULL); + if (comphy == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; - pp = netdev_priv(dev); + if (IS_ERR(comphy)) + comphy = NULL; - pp->tx_done_timer.function = mvneta_tx_done_timer_callback; - init_timer(&pp->tx_done_timer); - clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags); + pp->comphy = comphy; - pp->weight = MVNETA_RX_POLL_WEIGHT; - pp->phy_node = phy_node; - pp->phy_interface = phy_mode; + pp->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pp->base)) + return PTR_ERR(pp->base); - pp->base = of_iomap(dn, 0); - if (pp->base == NULL) { - err = -ENOMEM; - goto err_free_irq; + /* Get special SoC configurations */ + if (of_device_is_compatible(dn, "marvell,armada-3700-neta")) + pp->neta_armada3700 = true; + if (of_device_is_compatible(dn, "marvell,armada-ac5-neta")) { + pp->neta_armada3700 = true; + pp->neta_ac5 = true; } - pp->clk = devm_clk_get(&pdev->dev, NULL); + dev->irq = irq_of_parse_and_map(dn, 0); + if (dev->irq == 0) + return -EINVAL; + + pp->clk = devm_clk_get(&pdev->dev, "core"); + if (IS_ERR(pp->clk)) + pp->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pp->clk)) { err = PTR_ERR(pp->clk); - goto err_unmap; + goto err_free_irq; } clk_prepare_enable(pp->clk); - dt_mac_addr = of_get_mac_address(dn); - if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) { + pp->clk_bus = devm_clk_get(&pdev->dev, "bus"); + if (!IS_ERR(pp->clk_bus)) + clk_prepare_enable(pp->clk_bus); + + pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; + + pp->phylink_config.dev = &dev->dev; + pp->phylink_config.type = PHYLINK_NETDEV; + pp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | + MAC_100 | MAC_1000FD | MAC_2500FD; + + /* Setup EEE. Choose 250us idle. Only supported in SGMII modes. */ + __set_bit(PHY_INTERFACE_MODE_QSGMII, pp->phylink_config.lpi_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, pp->phylink_config.lpi_interfaces); + pp->phylink_config.lpi_capabilities = MAC_100FD | MAC_1000FD; + pp->phylink_config.lpi_timer_default = 250; + pp->phylink_config.eee_enabled_default = true; + + phy_interface_set_rgmii(pp->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_QSGMII, + pp->phylink_config.supported_interfaces); + if (comphy) { + /* If a COMPHY is present, we can support any of the serdes + * modes and switch between them. + */ + __set_bit(PHY_INTERFACE_MODE_SGMII, + pp->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + pp->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + pp->phylink_config.supported_interfaces); + } else if (phy_mode == PHY_INTERFACE_MODE_2500BASEX) { + /* No COMPHY, with only 2500BASE-X mode supported */ + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + pp->phylink_config.supported_interfaces); + } else if (phy_mode == PHY_INTERFACE_MODE_1000BASEX || + phy_mode == PHY_INTERFACE_MODE_SGMII) { + /* No COMPHY, we can switch between 1000BASE-X and SGMII */ + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + pp->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + pp->phylink_config.supported_interfaces); + } + + phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode, + phy_mode, &mvneta_phylink_ops); + if (IS_ERR(phylink)) { + err = PTR_ERR(phylink); + goto err_clk; + } + + pp->phylink = phylink; + + /* Alloc per-cpu port structure */ + pp->ports = alloc_percpu(struct mvneta_pcpu_port); + if (!pp->ports) { + err = -ENOMEM; + goto err_free_phylink; + } + + /* Alloc per-cpu stats */ + pp->stats = netdev_alloc_pcpu_stats(struct mvneta_pcpu_stats); + if (!pp->stats) { + err = -ENOMEM; + goto err_free_ports; + } + + err = of_get_ethdev_address(dn, dev); + if (!err) { mac_from = "device tree"; - memcpy(dev->dev_addr, dt_mac_addr, ETH_ALEN); } else { mvneta_get_mac_addr(pp, hw_mac_addr); if (is_valid_ether_addr(hw_mac_addr)) { mac_from = "hardware"; - memcpy(dev->dev_addr, hw_mac_addr, ETH_ALEN); + eth_hw_addr_set(dev, hw_mac_addr); } else { mac_from = "random"; eth_hw_addr_random(dev); } } - pp->tx_done_timer.data = (unsigned long)dev; + if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) { + if (tx_csum_limit < 0 || + tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + dev_info(&pdev->dev, + "Wrong TX csum limit in DT, set to %dB\n", + MVNETA_TX_CSUM_DEF_SIZE); + } + } else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + } else { + tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE; + } + + pp->tx_csum_limit = tx_csum_limit; + + pp->dram_target_info = mv_mbus_dram_info(); + /* Armada3700 requires setting default configuration of Mbus + * windows, however without using filled mbus_dram_target_info + * structure. + */ + if (pp->dram_target_info || pp->neta_armada3700) + mvneta_conf_mbus_windows(pp, pp->dram_target_info); pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; @@ -2773,28 +5663,82 @@ static int mvneta_probe(struct platform_device *pdev) pp->dev = dev; SET_NETDEV_DEV(dev, &pdev->dev); - err = mvneta_init(pp, phy_addr); + pp->id = global_port_id++; + + /* Obtain access to BM resources if enabled and already initialized */ + bm_node = of_parse_phandle(dn, "buffer-manager", 0); + if (bm_node) { + pp->bm_priv = mvneta_bm_get(bm_node); + if (pp->bm_priv) { + err = mvneta_bm_port_init(pdev, pp); + if (err < 0) { + dev_info(&pdev->dev, + "use SW buffer management\n"); + mvneta_bm_put(pp->bm_priv); + pp->bm_priv = NULL; + } + } + /* Set RX packet offset correction for platforms, whose + * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit + * platforms and 0B for 32-bit ones. + */ + pp->rx_offset_correction = max(0, + NET_SKB_PAD - + MVNETA_RX_PKT_OFFSET_CORRECTION); + } + of_node_put(bm_node); + + /* sw buffer management */ + if (!pp->bm_priv) + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + + err = mvneta_init(&pdev->dev, pp); + if (err < 0) + goto err_netdev; + + err = mvneta_port_power_up(pp, pp->phy_interface); if (err < 0) { - dev_err(&pdev->dev, "can't init eth hal\n"); - goto err_clk; + dev_err(&pdev->dev, "can't power up port\n"); + goto err_netdev; } - mvneta_port_power_up(pp, phy_mode); - dram_target_info = mv_mbus_dram_info(); - if (dram_target_info) - mvneta_conf_mbus_windows(pp, dram_target_info); + /* Armada3700 network controller does not support per-cpu + * operation, so only single NAPI should be initialized. + */ + if (pp->neta_armada3700) { + netif_napi_add(dev, &pp->napi, mvneta_poll); + } else { + for_each_present_cpu(cpu) { + struct mvneta_pcpu_port *port = + per_cpu_ptr(pp->ports, cpu); - netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight); + netif_napi_add(dev, &port->napi, mvneta_poll); + port->pp = pp; + } + } - dev->features = NETIF_F_SG | NETIF_F_IP_CSUM; - dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM; - dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM; - dev->priv_flags |= IFF_UNICAST_FLT; + dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_TSO | NETIF_F_RXCSUM; + dev->hw_features |= dev->features; + dev->vlan_features |= dev->features; + if (!pp->bm_priv) + dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_set_tso_max_segs(dev, MVNETA_MAX_TSO_SEGS); + + /* MTU range: 68 - 9676 */ + dev->min_mtu = ETH_MIN_MTU; + /* 9676 == 9700 - 20 and rounding to 8 */ + dev->max_mtu = 9676; err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register\n"); - goto err_deinit; + goto err_netdev; } netdev_info(dev, "Using %s mac address %pM\n", mac_from, @@ -2804,37 +5748,166 @@ static int mvneta_probe(struct platform_device *pdev) return 0; -err_deinit: - mvneta_deinit(pp); +err_netdev: + if (pp->bm_priv) { + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, + 1 << pp->id); + mvneta_bm_put(pp->bm_priv); + } + free_percpu(pp->stats); +err_free_ports: + free_percpu(pp->ports); +err_free_phylink: + if (pp->phylink) + phylink_destroy(pp->phylink); err_clk: + clk_disable_unprepare(pp->clk_bus); clk_disable_unprepare(pp->clk); -err_unmap: - iounmap(pp->base); err_free_irq: irq_dispose_mapping(dev->irq); -err_free_netdev: - free_netdev(dev); return err; } /* Device removal routine */ -static int mvneta_remove(struct platform_device *pdev) +static void mvneta_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct mvneta_port *pp = netdev_priv(dev); unregister_netdev(dev); - mvneta_deinit(pp); + clk_disable_unprepare(pp->clk_bus); clk_disable_unprepare(pp->clk); - iounmap(pp->base); + free_percpu(pp->ports); + free_percpu(pp->stats); irq_dispose_mapping(dev->irq); - free_netdev(dev); + phylink_destroy(pp->phylink); + + if (pp->bm_priv) { + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); + mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, + 1 << pp->id); + mvneta_bm_put(pp->bm_priv); + } +} + +#ifdef CONFIG_PM_SLEEP +static int mvneta_suspend(struct device *device) +{ + int queue; + struct net_device *dev = dev_get_drvdata(device); + struct mvneta_port *pp = netdev_priv(dev); + + if (!netif_running(dev)) + goto clean_exit; + + if (!pp->neta_armada3700) { + spin_lock(&pp->lock); + pp->is_stopped = true; + spin_unlock(&pp->lock); + + cpuhp_state_remove_instance_nocalls(online_hpstate, + &pp->node_online); + cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD, + &pp->node_dead); + } + + rtnl_lock(); + mvneta_stop_dev(pp); + rtnl_unlock(); + + for (queue = 0; queue < rxq_number; queue++) { + struct mvneta_rx_queue *rxq = &pp->rxqs[queue]; + + mvneta_rxq_drop_pkts(pp, rxq); + } + + for (queue = 0; queue < txq_number; queue++) { + struct mvneta_tx_queue *txq = &pp->txqs[queue]; + + mvneta_txq_hw_deinit(pp, txq); + } + +clean_exit: + netif_device_detach(dev); + clk_disable_unprepare(pp->clk_bus); + clk_disable_unprepare(pp->clk); return 0; } +static int mvneta_resume(struct device *device) +{ + struct platform_device *pdev = to_platform_device(device); + struct net_device *dev = dev_get_drvdata(device); + struct mvneta_port *pp = netdev_priv(dev); + int err, queue; + + clk_prepare_enable(pp->clk); + if (!IS_ERR(pp->clk_bus)) + clk_prepare_enable(pp->clk_bus); + if (pp->dram_target_info || pp->neta_armada3700) + mvneta_conf_mbus_windows(pp, pp->dram_target_info); + if (pp->bm_priv) { + err = mvneta_bm_port_init(pdev, pp); + if (err < 0) { + dev_info(&pdev->dev, "use SW buffer management\n"); + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + pp->bm_priv = NULL; + } + } + mvneta_defaults_set(pp); + err = mvneta_port_power_up(pp, pp->phy_interface); + if (err < 0) { + dev_err(device, "can't power up port\n"); + return err; + } + + netif_device_attach(dev); + + if (!netif_running(dev)) + return 0; + + for (queue = 0; queue < rxq_number; queue++) { + struct mvneta_rx_queue *rxq = &pp->rxqs[queue]; + + rxq->next_desc_to_proc = 0; + mvneta_rxq_hw_init(pp, rxq); + } + + for (queue = 0; queue < txq_number; queue++) { + struct mvneta_tx_queue *txq = &pp->txqs[queue]; + + txq->next_desc_to_proc = 0; + mvneta_txq_hw_init(pp, txq); + } + + if (!pp->neta_armada3700) { + spin_lock(&pp->lock); + pp->is_stopped = false; + spin_unlock(&pp->lock); + cpuhp_state_add_instance_nocalls(online_hpstate, + &pp->node_online); + cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD, + &pp->node_dead); + } + + rtnl_lock(); + mvneta_start_dev(pp); + rtnl_unlock(); + mvneta_set_rx_mode(dev); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(mvneta_pm_ops, mvneta_suspend, mvneta_resume); + static const struct of_device_id mvneta_match[] = { { .compatible = "marvell,armada-370-neta" }, + { .compatible = "marvell,armada-xp-neta" }, + { .compatible = "marvell,armada-3700-neta" }, + { .compatible = "marvell,armada-ac5-neta" }, { } }; MODULE_DEVICE_TABLE(of, mvneta_match); @@ -2845,16 +5918,55 @@ static struct platform_driver mvneta_driver = { .driver = { .name = MVNETA_DRIVER_NAME, .of_match_table = mvneta_match, + .pm = &mvneta_pm_ops, }, }; -module_platform_driver(mvneta_driver); +static int __init mvneta_driver_init(void) +{ + int ret; + + BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE); + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online", + mvneta_cpu_online, + mvneta_cpu_down_prepare); + if (ret < 0) + goto out; + online_hpstate = ret; + ret = cpuhp_setup_state_multi(CPUHP_NET_MVNETA_DEAD, "net/mvneta:dead", + NULL, mvneta_cpu_dead); + if (ret) + goto err_dead; + + ret = platform_driver_register(&mvneta_driver); + if (ret) + goto err; + return 0; + +err: + cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD); +err_dead: + cpuhp_remove_multi_state(online_hpstate); +out: + return ret; +} +module_init(mvneta_driver_init); + +static void __exit mvneta_driver_exit(void) +{ + platform_driver_unregister(&mvneta_driver); + cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD); + cpuhp_remove_multi_state(online_hpstate); +} +module_exit(mvneta_driver_exit); MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com"); MODULE_AUTHOR("Rami Rosen <rosenr@marvell.com>, Thomas Petazzoni <thomas.petazzoni@free-electrons.com>"); MODULE_LICENSE("GPL"); -module_param(rxq_number, int, S_IRUGO); -module_param(txq_number, int, S_IRUGO); +module_param(rxq_number, int, 0444); +module_param(txq_number, int, 0444); -module_param(rxq_def, int, S_IRUGO); +module_param(rxq_def, int, 0444); +module_param(rx_copybreak, int, 0644); |
