1 files changed, 3816 insertions, 704 deletions
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 712779fb12b7..7af44f858fa3 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -11,28 +11,43 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/mbus.h>
 #include <linux/module.h>
-#include <linux/interrupt.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
+#include <linux/phy/phy.h>
 #include <linux/phy.h>
-#include <linux/clk.h>
+#include <linux/phylink.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/hwbm.h>
+#include "mvneta_bm.h"
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tso.h>
+#include <net/page_pool/helpers.h>
+#include <net/pkt_sched.h>
+#include <linux/bpf_trace.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
-#define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(1)
+#define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(0)
+#define      MVNETA_RXQ_SHORT_POOL_ID_SHIFT	4
+#define      MVNETA_RXQ_SHORT_POOL_ID_MASK	0x30
+#define      MVNETA_RXQ_LONG_POOL_ID_SHIFT	6
+#define      MVNETA_RXQ_LONG_POOL_ID_MASK	0xc0
 #define      MVNETA_RXQ_PKT_OFFSET_ALL_MASK     (0xf    << 8)
 #define      MVNETA_RXQ_PKT_OFFSET_MASK(offs)   ((offs) << 8)
 #define MVNETA_RXQ_THRESHOLD_REG(q)             (0x14c0 + ((q) << 2))
@@ -46,6 +61,9 @@
 #define MVNETA_RXQ_STATUS_UPDATE_REG(q)         (0x1500 + ((q) << 2))
 #define      MVNETA_RXQ_ADD_NON_OCCUPIED_SHIFT  16
 #define      MVNETA_RXQ_ADD_NON_OCCUPIED_MAX    255
+#define MVNETA_PORT_POOL_BUFFER_SZ_REG(pool)	(0x1700 + ((pool) << 2))
+#define      MVNETA_PORT_POOL_BUFFER_SZ_SHIFT	3
+#define      MVNETA_PORT_POOL_BUFFER_SZ_MASK	0xfff8
 #define MVNETA_PORT_RX_RESET                    0x1cc0
 #define      MVNETA_PORT_RX_DMA_RESET           BIT(0)
 #define MVNETA_PHY_ADDR                         0x2000
@@ -58,6 +76,9 @@
 #define MVNETA_WIN_SIZE(w)                      (0x2204 + ((w) << 3))
 #define MVNETA_WIN_REMAP(w)                     (0x2280 + ((w) << 2))
 #define MVNETA_BASE_ADDR_ENABLE                 0x2290
+#define      MVNETA_AC5_CNM_DDR_TARGET		0x2
+#define      MVNETA_AC5_CNM_DDR_ATTR		0xb
+#define MVNETA_ACCESS_PROTECT_ENABLE            0x2294
 #define MVNETA_PORT_CONFIG                      0x2400
 #define      MVNETA_UNI_PROMISC_MODE            BIT(0)
 #define      MVNETA_DEF_RXQ(q)                  ((q) << 1)
@@ -79,36 +100,97 @@
 #define MVNETA_MAC_ADDR_HIGH                     0x2418
 #define MVNETA_SDMA_CONFIG                       0x241c
 #define      MVNETA_SDMA_BRST_SIZE_16            4
-#define      MVNETA_NO_DESC_SWAP                 0x0
 #define      MVNETA_RX_BRST_SZ_MASK(burst)       ((burst) << 1)
 #define      MVNETA_RX_NO_DATA_SWAP              BIT(4)
 #define      MVNETA_TX_NO_DATA_SWAP              BIT(5)
+#define      MVNETA_DESC_SWAP                    BIT(6)
 #define      MVNETA_TX_BRST_SZ_MASK(burst)       ((burst) << 22)
+#define	MVNETA_VLAN_PRIO_TO_RXQ			 0x2440
+#define      MVNETA_VLAN_PRIO_RXQ_MAP(prio, rxq) ((rxq) << ((prio) * 3))
 #define MVNETA_PORT_STATUS                       0x2444
-#define      MVNETA_TX_IN_PRGRS                  BIT(1)
+#define      MVNETA_TX_IN_PRGRS                  BIT(0)
 #define      MVNETA_TX_FIFO_EMPTY                BIT(8)
 #define MVNETA_RX_MIN_FRAME_SIZE                 0x247c
+/* Only exists on Armada XP and Armada 370 */
+#define MVNETA_SERDES_CFG			 0x24A0
+#define      MVNETA_SGMII_SERDES_PROTO		 0x0cc7
+#define      MVNETA_QSGMII_SERDES_PROTO		 0x0667
+#define      MVNETA_HSGMII_SERDES_PROTO		 0x1107
 #define MVNETA_TYPE_PRIO                         0x24bc
 #define      MVNETA_FORCE_UNI                    BIT(21)
 #define MVNETA_TXQ_CMD_1                         0x24e4
 #define MVNETA_TXQ_CMD                           0x2448
 #define      MVNETA_TXQ_DISABLE_SHIFT            8
 #define      MVNETA_TXQ_ENABLE_MASK              0x000000ff
+#define MVNETA_RX_DISCARD_FRAME_COUNT		 0x2484
+#define MVNETA_OVERRUN_FRAME_COUNT		 0x2488
+#define MVNETA_GMAC_CLOCK_DIVIDER                0x24f4
+#define      MVNETA_GMAC_1MS_CLOCK_ENABLE        BIT(31)
 #define MVNETA_ACC_MODE                          0x2500
+#define MVNETA_BM_ADDRESS                        0x2504
 #define MVNETA_CPU_MAP(cpu)                      (0x2540 + ((cpu) << 2))
 #define      MVNETA_CPU_RXQ_ACCESS_ALL_MASK      0x000000ff
 #define      MVNETA_CPU_TXQ_ACCESS_ALL_MASK      0x0000ff00
+#define      MVNETA_CPU_RXQ_ACCESS(rxq)		 BIT(rxq)
+#define      MVNETA_CPU_TXQ_ACCESS(txq)		 BIT(txq + 8)
 #define MVNETA_RXQ_TIME_COAL_REG(q)              (0x2580 + ((q) << 2))
+
+/* Exception Interrupt Port/Queue Cause register
+ *
+ * Their behavior depend of the mapping done using the PCPX2Q
+ * registers. For a given CPU if the bit associated to a queue is not
+ * set, then for the register a read from this CPU will always return
+ * 0 and a write won't do anything
+ */
+
 #define MVNETA_INTR_NEW_CAUSE                    0x25a0
-#define      MVNETA_RX_INTR_MASK(nr_rxqs)        (((1 << nr_rxqs) - 1) << 8)
 #define MVNETA_INTR_NEW_MASK                     0x25a4
+
+/* bits  0..7  = TXQ SENT, one bit per queue.
+ * bits  8..15 = RXQ OCCUP, one bit per queue.
+ * bits 16..23 = RXQ FREE, one bit per queue.
+ * bit  29 = OLD_REG_SUM, see old reg ?
+ * bit  30 = TX_ERR_SUM, one bit for 4 ports
+ * bit  31 = MISC_SUM,   one bit for 4 ports
+ */
+#define      MVNETA_TX_INTR_MASK(nr_txqs)        (((1 << nr_txqs) - 1) << 0)
+#define      MVNETA_TX_INTR_MASK_ALL             (0xff << 0)
+#define      MVNETA_RX_INTR_MASK(nr_rxqs)        (((1 << nr_rxqs) - 1) << 8)
+#define      MVNETA_RX_INTR_MASK_ALL             (0xff << 8)
+#define      MVNETA_MISCINTR_INTR_MASK           BIT(31)
+
 #define MVNETA_INTR_OLD_CAUSE                    0x25a8
 #define MVNETA_INTR_OLD_MASK                     0x25ac
+
+/* Data Path Port/Queue Cause Register */
 #define MVNETA_INTR_MISC_CAUSE                   0x25b0
 #define MVNETA_INTR_MISC_MASK                    0x25b4
+
+#define      MVNETA_CAUSE_PHY_STATUS_CHANGE      BIT(0)
+#define      MVNETA_CAUSE_LINK_CHANGE            BIT(1)
+#define      MVNETA_CAUSE_PTP                    BIT(4)
+
+#define      MVNETA_CAUSE_INTERNAL_ADDR_ERR      BIT(7)
+#define      MVNETA_CAUSE_RX_OVERRUN             BIT(8)
+#define      MVNETA_CAUSE_RX_CRC_ERROR           BIT(9)
+#define      MVNETA_CAUSE_RX_LARGE_PKT           BIT(10)
+#define      MVNETA_CAUSE_TX_UNDERUN             BIT(11)
+#define      MVNETA_CAUSE_PRBS_ERR               BIT(12)
+#define      MVNETA_CAUSE_PSC_SYNC_CHANGE        BIT(13)
+#define      MVNETA_CAUSE_SERDES_SYNC_ERR        BIT(14)
+
+#define      MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT    16
+#define      MVNETA_CAUSE_BMU_ALLOC_ERR_ALL_MASK   (0xF << MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT)
+#define      MVNETA_CAUSE_BMU_ALLOC_ERR_MASK(pool) (1 << (MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT + (pool)))
+
+#define      MVNETA_CAUSE_TXQ_ERROR_SHIFT        24
+#define      MVNETA_CAUSE_TXQ_ERROR_ALL_MASK     (0xFF << MVNETA_CAUSE_TXQ_ERROR_SHIFT)
+#define      MVNETA_CAUSE_TXQ_ERROR_MASK(q)      (1 << (MVNETA_CAUSE_TXQ_ERROR_SHIFT + (q)))
+
 #define MVNETA_INTR_ENABLE                       0x25b8
 #define      MVNETA_TXQ_INTR_ENABLE_ALL_MASK     0x0000ff00
-#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0xff000000
+#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0x000000ff
+
 #define MVNETA_RXQ_CMD                           0x2680
 #define      MVNETA_RXQ_DISABLE_SHIFT            8
 #define      MVNETA_RXQ_ENABLE_MASK              0x000000ff
@@ -117,9 +199,11 @@
 #define MVNETA_GMAC_CTRL_0                       0x2c00
 #define      MVNETA_GMAC_MAX_RX_SIZE_SHIFT       2
 #define      MVNETA_GMAC_MAX_RX_SIZE_MASK        0x7ffc
+#define      MVNETA_GMAC0_PORT_1000BASE_X        BIT(1)
 #define      MVNETA_GMAC0_PORT_ENABLE            BIT(0)
 #define MVNETA_GMAC_CTRL_2                       0x2c08
-#define      MVNETA_GMAC2_PSC_ENABLE             BIT(3)
+#define      MVNETA_GMAC2_INBAND_AN_ENABLE       BIT(0)
+#define      MVNETA_GMAC2_PCS_ENABLE             BIT(3)
 #define      MVNETA_GMAC2_PORT_RGMII             BIT(4)
 #define      MVNETA_GMAC2_PORT_RESET             BIT(6)
 #define MVNETA_GMAC_STATUS                       0x2c10
@@ -131,13 +215,25 @@
 #define      MVNETA_GMAC_TX_FLOW_CTRL_ENABLE     BIT(5)
 #define      MVNETA_GMAC_RX_FLOW_CTRL_ACTIVE     BIT(6)
 #define      MVNETA_GMAC_TX_FLOW_CTRL_ACTIVE     BIT(7)
+#define      MVNETA_GMAC_AN_COMPLETE             BIT(11)
+#define      MVNETA_GMAC_SYNC_OK                 BIT(14)
 #define MVNETA_GMAC_AUTONEG_CONFIG               0x2c0c
 #define      MVNETA_GMAC_FORCE_LINK_DOWN         BIT(0)
 #define      MVNETA_GMAC_FORCE_LINK_PASS         BIT(1)
+#define      MVNETA_GMAC_INBAND_AN_ENABLE        BIT(2)
+#define      MVNETA_GMAC_AN_BYPASS_ENABLE        BIT(3)
+#define      MVNETA_GMAC_INBAND_RESTART_AN       BIT(4)
 #define      MVNETA_GMAC_CONFIG_MII_SPEED        BIT(5)
 #define      MVNETA_GMAC_CONFIG_GMII_SPEED       BIT(6)
+#define      MVNETA_GMAC_AN_SPEED_EN             BIT(7)
+#define      MVNETA_GMAC_CONFIG_FLOW_CTRL        BIT(8)
+#define      MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL    BIT(9)
+#define      MVNETA_GMAC_AN_FLOW_CTRL_EN         BIT(11)
 #define      MVNETA_GMAC_CONFIG_FULL_DUPLEX      BIT(12)
-#define MVNETA_MIB_COUNTERS_BASE                 0x3080
+#define      MVNETA_GMAC_AN_DUPLEX_EN            BIT(13)
+#define MVNETA_GMAC_CTRL_4                       0x2c90
+#define      MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE  BIT(1)
+#define MVNETA_MIB_COUNTERS_BASE                 0x3000
 #define      MVNETA_MIB_LATE_COLLISION           0x7c
 #define MVNETA_DA_FILT_SPEC_MCAST                0x3400
 #define MVNETA_DA_FILT_OTH_MCAST                 0x3500
@@ -148,17 +244,55 @@
 #define      MVNETA_TXQ_SENT_THRESH_MASK(coal)   ((coal) << 16)
 #define MVNETA_TXQ_UPDATE_REG(q)                 (0x3c60 + ((q) << 2))
 #define      MVNETA_TXQ_DEC_SENT_SHIFT           16
+#define      MVNETA_TXQ_DEC_SENT_MASK            0xff
 #define MVNETA_TXQ_STATUS_REG(q)                 (0x3c40 + ((q) << 2))
 #define      MVNETA_TXQ_SENT_DESC_SHIFT          16
 #define      MVNETA_TXQ_SENT_DESC_MASK           0x3fff0000
 #define MVNETA_PORT_TX_RESET                     0x3cf0
 #define      MVNETA_PORT_TX_DMA_RESET            BIT(0)
+#define MVNETA_TXQ_CMD1_REG			 0x3e00
+#define      MVNETA_TXQ_CMD1_BW_LIM_SEL_V1	 BIT(3)
+#define      MVNETA_TXQ_CMD1_BW_LIM_EN		 BIT(0)
+#define MVNETA_REFILL_NUM_CLK_REG		 0x3e08
+#define      MVNETA_REFILL_MAX_NUM_CLK		 0x0000ffff
 #define MVNETA_TX_MTU                            0x3e0c
 #define MVNETA_TX_TOKEN_SIZE                     0x3e14
 #define      MVNETA_TX_TOKEN_SIZE_MAX            0xffffffff
+#define MVNETA_TXQ_BUCKET_REFILL_REG(q)		 (0x3e20 + ((q) << 2))
+#define      MVNETA_TXQ_BUCKET_REFILL_PERIOD_MASK	0x3ff00000
+#define      MVNETA_TXQ_BUCKET_REFILL_PERIOD_SHIFT	20
+#define      MVNETA_TXQ_BUCKET_REFILL_VALUE_MAX	 0x0007ffff
 #define MVNETA_TXQ_TOKEN_SIZE_REG(q)             (0x3e40 + ((q) << 2))
 #define      MVNETA_TXQ_TOKEN_SIZE_MAX           0x7fffffff
 
+/* The values of the bucket refill base period and refill period are taken from
+ * the reference manual, and adds up to a base resolution of 10Kbps. This allows
+ * to cover all rate-limit values from 10Kbps up to 5Gbps
+ */
+
+/* Base period for the rate limit algorithm */
+#define MVNETA_TXQ_BUCKET_REFILL_BASE_PERIOD_NS	100
+
+/* Number of Base Period to wait between each bucket refill */
+#define MVNETA_TXQ_BUCKET_REFILL_PERIOD	1000
+
+/* The base resolution for rate limiting, in bps. Any max_rate value should be
+ * a multiple of that value.
+ */
+#define MVNETA_TXQ_RATE_LIMIT_RESOLUTION (NSEC_PER_SEC / \
+					 (MVNETA_TXQ_BUCKET_REFILL_BASE_PERIOD_NS * \
+					  MVNETA_TXQ_BUCKET_REFILL_PERIOD))
+
+#define MVNETA_LPI_CTRL_0                        0x2cc0
+#define      MVNETA_LPI_CTRL_0_TS                (0xff << 8)
+#define MVNETA_LPI_CTRL_1                        0x2cc4
+#define      MVNETA_LPI_CTRL_1_REQUEST_ENABLE    BIT(0)
+#define      MVNETA_LPI_CTRL_1_REQUEST_FORCE     BIT(1)
+#define      MVNETA_LPI_CTRL_1_MANUAL_MODE       BIT(2)
+#define      MVNETA_LPI_CTRL_1_TW                (0xfff << 4)
+#define MVNETA_LPI_CTRL_2                        0x2cc8
+#define MVNETA_LPI_STATUS                        0x2ccc
+
 #define MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK	 0xff
 
 /* Descriptor ring Macros */
@@ -168,16 +302,10 @@
 /* Various constants */
 
 /* Coalescing */
-#define MVNETA_TXDONE_COAL_PKTS		16
+#define MVNETA_TXDONE_COAL_PKTS		0	/* interrupt per packet */
 #define MVNETA_RX_COAL_PKTS		32
 #define MVNETA_RX_COAL_USEC		100
 
-/* Timer */
-#define MVNETA_TX_DONE_TIMER_PERIOD	10
-
-/* Napi polling weight */
-#define MVNETA_RX_POLL_WEIGHT		64
-
 /* The two bytes Marvell header. Either contains a special value used
  * by Marvell switches when a specific hardware mode is enabled (not
  * supported by this driver) or is filled automatically by zeroes on
@@ -190,9 +318,12 @@
 
 #define MVNETA_VLAN_TAG_LEN             4
 
-#define MVNETA_CPU_D_CACHE_LINE_SIZE    32
+#define MVNETA_TX_CSUM_DEF_SIZE		1600
 #define MVNETA_TX_CSUM_MAX_SIZE		9800
-#define MVNETA_ACC_MODE_EXT		1
+#define MVNETA_ACC_MODE_EXT1		1
+#define MVNETA_ACC_MODE_EXT2		2
+
+#define MVNETA_MAX_DECODE_WIN		6
 
 /* Timeout constants */
 #define MVNETA_TX_DISABLE_TIMEOUT_MSEC	1000
@@ -201,69 +332,235 @@
 
 #define MVNETA_TX_MTU_MAX		0x3ffff
 
+/* The RSS lookup table actually has 256 entries but we do not use
+ * them yet
+ */
+#define MVNETA_RSS_LU_TABLE_SIZE	1
+
 /* Max number of Rx descriptors */
-#define MVNETA_MAX_RXD 128
+#define MVNETA_MAX_RXD 512
 
 /* Max number of Tx descriptors */
-#define MVNETA_MAX_TXD 532
+#define MVNETA_MAX_TXD 1024
+
+/* Max number of allowed TCP segments for software TSO */
+#define MVNETA_MAX_TSO_SEGS 100
+
+#define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+/* The size of a TSO header page */
+#define MVNETA_TSO_PAGE_SIZE (2 * PAGE_SIZE)
+
+/* Number of TSO headers per page. This should be a power of 2 */
+#define MVNETA_TSO_PER_PAGE (MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE)
+
+/* Maximum number of TSO header pages */
+#define MVNETA_MAX_TSO_PAGES (MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE)
 
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE	32
 
+/* Number of bytes to be taken into account by HW when putting incoming data
+ * to the buffers. It is needed in case NET_SKB_PAD exceeds maximum packet
+ * offset supported in MVNETA_RXQ_CONFIG_REG(q) registers.
+ */
+#define MVNETA_RX_PKT_OFFSET_CORRECTION		64
+
 #define MVNETA_RX_PKT_SIZE(mtu) \
 	ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \
 	      ETH_HLEN + ETH_FCS_LEN,			     \
-	      MVNETA_CPU_D_CACHE_LINE_SIZE)
+	      cache_line_size())
+
+/* Driver assumes that the last 3 bits are 0 */
+#define MVNETA_SKB_HEADROOM	ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8)
+#define MVNETA_SKB_PAD	(SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
+			 MVNETA_SKB_HEADROOM))
+#define MVNETA_MAX_RX_BUF_SIZE	(PAGE_SIZE - MVNETA_SKB_PAD)
+
+#define MVNETA_RX_GET_BM_POOL_ID(rxd) \
+	(((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT)
+
+enum {
+	ETHTOOL_STAT_EEE_WAKEUP,
+	ETHTOOL_STAT_SKB_ALLOC_ERR,
+	ETHTOOL_STAT_REFILL_ERR,
+	ETHTOOL_XDP_REDIRECT,
+	ETHTOOL_XDP_PASS,
+	ETHTOOL_XDP_DROP,
+	ETHTOOL_XDP_TX,
+	ETHTOOL_XDP_TX_ERR,
+	ETHTOOL_XDP_XMIT,
+	ETHTOOL_XDP_XMIT_ERR,
+	ETHTOOL_MAX_STATS,
+};
+
+struct mvneta_statistic {
+	unsigned short offset;
+	unsigned short type;
+	const char name[ETH_GSTRING_LEN];
+};
 
-#define MVNETA_RX_BUF_SIZE(pkt_size)   ((pkt_size) + NET_SKB_PAD)
+#define T_REG_32	32
+#define T_REG_64	64
+#define T_SW		1
+
+#define MVNETA_XDP_PASS		0
+#define MVNETA_XDP_DROPPED	BIT(0)
+#define MVNETA_XDP_TX		BIT(1)
+#define MVNETA_XDP_REDIR	BIT(2)
+
+static const struct mvneta_statistic mvneta_statistics[] = {
+	{ 0x3000, T_REG_64, "good_octets_received", },
+	{ 0x3010, T_REG_32, "good_frames_received", },
+	{ 0x3008, T_REG_32, "bad_octets_received", },
+	{ 0x3014, T_REG_32, "bad_frames_received", },
+	{ 0x3018, T_REG_32, "broadcast_frames_received", },
+	{ 0x301c, T_REG_32, "multicast_frames_received", },
+	{ 0x3050, T_REG_32, "unrec_mac_control_received", },
+	{ 0x3058, T_REG_32, "good_fc_received", },
+	{ 0x305c, T_REG_32, "bad_fc_received", },
+	{ 0x3060, T_REG_32, "undersize_received", },
+	{ 0x3064, T_REG_32, "fragments_received", },
+	{ 0x3068, T_REG_32, "oversize_received", },
+	{ 0x306c, T_REG_32, "jabber_received", },
+	{ 0x3070, T_REG_32, "mac_receive_error", },
+	{ 0x3074, T_REG_32, "bad_crc_event", },
+	{ 0x3078, T_REG_32, "collision", },
+	{ 0x307c, T_REG_32, "late_collision", },
+	{ 0x2484, T_REG_32, "rx_discard", },
+	{ 0x2488, T_REG_32, "rx_overrun", },
+	{ 0x3020, T_REG_32, "frames_64_octets", },
+	{ 0x3024, T_REG_32, "frames_65_to_127_octets", },
+	{ 0x3028, T_REG_32, "frames_128_to_255_octets", },
+	{ 0x302c, T_REG_32, "frames_256_to_511_octets", },
+	{ 0x3030, T_REG_32, "frames_512_to_1023_octets", },
+	{ 0x3034, T_REG_32, "frames_1024_to_max_octets", },
+	{ 0x3038, T_REG_64, "good_octets_sent", },
+	{ 0x3040, T_REG_32, "good_frames_sent", },
+	{ 0x3044, T_REG_32, "excessive_collision", },
+	{ 0x3048, T_REG_32, "multicast_frames_sent", },
+	{ 0x304c, T_REG_32, "broadcast_frames_sent", },
+	{ 0x3054, T_REG_32, "fc_sent", },
+	{ 0x300c, T_REG_32, "internal_mac_transmit_err", },
+	{ ETHTOOL_STAT_EEE_WAKEUP, T_SW, "eee_wakeup_errors", },
+	{ ETHTOOL_STAT_SKB_ALLOC_ERR, T_SW, "skb_alloc_errors", },
+	{ ETHTOOL_STAT_REFILL_ERR, T_SW, "refill_errors", },
+	{ ETHTOOL_XDP_REDIRECT, T_SW, "rx_xdp_redirect", },
+	{ ETHTOOL_XDP_PASS, T_SW, "rx_xdp_pass", },
+	{ ETHTOOL_XDP_DROP, T_SW, "rx_xdp_drop", },
+	{ ETHTOOL_XDP_TX, T_SW, "rx_xdp_tx", },
+	{ ETHTOOL_XDP_TX_ERR, T_SW, "rx_xdp_tx_errors", },
+	{ ETHTOOL_XDP_XMIT, T_SW, "tx_xdp_xmit", },
+	{ ETHTOOL_XDP_XMIT_ERR, T_SW, "tx_xdp_xmit_errors", },
+};
 
 struct mvneta_stats {
-	struct	u64_stats_sync syncp;
-	u64	packets;
-	u64	bytes;
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	/* xdp */
+	u64	xdp_redirect;
+	u64	xdp_pass;
+	u64	xdp_drop;
+	u64	xdp_xmit;
+	u64	xdp_xmit_err;
+	u64	xdp_tx;
+	u64	xdp_tx_err;
+};
+
+struct mvneta_ethtool_stats {
+	struct mvneta_stats ps;
+	u64	skb_alloc_error;
+	u64	refill_error;
+};
+
+struct mvneta_pcpu_stats {
+	struct u64_stats_sync syncp;
+
+	struct mvneta_ethtool_stats es;
+	u64	rx_dropped;
+	u64	rx_errors;
+};
+
+struct mvneta_pcpu_port {
+	/* Pointer to the shared port */
+	struct mvneta_port	*pp;
+
+	/* Pointer to the CPU-local NAPI struct */
+	struct napi_struct	napi;
+
+	/* Cause of the previous interrupt */
+	u32			cause_rx_tx;
+};
+
+enum {
+	__MVNETA_DOWN,
 };
 
 struct mvneta_port {
+	u8 id;
+	struct mvneta_pcpu_port __percpu	*ports;
+	struct mvneta_pcpu_stats __percpu	*stats;
+
+	unsigned long state;
+
 	int pkt_size;
 	void __iomem *base;
 	struct mvneta_rx_queue *rxqs;
 	struct mvneta_tx_queue *txqs;
-	struct timer_list tx_done_timer;
 	struct net_device *dev;
+	struct hlist_node node_online;
+	struct hlist_node node_dead;
+	int rxq_def;
+	/* Protect the access to the percpu interrupt registers,
+	 * ensuring that the configuration remains coherent.
+	 */
+	spinlock_t lock;
+	bool is_stopped;
 
 	u32 cause_rx_tx;
 	struct napi_struct napi;
 
-	/* Flags */
-	unsigned long flags;
-#define MVNETA_F_TX_DONE_TIMER_BIT  0
-
-	/* Napi weight */
-	int weight;
+	struct bpf_prog *xdp_prog;
 
 	/* Core clock */
 	struct clk *clk;
+	/* AXI clock */
+	struct clk *clk_bus;
 	u8 mcast_count[256];
 	u16 tx_ring_size;
 	u16 rx_ring_size;
-	struct mvneta_stats tx_stats;
-	struct mvneta_stats rx_stats;
 
-	struct mii_bus *mii_bus;
-	struct phy_device *phy_dev;
 	phy_interface_t phy_interface;
-	struct device_node *phy_node;
-	unsigned int link;
-	unsigned int duplex;
-	unsigned int speed;
+	struct device_node *dn;
+	unsigned int tx_csum_limit;
+	struct phylink *phylink;
+	struct phylink_config phylink_config;
+	struct phylink_pcs phylink_pcs;
+	struct phy *comphy;
+
+	struct mvneta_bm *bm_priv;
+	struct mvneta_bm_pool *pool_long;
+	struct mvneta_bm_pool *pool_short;
+	int bm_win_id;
+
+	u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
+
+	u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
+
+	/* Flags for special SoC configurations */
+	bool neta_armada3700;
+	bool neta_ac5;
+	u16 rx_offset_correction;
+	const struct mbus_dram_target_info *dram_target_info;
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
  * layout of the transmit and reception DMA descriptors, and their
  * layout is therefore defined by the hardware design
  */
-struct mvneta_tx_desc {
-	u32  command;		/* Options used by HW for packet transmitting.*/
+
 #define MVNETA_TX_L3_OFF_SHIFT	0
 #define MVNETA_TX_IP_HLEN_SHIFT	8
 #define MVNETA_TX_L4_UDP	BIT(16)
@@ -278,35 +575,87 @@ struct mvneta_tx_desc {
 #define MVNETA_TX_L4_CSUM_FULL	BIT(30)
 #define MVNETA_TX_L4_CSUM_NOT	BIT(31)
 
-	u16  reserverd1;	/* csum_l4 (for future use)		*/
-	u16  data_size;		/* Data size of transmitted packet in bytes */
-	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
-	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
-	u32  reserved3[4];	/* Reserved - (for future use)		*/
-};
-
-struct mvneta_rx_desc {
-	u32  status;		/* Info about received packet		*/
 #define MVNETA_RXD_ERR_CRC		0x0
+#define MVNETA_RXD_BM_POOL_SHIFT	13
+#define MVNETA_RXD_BM_POOL_MASK		(BIT(13) | BIT(14))
 #define MVNETA_RXD_ERR_SUMMARY		BIT(16)
 #define MVNETA_RXD_ERR_OVERRUN		BIT(17)
 #define MVNETA_RXD_ERR_LEN		BIT(18)
 #define MVNETA_RXD_ERR_RESOURCE		(BIT(17) | BIT(18))
 #define MVNETA_RXD_ERR_CODE_MASK	(BIT(17) | BIT(18))
 #define MVNETA_RXD_L3_IP4		BIT(25)
-#define MVNETA_RXD_FIRST_LAST_DESC	(BIT(26) | BIT(27))
+#define MVNETA_RXD_LAST_DESC		BIT(26)
+#define MVNETA_RXD_FIRST_DESC		BIT(27)
+#define MVNETA_RXD_FIRST_LAST_DESC	(MVNETA_RXD_FIRST_DESC | \
+					 MVNETA_RXD_LAST_DESC)
 #define MVNETA_RXD_L4_CSUM_OK		BIT(30)
 
+#if defined(__LITTLE_ENDIAN)
+struct mvneta_tx_desc {
+	u32  command;		/* Options used by HW for packet transmitting.*/
+	u16  reserved1;		/* csum_l4 (for future use)		*/
+	u16  data_size;		/* Data size of transmitted packet in bytes */
+	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
+	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
+	u32  reserved3[4];	/* Reserved - (for future use)		*/
+};
+
+struct mvneta_rx_desc {
+	u32  status;		/* Info about received packet		*/
 	u16  reserved1;		/* pnc_info - (for future use, PnC)	*/
 	u16  data_size;		/* Size of received packet in bytes	*/
+
 	u32  buf_phys_addr;	/* Physical address of the buffer	*/
 	u32  reserved2;		/* pnc_flow_id  (for future use, PnC)	*/
+
 	u32  buf_cookie;	/* cookie for access to RX buffer in rx path */
 	u16  reserved3;		/* prefetch_cmd, for future use		*/
 	u16  reserved4;		/* csum_l4 - (for future use, PnC)	*/
+
+	u32  reserved5;		/* pnc_extra PnC (for future use, PnC)	*/
+	u32  reserved6;		/* hw_cmd (for future use, PnC and HWF)	*/
+};
+#else
+struct mvneta_tx_desc {
+	u16  data_size;		/* Data size of transmitted packet in bytes */
+	u16  reserved1;		/* csum_l4 (for future use)		*/
+	u32  command;		/* Options used by HW for packet transmitting.*/
+	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
+	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
+	u32  reserved3[4];	/* Reserved - (for future use)		*/
+};
+
+struct mvneta_rx_desc {
+	u16  data_size;		/* Size of received packet in bytes	*/
+	u16  reserved1;		/* pnc_info - (for future use, PnC)	*/
+	u32  status;		/* Info about received packet		*/
+
+	u32  reserved2;		/* pnc_flow_id  (for future use, PnC)	*/
+	u32  buf_phys_addr;	/* Physical address of the buffer	*/
+
+	u16  reserved4;		/* csum_l4 - (for future use, PnC)	*/
+	u16  reserved3;		/* prefetch_cmd, for future use		*/
+	u32  buf_cookie;	/* cookie for access to RX buffer in rx path */
+
 	u32  reserved5;		/* pnc_extra PnC (for future use, PnC)	*/
 	u32  reserved6;		/* hw_cmd (for future use, PnC and HWF)	*/
 };
+#endif
+
+enum mvneta_tx_buf_type {
+	MVNETA_TYPE_TSO,
+	MVNETA_TYPE_SKB,
+	MVNETA_TYPE_XDP_TX,
+	MVNETA_TYPE_XDP_NDO,
+};
+
+struct mvneta_tx_buf {
+	enum mvneta_tx_buf_type type;
+	union {
+		struct xdp_frame *xdpf;
+		struct sk_buff *skb;
+	};
+};
 
 struct mvneta_tx_queue {
 	/* Number of this TX queue, in the range 0-7 */
@@ -319,9 +668,12 @@ struct mvneta_tx_queue {
 	 * descriptor ring
 	 */
 	int count;
+	int pending;
+	int tx_stop_threshold;
+	int tx_wake_threshold;
 
-	/* Array of transmitted skb */
-	struct sk_buff **tx_skb;
+	/* Array of transmitted buffers */
+	struct mvneta_tx_buf *buf;
 
 	/* Index of last TX DMA descriptor that was inserted */
 	int txq_put_index;
@@ -342,6 +694,15 @@ struct mvneta_tx_queue {
 
 	/* Index of the next TX DMA descriptor to process */
 	int next_desc_to_proc;
+
+	/* DMA buffers for TSO headers */
+	char *tso_hdrs[MVNETA_MAX_TSO_PAGES];
+
+	/* DMA address of TSO headers */
+	dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES];
+
+	/* Affinity mask for CPUs*/
+	cpumask_t affinity_mask;
 };
 
 struct mvneta_rx_queue {
@@ -351,12 +712,16 @@ struct mvneta_rx_queue {
 	/* num of rx descriptors in the rx descriptor ring */
 	int size;
 
-	/* counter of times when mvneta_refill() failed */
-	int missed;
-
 	u32 pkts_coal;
 	u32 time_coal;
 
+	/* page_pool */
+	struct page_pool *page_pool;
+	struct xdp_rxq_info xdp_rxq;
+
+	/* Virtual address of the RX buffer */
+	void  **buf_virt_addr;
+
 	/* Virtual address of the RX DMA descriptors array */
 	struct mvneta_rx_desc *descs;
 
@@ -368,13 +733,26 @@ struct mvneta_rx_queue {
 
 	/* Index of the next RX DMA descriptor to process */
 	int next_desc_to_proc;
+
+	/* Index of first RX DMA descriptor to refill */
+	int first_to_refill;
+	u32 refill_num;
 };
 
+static enum cpuhp_state online_hpstate;
+/* The hardware supports eight (8) rx queues, but we are only allowing
+ * the first one to be used. Therefore, let's just allocate one queue.
+ */
 static int rxq_number = 8;
 static int txq_number = 8;
 
 static int rxq_def;
 
+static int rx_copybreak __read_mostly = 256;
+
+/* HW BM need that each port be identify by a unique ID */
+static int global_port_id;
+
 #define MVNETA_DRIVER_NAME "mvneta"
 #define MVNETA_DRIVER_VERSION "1.0"
 
@@ -413,53 +791,64 @@ static void mvneta_txq_inc_put(struct mvneta_tx_queue *txq)
 static void mvneta_mib_counters_clear(struct mvneta_port *pp)
 {
 	int i;
-	u32 dummy;
 
 	/* Perform dummy reads from MIB counters */
 	for (i = 0; i < MVNETA_MIB_LATE_COLLISION; i += 4)
-		dummy = mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i));
+		mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i));
+	mvreg_read(pp, MVNETA_RX_DISCARD_FRAME_COUNT);
+	mvreg_read(pp, MVNETA_OVERRUN_FRAME_COUNT);
 }
 
 /* Get System Network Statistics */
-struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev,
-					     struct rtnl_link_stats64 *stats)
+static void
+mvneta_get_stats64(struct net_device *dev,
+		   struct rtnl_link_stats64 *stats)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	unsigned int start;
+	int cpu;
 
-	memset(stats, 0, sizeof(struct rtnl_link_stats64));
-
-	do {
-		start = u64_stats_fetch_begin_bh(&pp->rx_stats.syncp);
-		stats->rx_packets = pp->rx_stats.packets;
-		stats->rx_bytes	= pp->rx_stats.bytes;
-	} while (u64_stats_fetch_retry_bh(&pp->rx_stats.syncp, start));
-
-
-	do {
-		start = u64_stats_fetch_begin_bh(&pp->tx_stats.syncp);
-		stats->tx_packets = pp->tx_stats.packets;
-		stats->tx_bytes	= pp->tx_stats.bytes;
-	} while (u64_stats_fetch_retry_bh(&pp->tx_stats.syncp, start));
-
-	stats->rx_errors	= dev->stats.rx_errors;
-	stats->rx_dropped	= dev->stats.rx_dropped;
+	for_each_possible_cpu(cpu) {
+		struct mvneta_pcpu_stats *cpu_stats;
+		u64 rx_packets;
+		u64 rx_bytes;
+		u64 rx_dropped;
+		u64 rx_errors;
+		u64 tx_packets;
+		u64 tx_bytes;
+
+		cpu_stats = per_cpu_ptr(pp->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin(&cpu_stats->syncp);
+			rx_packets = cpu_stats->es.ps.rx_packets;
+			rx_bytes   = cpu_stats->es.ps.rx_bytes;
+			rx_dropped = cpu_stats->rx_dropped;
+			rx_errors  = cpu_stats->rx_errors;
+			tx_packets = cpu_stats->es.ps.tx_packets;
+			tx_bytes   = cpu_stats->es.ps.tx_bytes;
+		} while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes   += rx_bytes;
+		stats->rx_dropped += rx_dropped;
+		stats->rx_errors  += rx_errors;
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes   += tx_bytes;
+	}
 
 	stats->tx_dropped	= dev->stats.tx_dropped;
-
-	return stats;
 }
 
 /* Rx descriptors helper methods */
 
-/* Checks whether the given RX descriptor is both the first and the
- * last descriptor for the RX packet. Each RX packet is currently
+/* Checks whether the RX descriptor having this status is both the first
+ * and the last descriptor for the RX packet. Each RX packet is currently
  * received through a single RX descriptor, so not having each RX
  * descriptor with its first and last bits set is an error
  */
-static int mvneta_rxq_desc_is_first_last(struct mvneta_rx_desc *desc)
+static int mvneta_rxq_desc_is_first_last(u32 status)
 {
-	return (desc->status & MVNETA_RXD_FIRST_LAST_DESC) ==
+	return (status & MVNETA_RXD_FIRST_LAST_DESC) ==
 		MVNETA_RXD_FIRST_LAST_DESC;
 }
 
@@ -535,6 +924,7 @@ mvneta_rxq_next_desc_get(struct mvneta_rx_queue *rxq)
 	int rx_desc = rxq->next_desc_to_proc;
 
 	rxq->next_desc_to_proc = MVNETA_QUEUE_NEXT_DESC(rxq, rx_desc);
+	prefetch(rxq->descs + rxq->next_desc_to_proc);
 	return rxq->descs + rx_desc;
 }
 
@@ -576,11 +966,15 @@ static void mvneta_txq_pend_desc_add(struct mvneta_port *pp,
 {
 	u32 val;
 
-	/* Only 255 descriptors can be added at once ; Assume caller
-	 * process TX desriptors in quanta less than 256
-	 */
-	val = pend_desc;
-	mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val);
+	pend_desc += txq->pending;
+
+	/* Only 255 Tx descriptors can be added at once */
+	do {
+		val = min(pend_desc, 255);
+		mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val);
+		pend_desc -= val;
+	} while (pend_desc > 0);
+	txq->pending = 0;
 }
 
 /* Get pointer to next TX descriptor to be processed (send) by HW */
@@ -630,31 +1024,228 @@ static void mvneta_rxq_bm_disable(struct mvneta_port *pp,
 	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
 }
 
+/* Enable buffer management (BM) */
+static void mvneta_rxq_bm_enable(struct mvneta_port *pp,
+				 struct mvneta_rx_queue *rxq)
+{
+	u32 val;
 
+	val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id));
+	val |= MVNETA_RXQ_HW_BUF_ALLOC;
+	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
+}
 
-/* Sets the RGMII Enable bit (RGMIIEn) in port MAC control register */
-static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable)
+/* Notify HW about port's assignment of pool for bigger packets */
+static void mvneta_rxq_long_pool_set(struct mvneta_port *pp,
+				     struct mvneta_rx_queue *rxq)
 {
-	u32  val;
+	u32 val;
 
-	val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+	val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id));
+	val &= ~MVNETA_RXQ_LONG_POOL_ID_MASK;
+	val |= (pp->pool_long->id << MVNETA_RXQ_LONG_POOL_ID_SHIFT);
 
-	if (enable)
-		val |= MVNETA_GMAC2_PORT_RGMII;
-	else
-		val &= ~MVNETA_GMAC2_PORT_RGMII;
+	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
+}
+
+/* Notify HW about port's assignment of pool for smaller packets */
+static void mvneta_rxq_short_pool_set(struct mvneta_port *pp,
+				      struct mvneta_rx_queue *rxq)
+{
+	u32 val;
+
+	val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id));
+	val &= ~MVNETA_RXQ_SHORT_POOL_ID_MASK;
+	val |= (pp->pool_short->id << MVNETA_RXQ_SHORT_POOL_ID_SHIFT);
 
-	mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
 }
 
-/* Config SGMII port */
-static void mvneta_port_sgmii_config(struct mvneta_port *pp)
+/* Set port's receive buffer size for assigned BM pool */
+static inline void mvneta_bm_pool_bufsize_set(struct mvneta_port *pp,
+					      int buf_size,
+					      u8 pool_id)
 {
 	u32 val;
 
-	val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
-	val |= MVNETA_GMAC2_PSC_ENABLE;
-	mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	if (!IS_ALIGNED(buf_size, 8)) {
+		dev_warn(pp->dev->dev.parent,
+			 "illegal buf_size value %d, round to %d\n",
+			 buf_size, ALIGN(buf_size, 8));
+		buf_size = ALIGN(buf_size, 8);
+	}
+
+	val = mvreg_read(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id));
+	val |= buf_size & MVNETA_PORT_POOL_BUFFER_SZ_MASK;
+	mvreg_write(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id), val);
+}
+
+/* Configure MBUS window in order to enable access BM internal SRAM */
+static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize,
+				  u8 target, u8 attr)
+{
+	u32 win_enable, win_protect;
+	int i;
+
+	win_enable = mvreg_read(pp, MVNETA_BASE_ADDR_ENABLE);
+
+	if (pp->bm_win_id < 0) {
+		/* Find first not occupied window */
+		for (i = 0; i < MVNETA_MAX_DECODE_WIN; i++) {
+			if (win_enable & (1 << i)) {
+				pp->bm_win_id = i;
+				break;
+			}
+		}
+		if (i == MVNETA_MAX_DECODE_WIN)
+			return -ENOMEM;
+	} else {
+		i = pp->bm_win_id;
+	}
+
+	mvreg_write(pp, MVNETA_WIN_BASE(i), 0);
+	mvreg_write(pp, MVNETA_WIN_SIZE(i), 0);
+
+	if (i < 4)
+		mvreg_write(pp, MVNETA_WIN_REMAP(i), 0);
+
+	mvreg_write(pp, MVNETA_WIN_BASE(i), (base & 0xffff0000) |
+		    (attr << 8) | target);
+
+	mvreg_write(pp, MVNETA_WIN_SIZE(i), (wsize - 1) & 0xffff0000);
+
+	win_protect = mvreg_read(pp, MVNETA_ACCESS_PROTECT_ENABLE);
+	win_protect |= 3 << (2 * i);
+	mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect);
+
+	win_enable &= ~(1 << i);
+	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
+
+	return 0;
+}
+
+static int mvneta_bm_port_mbus_init(struct mvneta_port *pp)
+{
+	u32 wsize;
+	u8 target, attr;
+	int err;
+
+	/* Get BM window information */
+	err = mvebu_mbus_get_io_win_info(pp->bm_priv->bppi_phys_addr, &wsize,
+					 &target, &attr);
+	if (err < 0)
+		return err;
+
+	pp->bm_win_id = -1;
+
+	/* Open NETA -> BM window */
+	err = mvneta_mbus_io_win_set(pp, pp->bm_priv->bppi_phys_addr, wsize,
+				     target, attr);
+	if (err < 0) {
+		netdev_info(pp->dev, "fail to configure mbus window to BM\n");
+		return err;
+	}
+	return 0;
+}
+
+/* Assign and initialize pools for port. In case of fail
+ * buffer manager will remain disabled for current port.
+ */
+static int mvneta_bm_port_init(struct platform_device *pdev,
+			       struct mvneta_port *pp)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	u32 long_pool_id, short_pool_id;
+
+	if (!pp->neta_armada3700) {
+		int ret;
+
+		ret = mvneta_bm_port_mbus_init(pp);
+		if (ret)
+			return ret;
+	}
+
+	if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) {
+		netdev_info(pp->dev, "missing long pool id\n");
+		return -EINVAL;
+	}
+
+	/* Create port's long pool depending on mtu */
+	pp->pool_long = mvneta_bm_pool_use(pp->bm_priv, long_pool_id,
+					   MVNETA_BM_LONG, pp->id,
+					   MVNETA_RX_PKT_SIZE(pp->dev->mtu));
+	if (!pp->pool_long) {
+		netdev_info(pp->dev, "fail to obtain long pool for port\n");
+		return -ENOMEM;
+	}
+
+	pp->pool_long->port_map |= 1 << pp->id;
+
+	mvneta_bm_pool_bufsize_set(pp, pp->pool_long->buf_size,
+				   pp->pool_long->id);
+
+	/* If short pool id is not defined, assume using single pool */
+	if (of_property_read_u32(dn, "bm,pool-short", &short_pool_id))
+		short_pool_id = long_pool_id;
+
+	/* Create port's short pool */
+	pp->pool_short = mvneta_bm_pool_use(pp->bm_priv, short_pool_id,
+					    MVNETA_BM_SHORT, pp->id,
+					    MVNETA_BM_SHORT_PKT_SIZE);
+	if (!pp->pool_short) {
+		netdev_info(pp->dev, "fail to obtain short pool for port\n");
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+		return -ENOMEM;
+	}
+
+	if (short_pool_id != long_pool_id) {
+		pp->pool_short->port_map |= 1 << pp->id;
+		mvneta_bm_pool_bufsize_set(pp, pp->pool_short->buf_size,
+					   pp->pool_short->id);
+	}
+
+	return 0;
+}
+
+/* Update settings of a pool for bigger packets */
+static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu)
+{
+	struct mvneta_bm_pool *bm_pool = pp->pool_long;
+	struct hwbm_pool *hwbm_pool = &bm_pool->hwbm_pool;
+	int num;
+
+	/* Release all buffers from long pool */
+	mvneta_bm_bufs_free(pp->bm_priv, bm_pool, 1 << pp->id);
+	if (hwbm_pool->buf_num) {
+		WARN(1, "cannot free all buffers in pool %d\n",
+		     bm_pool->id);
+		goto bm_mtu_err;
+	}
+
+	bm_pool->pkt_size = MVNETA_RX_PKT_SIZE(mtu);
+	bm_pool->buf_size = MVNETA_RX_BUF_SIZE(bm_pool->pkt_size);
+	hwbm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+			SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
+
+	/* Fill entire long pool */
+	num = hwbm_pool_add(hwbm_pool, hwbm_pool->size);
+	if (num != hwbm_pool->size) {
+		WARN(1, "pool %d: %d of %d allocated\n",
+		     bm_pool->id, num, hwbm_pool->size);
+		goto bm_mtu_err;
+	}
+	mvneta_bm_pool_bufsize_set(pp, bm_pool->buf_size, bm_pool->id);
+
+	return;
+
+bm_mtu_err:
+	mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+	mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id);
+
+	pp->bm_priv = NULL;
+	pp->rx_offset_correction = MVNETA_SKB_HEADROOM;
+	mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1);
+	netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n");
 }
 
 /* Start the Ethernet port RX and TX activity */
@@ -664,23 +1255,22 @@ static void mvneta_port_up(struct mvneta_port *pp)
 	u32 q_map;
 
 	/* Enable all initialized TXs. */
-	mvneta_mib_counters_clear(pp);
 	q_map = 0;
 	for (queue = 0; queue < txq_number; queue++) {
 		struct mvneta_tx_queue *txq = &pp->txqs[queue];
-		if (txq->descs != NULL)
+		if (txq->descs)
 			q_map |= (1 << queue);
 	}
 	mvreg_write(pp, MVNETA_TXQ_CMD, q_map);
 
-	/* Enable all initialized RXQs. */
 	q_map = 0;
+	/* Enable all initialized RXQs. */
 	for (queue = 0; queue < rxq_number; queue++) {
 		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
-		if (rxq->descs != NULL)
+
+		if (rxq->descs)
 			q_map |= (1 << queue);
 	}
-
 	mvreg_write(pp, MVNETA_RXQ_CMD, q_map);
 }
 
@@ -703,14 +1293,14 @@ static void mvneta_port_down(struct mvneta_port *pp)
 	do {
 		if (count++ >= MVNETA_RX_DISABLE_TIMEOUT_MSEC) {
 			netdev_warn(pp->dev,
-				    "TIMEOUT for RX stopped ! rx_queue_cmd: 0x08%x\n",
+				    "TIMEOUT for RX stopped ! rx_queue_cmd: 0x%08x\n",
 				    val);
 			break;
 		}
 		mdelay(1);
 
 		val = mvreg_read(pp, MVNETA_RXQ_CMD);
-	} while (val & 0xff);
+	} while (val & MVNETA_RXQ_ENABLE_MASK);
 
 	/* Stop Tx port activity. Check port Tx activity. Issue stop
 	 * command for active channels only
@@ -735,14 +1325,14 @@ static void mvneta_port_down(struct mvneta_port *pp)
 		/* Check TX Command reg that all Txqs are stopped */
 		val = mvreg_read(pp, MVNETA_TXQ_CMD);
 
-	} while (val & 0xff);
+	} while (val & MVNETA_TXQ_ENABLE_MASK);
 
 	/* Double check to verify that TX FIFO is empty */
 	count = 0;
 	do {
 		if (count++ >= MVNETA_TX_FIFO_EMPTY_TIMEOUT) {
 			netdev_warn(pp->dev,
-				    "TX FIFO empty timeout status=0x08%x\n",
+				    "TX FIFO empty timeout status=0x%08x\n",
 				    val);
 			break;
 		}
@@ -835,6 +1425,43 @@ static void mvneta_set_other_mcast_table(struct mvneta_port *pp, int queue)
 		mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val);
 }
 
+static void mvneta_percpu_unmask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are unmasked, but actually only the ones
+	 * mapped to this CPU will be unmasked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+		    MVNETA_RX_INTR_MASK_ALL |
+		    MVNETA_TX_INTR_MASK_ALL |
+		    MVNETA_MISCINTR_INTR_MASK);
+}
+
+static void mvneta_percpu_mask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are masked, but actually only the ones
+	 * mapped to this CPU will be masked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+}
+
+static void mvneta_percpu_clear_intr_cause(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are cleared, but actually only the ones
+	 * mapped to this CPU will be cleared
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+}
+
 /* This method sets defaults to the NETA port:
  *	Clears interrupt Cause and Mask registers.
  *	Clears all MAC tables.
@@ -849,28 +1476,50 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	int cpu;
 	int queue;
 	u32 val;
+	int max_cpu = num_present_cpus();
 
 	/* Clear all Cause registers */
-	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 	mvreg_write(pp, MVNETA_INTR_ENABLE, 0);
 
 	/* Enable MBUS Retry bit16 */
 	mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20);
 
-	/* Set CPU queue access map - all CPUs have access to all RX
-	 * queues and to all TX queues
+	/* Set CPU queue access map. CPUs are assigned to the RX and
+	 * TX queues modulo their number. If there is only one TX
+	 * queue then it is assigned to the CPU associated to the
+	 * default RX queue.
 	 */
-	for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++)
-		mvreg_write(pp, MVNETA_CPU_MAP(cpu),
-			    (MVNETA_CPU_RXQ_ACCESS_ALL_MASK |
-			     MVNETA_CPU_TXQ_ACCESS_ALL_MASK));
+	for_each_present_cpu(cpu) {
+		int rxq_map = 0, txq_map = 0;
+		int rxq, txq;
+		if (!pp->neta_armada3700) {
+			for (rxq = 0; rxq < rxq_number; rxq++)
+				if ((rxq % max_cpu) == cpu)
+					rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+			for (txq = 0; txq < txq_number; txq++)
+				if ((txq % max_cpu) == cpu)
+					txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
+
+			/* With only one TX queue we configure a special case
+			 * which will allow to get all the irq on a single
+			 * CPU
+			 */
+			if (txq_number == 1)
+				txq_map = (cpu == pp->rxq_def) ?
+					MVNETA_CPU_TXQ_ACCESS(0) : 0;
+
+		} else {
+			txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+			rxq_map = MVNETA_CPU_RXQ_ACCESS_ALL_MASK;
+		}
+
+		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
+	}
 
 	/* Reset RX and TX DMAs */
 	mvreg_write(pp, MVNETA_PORT_RX_RESET, MVNETA_PORT_RX_DMA_RESET);
@@ -887,11 +1536,19 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	mvreg_write(pp, MVNETA_PORT_RX_RESET, 0);
 
 	/* Set Port Acceleration Mode */
-	val = MVNETA_ACC_MODE_EXT;
+	if (pp->bm_priv)
+		/* HW buffer management + legacy parser */
+		val = MVNETA_ACC_MODE_EXT2;
+	else
+		/* SW buffer management + legacy parser */
+		val = MVNETA_ACC_MODE_EXT1;
 	mvreg_write(pp, MVNETA_ACC_MODE, val);
 
+	if (pp->bm_priv)
+		mvreg_write(pp, MVNETA_BM_ADDRESS, pp->bm_priv->bppi_phys_addr);
+
 	/* Update val of portCfg register accordingly with all RxQueue types */
-	val = MVNETA_PORT_CONFIG_DEFL_VALUE(rxq_def);
+	val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def);
 	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
 
 	val = 0;
@@ -904,13 +1561,22 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	/* Default burst size */
 	val |= MVNETA_TX_BRST_SZ_MASK(MVNETA_SDMA_BRST_SIZE_16);
 	val |= MVNETA_RX_BRST_SZ_MASK(MVNETA_SDMA_BRST_SIZE_16);
+	val |= MVNETA_RX_NO_DATA_SWAP | MVNETA_TX_NO_DATA_SWAP;
 
-	val |= (MVNETA_RX_NO_DATA_SWAP | MVNETA_TX_NO_DATA_SWAP |
-		MVNETA_NO_DESC_SWAP);
+#if defined(__BIG_ENDIAN)
+	val |= MVNETA_DESC_SWAP;
+#endif
 
 	/* Assign port SDMA configuration */
 	mvreg_write(pp, MVNETA_SDMA_CONFIG, val);
 
+	/* Disable PHY polling in hardware, since we're using the
+	 * kernel phylib to do this.
+	 */
+	val = mvreg_read(pp, MVNETA_UNIT_CONTROL);
+	val &= ~MVNETA_PHY_POLLING_ENABLE;
+	mvreg_write(pp, MVNETA_UNIT_CONTROL, val);
+
 	mvneta_set_ucast_table(pp, -1);
 	mvneta_set_special_mcast_table(pp, -1);
 	mvneta_set_other_mcast_table(pp, -1);
@@ -919,6 +1585,8 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	mvreg_write(pp, MVNETA_INTR_ENABLE,
 		    (MVNETA_RXQ_INTR_ENABLE_ALL_MASK
 		     | MVNETA_TXQ_INTR_ENABLE_ALL_MASK));
+
+	mvneta_mib_counters_clear(pp);
 }
 
 /* Set max sizes for tx queues */
@@ -992,8 +1660,8 @@ static void mvneta_set_ucast_addr(struct mvneta_port *pp, u8 last_nibble,
 }
 
 /* Set mac address */
-static void mvneta_mac_addr_set(struct mvneta_port *pp, unsigned char *addr,
-				int queue)
+static void mvneta_mac_addr_set(struct mvneta_port *pp,
+				const unsigned char *addr, int queue)
 {
 	unsigned int mac_h;
 	unsigned int mac_l;
@@ -1019,7 +1687,6 @@ static void mvneta_rx_pkts_coal_set(struct mvneta_port *pp,
 {
 	mvreg_write(pp, MVNETA_RXQ_THRESHOLD_REG(rxq->id),
 		    value | MVNETA_RXQ_NON_OCCUPIED(0));
-	rxq->pkts_coal = value;
 }
 
 /* Set the time delay in usec before RX interrupt will be generated by
@@ -1035,7 +1702,6 @@ static void mvneta_rx_time_coal_set(struct mvneta_port *pp,
 	val = (clk_rate / 1000000) * value;
 
 	mvreg_write(pp, MVNETA_RXQ_TIME_COAL_REG(rxq->id), val);
-	rxq->time_coal = value;
 }
 
 /* Set threshold for TX_DONE pkts coalescing */
@@ -1050,27 +1716,18 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp,
 	val |= MVNETA_TXQ_SENT_THRESH_MASK(value);
 
 	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), val);
-
-	txq->done_pkts_coal = value;
 }
 
-/* Trigger tx done timer in MVNETA_TX_DONE_TIMER_PERIOD msecs */
-static void mvneta_add_tx_done_timer(struct mvneta_port *pp)
-{
-	if (test_and_set_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags) == 0) {
-		pp->tx_done_timer.expires = jiffies +
-			msecs_to_jiffies(MVNETA_TX_DONE_TIMER_PERIOD);
-		add_timer(&pp->tx_done_timer);
-	}
-}
-
-
 /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
 static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
-				u32 phys_addr, u32 cookie)
+				u32 phys_addr, void *virt_addr,
+				struct mvneta_rx_queue *rxq)
 {
-	rx_desc->buf_cookie = cookie;
+	int i;
+
 	rx_desc->buf_phys_addr = phys_addr;
+	i = rx_desc - rxq->descs;
+	rxq->buf_virt_addr[i] = virt_addr;
 }
 
 /* Decrement sent descriptors counter */
@@ -1124,7 +1781,7 @@ static int mvneta_txq_sent_desc_proc(struct mvneta_port *pp,
 }
 
 /* Set TXQ descriptors fields relevant for CSUM calculation */
-static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto,
+static u32 mvneta_txq_desc_csum(int l3_offs, __be16 l3_proto,
 				int ip_hdr_len, int l4_proto)
 {
 	u32 command;
@@ -1136,7 +1793,7 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto,
 	command =  l3_offs    << MVNETA_TX_L3_OFF_SHIFT;
 	command |= ip_hdr_len << MVNETA_TX_IP_HLEN_SHIFT;
 
-	if (l3_proto == swab16(ETH_P_IP))
+	if (l3_proto == htons(ETH_P_IP))
 		command |= MVNETA_TXD_IP_CSUM;
 	else
 		command |= MVNETA_TX_L3_IP6;
@@ -1156,14 +1813,13 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto,
 static void mvneta_rx_error(struct mvneta_port *pp,
 			    struct mvneta_rx_desc *rx_desc)
 {
+	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 	u32 status = rx_desc->status;
 
-	if (!mvneta_rxq_desc_is_first_last(rx_desc)) {
-		netdev_err(pp->dev,
-			   "bad rx status %08x (buffer oversize), size=%d\n",
-			   rx_desc->status, rx_desc->data_size);
-		return;
-	}
+	/* update per-cpu counter */
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_errors++;
+	u64_stats_update_end(&stats->syncp);
 
 	switch (status & MVNETA_RXD_ERR_CODE_MASK) {
 	case MVNETA_RXD_ERR_CRC:
@@ -1185,113 +1841,131 @@ static void mvneta_rx_error(struct mvneta_port *pp,
 	}
 }
 
-/* Handle RX checksum offload */
-static void mvneta_rx_csum(struct mvneta_port *pp,
-			   struct mvneta_rx_desc *rx_desc,
-			   struct sk_buff *skb)
+/* Handle RX checksum offload based on the descriptor's status */
+static int mvneta_rx_csum(struct mvneta_port *pp, u32 status)
 {
-	if ((rx_desc->status & MVNETA_RXD_L3_IP4) &&
-	    (rx_desc->status & MVNETA_RXD_L4_CSUM_OK)) {
-		skb->csum = 0;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		return;
-	}
+	if ((pp->dev->features & NETIF_F_RXCSUM) &&
+	    (status & MVNETA_RXD_L3_IP4) &&
+	    (status & MVNETA_RXD_L4_CSUM_OK))
+		return CHECKSUM_UNNECESSARY;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	return CHECKSUM_NONE;
 }
 
-/* Return tx queue pointer (find last set bit) according to causeTxDone reg */
+/* Return tx queue pointer (find last set bit) according to <cause> returned
+ * form tx_done reg. <cause> must not be null. The return value is always a
+ * valid queue for matching the first one found in <cause>.
+ */
 static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
 						     u32 cause)
 {
 	int queue = fls(cause) - 1;
 
-	return (queue < 0 || queue >= txq_number) ? NULL : &pp->txqs[queue];
+	return &pp->txqs[queue];
 }
 
 /* Free tx queue skbuffs */
 static void mvneta_txq_bufs_free(struct mvneta_port *pp,
-				 struct mvneta_tx_queue *txq, int num)
+				 struct mvneta_tx_queue *txq, int num,
+				 struct netdev_queue *nq, bool napi)
 {
+	unsigned int bytes_compl = 0, pkts_compl = 0;
+	struct xdp_frame_bulk bq;
 	int i;
 
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
 	for (i = 0; i < num; i++) {
+		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
 		struct mvneta_tx_desc *tx_desc = txq->descs +
 			txq->txq_get_index;
-		struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];
 
 		mvneta_txq_inc_get(txq);
 
-		if (!skb)
-			continue;
-
-		dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr,
-				 tx_desc->data_size, DMA_TO_DEVICE);
-		dev_kfree_skb_any(skb);
+		if (buf->type == MVNETA_TYPE_XDP_NDO ||
+		    buf->type == MVNETA_TYPE_SKB)
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size, DMA_TO_DEVICE);
+		if ((buf->type == MVNETA_TYPE_TSO ||
+		     buf->type == MVNETA_TYPE_SKB) && buf->skb) {
+			bytes_compl += buf->skb->len;
+			pkts_compl++;
+			dev_kfree_skb_any(buf->skb);
+		} else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+			    buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
+			if (napi && buf->type == MVNETA_TYPE_XDP_TX)
+				xdp_return_frame_rx_napi(buf->xdpf);
+			else
+				xdp_return_frame_bulk(buf->xdpf, &bq);
+		}
 	}
+	xdp_flush_frame_bulk(&bq);
+
+	rcu_read_unlock();
+
+	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
 }
 
 /* Handle end of transmission */
-static int mvneta_txq_done(struct mvneta_port *pp,
+static void mvneta_txq_done(struct mvneta_port *pp,
 			   struct mvneta_tx_queue *txq)
 {
 	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
 	int tx_done;
 
 	tx_done = mvneta_txq_sent_desc_proc(pp, txq);
-	if (tx_done == 0)
-		return tx_done;
-	mvneta_txq_bufs_free(pp, txq, tx_done);
+	if (!tx_done)
+		return;
+
+	mvneta_txq_bufs_free(pp, txq, tx_done, nq, true);
 
 	txq->count -= tx_done;
 
 	if (netif_tx_queue_stopped(nq)) {
-		if (txq->size - txq->count >= MAX_SKB_FRAGS + 1)
+		if (txq->count <= txq->tx_wake_threshold)
 			netif_tx_wake_queue(nq);
 	}
-
-	return tx_done;
 }
 
-/* Refill processing */
+/* Refill processing for SW buffer management */
+/* Allocate page per descriptor */
 static int mvneta_rx_refill(struct mvneta_port *pp,
-			    struct mvneta_rx_desc *rx_desc)
-
+			    struct mvneta_rx_desc *rx_desc,
+			    struct mvneta_rx_queue *rxq,
+			    gfp_t gfp_mask)
 {
 	dma_addr_t phys_addr;
-	struct sk_buff *skb;
+	struct page *page;
 
-	skb = netdev_alloc_skb(pp->dev, pp->pkt_size);
-	if (!skb)
-		return -ENOMEM;
-
-	phys_addr = dma_map_single(pp->dev->dev.parent, skb->head,
-				   MVNETA_RX_BUF_SIZE(pp->pkt_size),
-				   DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
-		dev_kfree_skb(skb);
+	page = page_pool_alloc_pages(rxq->page_pool,
+				     gfp_mask | __GFP_NOWARN);
+	if (!page)
 		return -ENOMEM;
-	}
 
-	mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)skb);
+	phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction;
+	mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
 
 	return 0;
 }
 
 /* Handle tx checksum */
-static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb)
+static u32 mvneta_skb_tx_csum(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int ip_hdr_len = 0;
+		__be16 l3_proto = vlan_get_protocol(skb);
 		u8 l4_proto;
 
-		if (skb->protocol == htons(ETH_P_IP)) {
+		if (l3_proto == htons(ETH_P_IP)) {
 			struct iphdr *ip4h = ip_hdr(skb);
 
 			/* Calculate IPv4 checksum and L4 checksum */
 			ip_hdr_len = ip4h->ihl;
 			l4_proto = ip4h->protocol;
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		} else if (l3_proto == htons(ETH_P_IPV6)) {
 			struct ipv6hdr *ip6h = ipv6_hdr(skb);
 
 			/* Read l4_protocol from one of IPv6 extra headers */
@@ -1302,23 +1976,12 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb)
 			return MVNETA_TX_L4_CSUM_NOT;
 
 		return mvneta_txq_desc_csum(skb_network_offset(skb),
-				skb->protocol, ip_hdr_len, l4_proto);
+					    l3_proto, ip_hdr_len, l4_proto);
 	}
 
 	return MVNETA_TX_L4_CSUM_NOT;
 }
 
-/* Returns rx queue pointer (find last set bit) according to causeRxTx
- * value
- */
-static struct mvneta_rx_queue *mvneta_rx_policy(struct mvneta_port *pp,
-						u32 cause)
-{
-	int queue = fls(cause >> 8) - 1;
-
-	return (queue < 0 || queue >= rxq_number) ? NULL : &pp->rxqs[queue];
-}
-
 /* Drop packets received by the RXQ and free buffers */
 static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 				 struct mvneta_rx_queue *rxq)
@@ -1326,25 +1989,562 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 	int rx_done, i;
 
 	rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
+	if (rx_done)
+		mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+
+	if (pp->bm_priv) {
+		for (i = 0; i < rx_done; i++) {
+			struct mvneta_rx_desc *rx_desc =
+						  mvneta_rxq_next_desc_get(rxq);
+			u8 pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
+			struct mvneta_bm_pool *bm_pool;
+
+			bm_pool = &pp->bm_priv->bm_pools[pool_id];
+			/* Return dropped buffer to the pool */
+			mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
+					      rx_desc->buf_phys_addr);
+		}
+		return;
+	}
+
 	for (i = 0; i < rxq->size; i++) {
 		struct mvneta_rx_desc *rx_desc = rxq->descs + i;
-		struct sk_buff *skb = (struct sk_buff *)rx_desc->buf_cookie;
+		void *data = rxq->buf_virt_addr[i];
+		if (!data || !(rx_desc->buf_phys_addr))
+			continue;
 
-		dev_kfree_skb_any(skb);
-		dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
-				 rx_desc->data_size, DMA_FROM_DEVICE);
+		page_pool_put_full_page(rxq->page_pool, data, false);
 	}
+	if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
+		xdp_rxq_info_unreg(&rxq->xdp_rxq);
+	page_pool_destroy(rxq->page_pool);
+	rxq->page_pool = NULL;
+}
 
-	if (rx_done)
-		mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+static void
+mvneta_update_stats(struct mvneta_port *pp,
+		    struct mvneta_stats *ps)
+{
+	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+	u64_stats_update_begin(&stats->syncp);
+	stats->es.ps.rx_packets += ps->rx_packets;
+	stats->es.ps.rx_bytes += ps->rx_bytes;
+	/* xdp */
+	stats->es.ps.xdp_redirect += ps->xdp_redirect;
+	stats->es.ps.xdp_pass += ps->xdp_pass;
+	stats->es.ps.xdp_drop += ps->xdp_drop;
+	u64_stats_update_end(&stats->syncp);
 }
 
-/* Main rx processing */
-static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
-		     struct mvneta_rx_queue *rxq)
+static inline
+int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
 {
+	struct mvneta_rx_desc *rx_desc;
+	int curr_desc = rxq->first_to_refill;
+	int i;
+
+	for (i = 0; (i < rxq->refill_num) && (i < 64); i++) {
+		rx_desc = rxq->descs + curr_desc;
+		if (!(rx_desc->buf_phys_addr)) {
+			if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) {
+				struct mvneta_pcpu_stats *stats;
+
+				pr_err("Can't refill queue %d. Done %d from %d\n",
+				       rxq->id, i, rxq->refill_num);
+
+				stats = this_cpu_ptr(pp->stats);
+				u64_stats_update_begin(&stats->syncp);
+				stats->es.refill_error++;
+				u64_stats_update_end(&stats->syncp);
+				break;
+			}
+		}
+		curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc);
+	}
+	rxq->refill_num -= i;
+	rxq->first_to_refill = curr_desc;
+
+	return i;
+}
+
+static void
+mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+		    struct xdp_buff *xdp, int sync_len)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
+
+	if (likely(!xdp_buff_has_frags(xdp)))
+		goto out;
+
+	for (i = 0; i < sinfo->nr_frags; i++)
+		page_pool_put_full_page(rxq->page_pool,
+					skb_frag_page(&sinfo->frags[i]), true);
+
+out:
+	page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
+			   sync_len, true);
+}
+
+static int
+mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
+			struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+	struct device *dev = pp->dev->dev.parent;
+	struct mvneta_tx_desc *tx_desc;
+	int i, num_frames = 1;
+	struct page *page;
+
+	if (unlikely(xdp_frame_has_frags(xdpf)))
+		num_frames += sinfo->nr_frags;
+
+	if (txq->count + num_frames >= txq->size)
+		return MVNETA_XDP_DROPPED;
+
+	for (i = 0; i < num_frames; i++) {
+		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+		skb_frag_t *frag = NULL;
+		int len = xdpf->len;
+		dma_addr_t dma_addr;
+
+		if (unlikely(i)) { /* paged area */
+			frag = &sinfo->frags[i - 1];
+			len = skb_frag_size(frag);
+		}
+
+		tx_desc = mvneta_txq_next_desc_get(txq);
+		if (dma_map) {
+			/* ndo_xdp_xmit */
+			void *data;
+
+			data = unlikely(frag) ? skb_frag_address(frag)
+					      : xdpf->data;
+			dma_addr = dma_map_single(dev, data, len,
+						  DMA_TO_DEVICE);
+			if (dma_mapping_error(dev, dma_addr)) {
+				mvneta_txq_desc_put(txq);
+				goto unmap;
+			}
+
+			buf->type = MVNETA_TYPE_XDP_NDO;
+		} else {
+			page = unlikely(frag) ? skb_frag_page(frag)
+					      : virt_to_page(xdpf->data);
+			dma_addr = page_pool_get_dma_addr(page);
+			if (unlikely(frag))
+				dma_addr += skb_frag_off(frag);
+			else
+				dma_addr += sizeof(*xdpf) + xdpf->headroom;
+			dma_sync_single_for_device(dev, dma_addr, len,
+						   DMA_BIDIRECTIONAL);
+			buf->type = MVNETA_TYPE_XDP_TX;
+		}
+		buf->xdpf = unlikely(i) ? NULL : xdpf;
+
+		tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+		tx_desc->buf_phys_addr = dma_addr;
+		tx_desc->data_size = len;
+		*nxmit_byte += len;
+
+		mvneta_txq_inc_put(txq);
+	}
+	/*last descriptor */
+	tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
+
+	txq->pending += num_frames;
+	txq->count += num_frames;
+
+	return MVNETA_XDP_TX;
+
+unmap:
+	for (i--; i >= 0; i--) {
+		mvneta_txq_desc_put(txq);
+		tx_desc = txq->descs + txq->next_desc_to_proc;
+		dma_unmap_single(dev, tx_desc->buf_phys_addr,
+				 tx_desc->data_size,
+				 DMA_TO_DEVICE);
+	}
+
+	return MVNETA_XDP_DROPPED;
+}
+
+static int
+mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
+{
+	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+	struct mvneta_tx_queue *txq;
+	struct netdev_queue *nq;
+	int cpu, nxmit_byte = 0;
+	struct xdp_frame *xdpf;
+	u32 ret;
+
+	xdpf = xdp_convert_buff_to_frame(xdp);
+	if (unlikely(!xdpf))
+		return MVNETA_XDP_DROPPED;
+
+	cpu = smp_processor_id();
+	txq = &pp->txqs[cpu % txq_number];
+	nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+	__netif_tx_lock(nq, cpu);
+	ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
+	if (ret == MVNETA_XDP_TX) {
+		u64_stats_update_begin(&stats->syncp);
+		stats->es.ps.tx_bytes += nxmit_byte;
+		stats->es.ps.tx_packets++;
+		stats->es.ps.xdp_tx++;
+		u64_stats_update_end(&stats->syncp);
+
+		mvneta_txq_pend_desc_add(pp, txq, 0);
+	} else {
+		u64_stats_update_begin(&stats->syncp);
+		stats->es.ps.xdp_tx_err++;
+		u64_stats_update_end(&stats->syncp);
+	}
+	__netif_tx_unlock(nq);
+
+	return ret;
+}
+
+static int
+mvneta_xdp_xmit(struct net_device *dev, int num_frame,
+		struct xdp_frame **frames, u32 flags)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+	int i, nxmit_byte = 0, nxmit = 0;
+	int cpu = smp_processor_id();
+	struct mvneta_tx_queue *txq;
+	struct netdev_queue *nq;
+	u32 ret;
+
+	if (unlikely(test_bit(__MVNETA_DOWN, &pp->state)))
+		return -ENETDOWN;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	txq = &pp->txqs[cpu % txq_number];
+	nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+	__netif_tx_lock(nq, cpu);
+	for (i = 0; i < num_frame; i++) {
+		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+					      true);
+		if (ret != MVNETA_XDP_TX)
+			break;
+
+		nxmit++;
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		mvneta_txq_pend_desc_add(pp, txq, 0);
+	__netif_tx_unlock(nq);
+
+	u64_stats_update_begin(&stats->syncp);
+	stats->es.ps.tx_bytes += nxmit_byte;
+	stats->es.ps.tx_packets += nxmit;
+	stats->es.ps.xdp_xmit += nxmit;
+	stats->es.ps.xdp_xmit_err += num_frame - nxmit;
+	u64_stats_update_end(&stats->syncp);
+
+	return nxmit;
+}
+
+static int
+mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+	       struct bpf_prog *prog, struct xdp_buff *xdp,
+	       u32 frame_sz, struct mvneta_stats *stats)
+{
+	unsigned int len, data_len, sync;
+	u32 ret, act;
+
+	len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
+	data_len = xdp->data_end - xdp->data;
+	act = bpf_prog_run_xdp(prog, xdp);
+
+	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
+	sync = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
+	sync = max(sync, len);
+
+	switch (act) {
+	case XDP_PASS:
+		stats->xdp_pass++;
+		return MVNETA_XDP_PASS;
+	case XDP_REDIRECT: {
+		int err;
+
+		err = xdp_do_redirect(pp->dev, xdp, prog);
+		if (unlikely(err)) {
+			mvneta_xdp_put_buff(pp, rxq, xdp, sync);
+			ret = MVNETA_XDP_DROPPED;
+		} else {
+			ret = MVNETA_XDP_REDIR;
+			stats->xdp_redirect++;
+		}
+		break;
+	}
+	case XDP_TX:
+		ret = mvneta_xdp_xmit_back(pp, xdp);
+		if (ret != MVNETA_XDP_TX)
+			mvneta_xdp_put_buff(pp, rxq, xdp, sync);
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(pp->dev, prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(pp->dev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		mvneta_xdp_put_buff(pp, rxq, xdp, sync);
+		ret = MVNETA_XDP_DROPPED;
+		stats->xdp_drop++;
+		break;
+	}
+
+	stats->rx_bytes += frame_sz + xdp->data_end - xdp->data - data_len;
+	stats->rx_packets++;
+
+	return ret;
+}
+
+static void
+mvneta_swbm_rx_frame(struct mvneta_port *pp,
+		     struct mvneta_rx_desc *rx_desc,
+		     struct mvneta_rx_queue *rxq,
+		     struct xdp_buff *xdp, int *size,
+		     struct page *page)
+{
+	unsigned char *data = page_address(page);
+	int data_len = -MVNETA_MH_SIZE, len;
+	struct net_device *dev = pp->dev;
+	enum dma_data_direction dma_dir;
+
+	if (*size > MVNETA_MAX_RX_BUF_SIZE) {
+		len = MVNETA_MAX_RX_BUF_SIZE;
+		data_len += len;
+	} else {
+		len = *size;
+		data_len += len - ETH_FCS_LEN;
+	}
+	*size = *size - len;
+
+	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+	dma_sync_single_for_cpu(dev->dev.parent,
+				rx_desc->buf_phys_addr,
+				len, dma_dir);
+
+	rx_desc->buf_phys_addr = 0;
+
+	/* Prefetch header */
+	prefetch(data);
+	xdp_buff_clear_frags_flag(xdp);
+	xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
+			 data_len, true);
+}
+
+static void
+mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
+			    struct mvneta_rx_desc *rx_desc,
+			    struct mvneta_rx_queue *rxq,
+			    struct xdp_buff *xdp, int *size,
+			    struct page *page)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	struct net_device *dev = pp->dev;
+	enum dma_data_direction dma_dir;
+	int data_len, len;
+
+	if (*size > MVNETA_MAX_RX_BUF_SIZE) {
+		len = MVNETA_MAX_RX_BUF_SIZE;
+		data_len = len;
+	} else {
+		len = *size;
+		data_len = len - ETH_FCS_LEN;
+	}
+	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+	dma_sync_single_for_cpu(dev->dev.parent,
+				rx_desc->buf_phys_addr,
+				len, dma_dir);
+	rx_desc->buf_phys_addr = 0;
+
+	if (!xdp_buff_has_frags(xdp))
+		sinfo->nr_frags = 0;
+
+	if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+		skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
+
+		skb_frag_fill_page_desc(frag, page,
+					pp->rx_offset_correction, data_len);
+
+		if (!xdp_buff_has_frags(xdp)) {
+			sinfo->xdp_frags_size = *size;
+			xdp_buff_set_frags_flag(xdp);
+		}
+		if (page_is_pfmemalloc(page))
+			xdp_buff_set_frag_pfmemalloc(xdp);
+	} else {
+		page_pool_put_full_page(rxq->page_pool, page, true);
+	}
+	*size -= len;
+}
+
+static struct sk_buff *
+mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
+		      struct xdp_buff *xdp, u32 desc_status)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	u32 metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+	u8 num_frags;
+
+	if (unlikely(xdp_buff_has_frags(xdp)))
+		num_frags = sinfo->nr_frags;
+
+	skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	skb_mark_for_recycle(skb);
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+	skb->ip_summed = mvneta_rx_csum(pp, desc_status);
+
+	if (unlikely(xdp_buff_has_frags(xdp)))
+		xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size,
+					  num_frags * xdp->frame_sz,
+					  xdp_buff_get_skb_flags(xdp));
+
+	return skb;
+}
+
+/* Main rx processing when using software buffer management */
+static int mvneta_rx_swbm(struct napi_struct *napi,
+			  struct mvneta_port *pp, int budget,
+			  struct mvneta_rx_queue *rxq)
+{
+	int rx_proc = 0, rx_todo, refill, size = 0;
 	struct net_device *dev = pp->dev;
-	int rx_done, rx_filled;
+	struct mvneta_stats ps = {};
+	struct bpf_prog *xdp_prog;
+	u32 desc_status, frame_sz;
+	struct xdp_buff xdp_buf;
+
+	xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
+	xdp_buf.data_hard_start = NULL;
+
+	/* Get number of received packets */
+	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
+
+	xdp_prog = READ_ONCE(pp->xdp_prog);
+
+	/* Fairness NAPI loop */
+	while (rx_proc < budget && rx_proc < rx_todo) {
+		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+		u32 rx_status, index;
+		struct sk_buff *skb;
+		struct page *page;
+
+		index = rx_desc - rxq->descs;
+		page = (struct page *)rxq->buf_virt_addr[index];
+
+		rx_status = rx_desc->status;
+		rx_proc++;
+		rxq->refill_num++;
+
+		if (rx_status & MVNETA_RXD_FIRST_DESC) {
+			/* Check errors only for FIRST descriptor */
+			if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
+				mvneta_rx_error(pp, rx_desc);
+				goto next;
+			}
+
+			size = rx_desc->data_size;
+			frame_sz = size - ETH_FCS_LEN;
+			desc_status = rx_status;
+
+			mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
+					     &size, page);
+		} else {
+			if (unlikely(!xdp_buf.data_hard_start)) {
+				rx_desc->buf_phys_addr = 0;
+				page_pool_put_full_page(rxq->page_pool, page,
+							true);
+				goto next;
+			}
+
+			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
+						    &size, page);
+		} /* Middle or Last descriptor */
+
+		if (!(rx_status & MVNETA_RXD_LAST_DESC))
+			/* no last descriptor this time */
+			continue;
+
+		if (size) {
+			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
+			goto next;
+		}
+
+		if (xdp_prog &&
+		    mvneta_run_xdp(pp, rxq, xdp_prog, &xdp_buf, frame_sz, &ps))
+			goto next;
+
+		skb = mvneta_swbm_build_skb(pp, rxq->page_pool, &xdp_buf, desc_status);
+		if (IS_ERR(skb)) {
+			struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
+
+			u64_stats_update_begin(&stats->syncp);
+			stats->es.skb_alloc_error++;
+			stats->rx_dropped++;
+			u64_stats_update_end(&stats->syncp);
+
+			goto next;
+		}
+
+		ps.rx_bytes += skb->len;
+		ps.rx_packets++;
+
+		skb->protocol = eth_type_trans(skb, dev);
+		napi_gro_receive(napi, skb);
+next:
+		xdp_buf.data_hard_start = NULL;
+	}
+
+	if (xdp_buf.data_hard_start)
+		mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
+
+	if (ps.xdp_redirect)
+		xdp_do_flush();
+
+	if (ps.rx_packets)
+		mvneta_update_stats(pp, &ps);
+
+	/* return some buffers to hardware queue, one at a time is too slow */
+	refill = mvneta_rx_refill_queue(pp, rxq);
+
+	/* Update rxq management counters */
+	mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill);
+
+	return ps.rx_packets;
+}
+
+/* Main rx processing when using hardware buffer management */
+static int mvneta_rx_hwbm(struct napi_struct *napi,
+			  struct mvneta_port *pp, int rx_todo,
+			  struct mvneta_rx_queue *rxq)
+{
+	struct net_device *dev = pp->dev;
+	int rx_done;
+	u32 rcvd_pkts = 0;
+	u32 rcvd_bytes = 0;
 
 	/* Get number of received packets */
 	rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
@@ -1353,78 +2553,328 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 		rx_todo = rx_done;
 
 	rx_done = 0;
-	rx_filled = 0;
 
 	/* Fairness NAPI loop */
 	while (rx_done < rx_todo) {
 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+		struct mvneta_bm_pool *bm_pool = NULL;
 		struct sk_buff *skb;
-		u32 rx_status;
+		unsigned char *data;
+		dma_addr_t phys_addr;
+		u32 rx_status, frag_size;
 		int rx_bytes, err;
+		u8 pool_id;
 
-		prefetch(rx_desc);
 		rx_done++;
-		rx_filled++;
 		rx_status = rx_desc->status;
-		skb = (struct sk_buff *)rx_desc->buf_cookie;
+		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
+		data = (u8 *)(uintptr_t)rx_desc->buf_cookie;
+		phys_addr = rx_desc->buf_phys_addr;
+		pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
+		bm_pool = &pp->bm_priv->bm_pools[pool_id];
 
-		if (!mvneta_rxq_desc_is_first_last(rx_desc) ||
+		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
 		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
-			dev->stats.rx_errors++;
+err_drop_frame_ret_pool:
+			/* Return the buffer to the pool */
+			mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
+					      rx_desc->buf_phys_addr);
+err_drop_frame:
 			mvneta_rx_error(pp, rx_desc);
-			mvneta_rx_desc_fill(rx_desc, rx_desc->buf_phys_addr,
-					    (u32)skb);
+			/* leave the descriptor untouched */
 			continue;
 		}
 
-		dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
-				 rx_desc->data_size, DMA_FROM_DEVICE);
+		if (rx_bytes <= rx_copybreak) {
+			/* better copy a small frame and not unmap the DMA region */
+			skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
+			if (unlikely(!skb))
+				goto err_drop_frame_ret_pool;
 
-		rx_bytes = rx_desc->data_size -
-			(ETH_FCS_LEN + MVNETA_MH_SIZE);
-		u64_stats_update_begin(&pp->rx_stats.syncp);
-		pp->rx_stats.packets++;
-		pp->rx_stats.bytes += rx_bytes;
-		u64_stats_update_end(&pp->rx_stats.syncp);
+			dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev,
+			                              rx_desc->buf_phys_addr,
+			                              MVNETA_MH_SIZE + NET_SKB_PAD,
+			                              rx_bytes,
+			                              DMA_FROM_DEVICE);
+			skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
+				     rx_bytes);
 
-		/* Linux processing */
-		skb_reserve(skb, MVNETA_MH_SIZE);
-		skb_put(skb, rx_bytes);
+			skb->protocol = eth_type_trans(skb, dev);
+			skb->ip_summed = mvneta_rx_csum(pp, rx_status);
+			napi_gro_receive(napi, skb);
 
-		skb->protocol = eth_type_trans(skb, dev);
+			rcvd_pkts++;
+			rcvd_bytes += rx_bytes;
 
-		mvneta_rx_csum(pp, rx_desc, skb);
+			/* Return the buffer to the pool */
+			mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
+					      rx_desc->buf_phys_addr);
 
-		napi_gro_receive(&pp->napi, skb);
+			/* leave the descriptor and buffer untouched */
+			continue;
+		}
 
 		/* Refill processing */
-		err = mvneta_rx_refill(pp, rx_desc);
+		err = hwbm_pool_refill(&bm_pool->hwbm_pool, GFP_ATOMIC);
 		if (err) {
-			netdev_err(pp->dev, "Linux processing - Can't refill\n");
-			rxq->missed++;
-			rx_filled--;
+			struct mvneta_pcpu_stats *stats;
+
+			netdev_err(dev, "Linux processing - Can't refill\n");
+
+			stats = this_cpu_ptr(pp->stats);
+			u64_stats_update_begin(&stats->syncp);
+			stats->es.refill_error++;
+			u64_stats_update_end(&stats->syncp);
+
+			goto err_drop_frame_ret_pool;
 		}
+
+		frag_size = bm_pool->hwbm_pool.frag_size;
+
+		skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
+
+		/* After refill old buffer has to be unmapped regardless
+		 * the skb is successfully built or not.
+		 */
+		dma_unmap_single(&pp->bm_priv->pdev->dev, phys_addr,
+				 bm_pool->buf_size, DMA_FROM_DEVICE);
+		if (!skb)
+			goto err_drop_frame;
+
+		rcvd_pkts++;
+		rcvd_bytes += rx_bytes;
+
+		/* Linux processing */
+		skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
+		skb_put(skb, rx_bytes);
+
+		skb->protocol = eth_type_trans(skb, dev);
+		skb->ip_summed = mvneta_rx_csum(pp, rx_status);
+
+		napi_gro_receive(napi, skb);
+	}
+
+	if (rcvd_pkts) {
+		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->es.ps.rx_packets += rcvd_pkts;
+		stats->es.ps.rx_bytes += rcvd_bytes;
+		u64_stats_update_end(&stats->syncp);
 	}
 
 	/* Update rxq management counters */
-	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled);
+	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
 
 	return rx_done;
 }
 
+static void mvneta_free_tso_hdrs(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
+{
+	struct device *dev = pp->dev->dev.parent;
+	int i;
+
+	for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) {
+		if (txq->tso_hdrs[i]) {
+			dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE,
+					  txq->tso_hdrs[i],
+					  txq->tso_hdrs_phys[i]);
+			txq->tso_hdrs[i] = NULL;
+		}
+	}
+}
+
+static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
+{
+	struct device *dev = pp->dev->dev.parent;
+	int i, num;
+
+	num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE);
+	for (i = 0; i < num; i++) {
+		txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE,
+						      &txq->tso_hdrs_phys[i],
+						      GFP_KERNEL);
+		if (!txq->tso_hdrs[i]) {
+			mvneta_free_tso_hdrs(pp, txq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma)
+{
+	int index, offset;
+
+	index = txq->txq_put_index / MVNETA_TSO_PER_PAGE;
+	offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE;
+
+	*dma = txq->tso_hdrs_phys[index] + offset;
+
+	return txq->tso_hdrs[index] + offset;
+}
+
+static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq,
+			       struct tso_t *tso, int size, bool is_last)
+{
+	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+	int hdr_len = skb_tcp_all_headers(skb);
+	struct mvneta_tx_desc *tx_desc;
+	dma_addr_t hdr_phys;
+	char *hdr;
+
+	hdr = mvneta_get_tso_hdr(txq, &hdr_phys);
+	tso_build_hdr(skb, hdr, tso, size, is_last);
+
+	tx_desc = mvneta_txq_next_desc_get(txq);
+	tx_desc->data_size = hdr_len;
+	tx_desc->command = mvneta_skb_tx_csum(skb);
+	tx_desc->command |= MVNETA_TXD_F_DESC;
+	tx_desc->buf_phys_addr = hdr_phys;
+	buf->type = MVNETA_TYPE_TSO;
+	buf->skb = NULL;
+
+	mvneta_txq_inc_put(txq);
+}
+
+static inline int
+mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
+		    struct sk_buff *skb, char *data, int size,
+		    bool last_tcp, bool is_last)
+{
+	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+	struct mvneta_tx_desc *tx_desc;
+
+	tx_desc = mvneta_txq_next_desc_get(txq);
+	tx_desc->data_size = size;
+	tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, data,
+						size, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev->dev.parent,
+		     tx_desc->buf_phys_addr))) {
+		mvneta_txq_desc_put(txq);
+		return -ENOMEM;
+	}
+
+	tx_desc->command = 0;
+	buf->type = MVNETA_TYPE_SKB;
+	buf->skb = NULL;
+
+	if (last_tcp) {
+		/* last descriptor in the TCP packet */
+		tx_desc->command = MVNETA_TXD_L_DESC;
+
+		/* last descriptor in SKB */
+		if (is_last)
+			buf->skb = skb;
+	}
+	mvneta_txq_inc_put(txq);
+	return 0;
+}
+
+static void mvneta_release_descs(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq,
+				 int first, int num)
+{
+	int desc_idx, i;
+
+	desc_idx = first + num;
+	if (desc_idx >= txq->size)
+		desc_idx -= txq->size;
+
+	for (i = num; i >= 0; i--) {
+		struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx;
+		struct mvneta_tx_buf *buf = &txq->buf[desc_idx];
+
+		if (buf->type == MVNETA_TYPE_SKB)
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size,
+					 DMA_TO_DEVICE);
+
+		mvneta_txq_desc_put(txq);
+
+		if (desc_idx == 0)
+			desc_idx = txq->size;
+		desc_idx -= 1;
+	}
+}
+
+static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
+			 struct mvneta_tx_queue *txq)
+{
+	int hdr_len, total_len, data_left;
+	int first_desc, desc_count = 0;
+	struct mvneta_port *pp = netdev_priv(dev);
+	struct tso_t tso;
+
+	/* Count needed descriptors */
+	if ((txq->count + tso_count_descs(skb)) >= txq->size)
+		return 0;
+
+	if (skb_headlen(skb) < skb_tcp_all_headers(skb)) {
+		pr_info("*** Is this even possible?\n");
+		return 0;
+	}
+
+	first_desc = txq->txq_put_index;
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	hdr_len = tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+		desc_count++;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		mvneta_tso_put_hdr(skb, txq, &tso, data_left, total_len == 0);
+
+		while (data_left > 0) {
+			int size;
+			desc_count++;
+
+			size = min_t(int, tso.size, data_left);
+
+			if (mvneta_tso_put_data(dev, txq, skb,
+						 tso.data, size,
+						 size == data_left,
+						 total_len == 0))
+				goto err_release;
+			data_left -= size;
+
+			tso_build_data(skb, &tso, size);
+		}
+	}
+
+	return desc_count;
+
+err_release:
+	/* Release all used data descriptors; header descriptors must not
+	 * be DMA-unmapped.
+	 */
+	mvneta_release_descs(pp, txq, first_desc, desc_count - 1);
+	return 0;
+}
+
 /* Handle tx fragmentation processing */
 static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 				  struct mvneta_tx_queue *txq)
 {
 	struct mvneta_tx_desc *tx_desc;
-	int i;
+	int i, nr_frags = skb_shinfo(skb)->nr_frags;
+	int first_desc = txq->txq_put_index;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+	for (i = 0; i < nr_frags; i++) {
+		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		void *addr = page_address(frag->page.p) + frag->page_offset;
+		void *addr = skb_frag_address(frag);
 
 		tx_desc = mvneta_txq_next_desc_get(txq);
-		tx_desc->data_size = frag->size;
+		tx_desc->data_size = skb_frag_size(frag);
 
 		tx_desc->buf_phys_addr =
 			dma_map_single(pp->dev->dev.parent, addr,
@@ -1436,20 +2886,17 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 			goto error;
 		}
 
-		if (i == (skb_shinfo(skb)->nr_frags - 1)) {
+		if (i == nr_frags - 1) {
 			/* Last descriptor */
 			tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
-
-			txq->tx_skb[txq->txq_put_index] = skb;
-
-			mvneta_txq_inc_put(txq);
+			buf->skb = skb;
 		} else {
 			/* Descriptor in the middle: Not First, Not Last */
 			tx_desc->command = 0;
-
-			txq->tx_skb[txq->txq_put_index] = NULL;
-			mvneta_txq_inc_put(txq);
+			buf->skb = NULL;
 		}
+		buf->type = MVNETA_TYPE_SKB;
+		mvneta_txq_inc_put(txq);
 	}
 
 	return 0;
@@ -1458,39 +2905,36 @@ error:
 	/* Release all descriptors that were used to map fragments of
 	 * this packet, as well as the corresponding DMA mappings
 	 */
-	for (i = i - 1; i >= 0; i--) {
-		tx_desc = txq->descs + i;
-		dma_unmap_single(pp->dev->dev.parent,
-				 tx_desc->buf_phys_addr,
-				 tx_desc->data_size,
-				 DMA_TO_DEVICE);
-		mvneta_txq_desc_put(txq);
-	}
-
+	mvneta_release_descs(pp, txq, first_desc, i - 1);
 	return -ENOMEM;
 }
 
 /* Main tx processing */
-static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	u16 txq_id = skb_get_queue_mapping(skb);
 	struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
+	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
 	struct mvneta_tx_desc *tx_desc;
-	struct netdev_queue *nq;
+	int len = skb->len;
 	int frags = 0;
 	u32 tx_cmd;
 
 	if (!netif_running(dev))
 		goto out;
 
+	if (skb_is_gso(skb)) {
+		frags = mvneta_tx_tso(skb, dev, txq);
+		goto out;
+	}
+
 	frags = skb_shinfo(skb)->nr_frags + 1;
-	nq    = netdev_get_tx_queue(dev, txq_id);
 
 	/* Get a descriptor for the first part of the packet */
 	tx_desc = mvneta_txq_next_desc_get(txq);
 
-	tx_cmd = mvneta_skb_tx_csum(pp, skb);
+	tx_cmd = mvneta_skb_tx_csum(skb);
 
 	tx_desc->data_size = skb_headlen(skb);
 
@@ -1504,16 +2948,17 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
+	buf->type = MVNETA_TYPE_SKB;
 	if (frags == 1) {
 		/* First and Last descriptor */
 		tx_cmd |= MVNETA_TXD_FLZ_DESC;
 		tx_desc->command = tx_cmd;
-		txq->tx_skb[txq->txq_put_index] = skb;
+		buf->skb = skb;
 		mvneta_txq_inc_put(txq);
 	} else {
 		/* First but not Last */
 		tx_cmd |= MVNETA_TXD_F_DESC;
-		txq->tx_skb[txq->txq_put_index] = NULL;
+		buf->skb = NULL;
 		mvneta_txq_inc_put(txq);
 		tx_desc->command = tx_cmd;
 		/* Continue with other skb fragments */
@@ -1528,33 +2973,39 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	txq->count += frags;
-	mvneta_txq_pend_desc_add(pp, txq, frags);
-
-	if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
-		netif_tx_stop_queue(nq);
-
 out:
 	if (frags > 0) {
-		u64_stats_update_begin(&pp->tx_stats.syncp);
-		pp->tx_stats.packets++;
-		pp->tx_stats.bytes += skb->len;
-		u64_stats_update_end(&pp->tx_stats.syncp);
-
+		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+		netdev_tx_sent_queue(nq, len);
+
+		txq->count += frags;
+		if (txq->count >= txq->tx_stop_threshold)
+			netif_tx_stop_queue(nq);
+
+		/* This is not really the true transmit point, since we batch
+		 * up several before hitting the hardware, but is the best we
+		 * can do without more complexity to walk the packets in the
+		 * pending section of the transmit queue.
+		 */
+		skb_tx_timestamp(skb);
+
+		if (!netdev_xmit_more() || netif_xmit_stopped(nq) ||
+		    txq->pending + frags > MVNETA_TXQ_DEC_SENT_MASK)
+			mvneta_txq_pend_desc_add(pp, txq, frags);
+		else
+			txq->pending += frags;
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->es.ps.tx_bytes += len;
+		stats->es.ps.tx_packets++;
+		u64_stats_update_end(&stats->syncp);
 	} else {
 		dev->stats.tx_dropped++;
 		dev_kfree_skb_any(skb);
 	}
 
-	if (txq->count >= MVNETA_TXDONE_COAL_PKTS)
-		mvneta_txq_done(pp, txq);
-
-	/* If after calling mvneta_txq_done, count equals
-	 * frags, we need to set the timer
-	 */
-	if (txq->count == frags && frags > 0)
-		mvneta_add_tx_done_timer(pp);
-
 	return NETDEV_TX_OK;
 }
 
@@ -1564,9 +3015,10 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
 				  struct mvneta_tx_queue *txq)
 
 {
+	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
 	int tx_done = txq->count;
 
-	mvneta_txq_bufs_free(pp, txq, tx_done);
+	mvneta_txq_bufs_free(pp, txq, tx_done, nq, false);
 
 	/* reset txq */
 	txq->count = 0;
@@ -1574,33 +3026,27 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
 	txq->txq_get_index = 0;
 }
 
-/* handle tx done - called from tx done timer callback */
-static u32 mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done,
-			      int *tx_todo)
+/* Handle tx done - called in softirq context. The <cause_tx_done> argument
+ * must be a valid cause according to MVNETA_TXQ_INTR_MASK_ALL.
+ */
+static void mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done)
 {
 	struct mvneta_tx_queue *txq;
-	u32 tx_done = 0;
 	struct netdev_queue *nq;
+	int cpu = smp_processor_id();
 
-	*tx_todo = 0;
-	while (cause_tx_done != 0) {
+	while (cause_tx_done) {
 		txq = mvneta_tx_done_policy(pp, cause_tx_done);
-		if (!txq)
-			break;
 
 		nq = netdev_get_tx_queue(pp->dev, txq->id);
-		__netif_tx_lock(nq, smp_processor_id());
+		__netif_tx_lock(nq, cpu);
 
-		if (txq->count) {
-			tx_done += mvneta_txq_done(pp, txq);
-			*tx_todo += txq->count;
-		}
+		if (txq->count)
+			mvneta_txq_done(pp, txq);
 
 		__netif_tx_unlock(nq);
 		cause_tx_done &= ~((1 << txq->id));
 	}
-
-	return tx_done;
 }
 
 /* Compute crc8 of the specified address, using a unique algorithm ,
@@ -1768,19 +3214,19 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 	if (dev->flags & IFF_PROMISC) {
 		/* Accept all: Multicast + Unicast */
 		mvneta_rx_unicast_promisc_set(pp, 1);
-		mvneta_set_ucast_table(pp, rxq_def);
-		mvneta_set_special_mcast_table(pp, rxq_def);
-		mvneta_set_other_mcast_table(pp, rxq_def);
+		mvneta_set_ucast_table(pp, pp->rxq_def);
+		mvneta_set_special_mcast_table(pp, pp->rxq_def);
+		mvneta_set_other_mcast_table(pp, pp->rxq_def);
 	} else {
 		/* Accept single Unicast */
 		mvneta_rx_unicast_promisc_set(pp, 0);
 		mvneta_set_ucast_table(pp, -1);
-		mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def);
+		mvneta_mac_addr_set(pp, dev->dev_addr, pp->rxq_def);
 
 		if (dev->flags & IFF_ALLMULTI) {
 			/* Accept all multicast */
-			mvneta_set_special_mcast_table(pp, rxq_def);
-			mvneta_set_other_mcast_table(pp, rxq_def);
+			mvneta_set_special_mcast_table(pp, pp->rxq_def);
+			mvneta_set_other_mcast_table(pp, pp->rxq_def);
 		} else {
 			/* Accept only initialized multicast */
 			mvneta_set_special_mcast_table(pp, -1);
@@ -1789,7 +3235,7 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 			if (!netdev_mc_empty(dev)) {
 				netdev_for_each_mc_addr(ha, dev) {
 					mvneta_mcast_addr_set(pp, ha->addr,
-							      rxq_def);
+							      pp->rxq_def);
 				}
 			}
 		}
@@ -1801,14 +3247,31 @@ static irqreturn_t mvneta_isr(int irq, void *dev_id)
 {
 	struct mvneta_port *pp = (struct mvneta_port *)dev_id;
 
-	/* Mask all interrupts */
 	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-
 	napi_schedule(&pp->napi);
 
 	return IRQ_HANDLED;
 }
 
+/* Interrupt handling - the callback for request_percpu_irq() */
+static irqreturn_t mvneta_percpu_isr(int irq, void *dev_id)
+{
+	struct mvneta_pcpu_port *port = (struct mvneta_pcpu_port *)dev_id;
+
+	disable_percpu_irq(port->pp->dev->irq);
+	napi_schedule(&port->napi);
+
+	return IRQ_HANDLED;
+}
+
+static void mvneta_link_change(struct mvneta_port *pp)
+{
+	u32 gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS);
+
+	phylink_pcs_change(&pp->phylink_pcs,
+			   !!(gmac_stat & MVNETA_GMAC_LINK_UP));
+}
+
 /* NAPI handler
  * Bits 0 - 7 of the causeRxTx register indicate that are transmitted
  * packets on the corresponding TXQ (Bit 0 is for TX queue 1).
@@ -1820,8 +3283,9 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 {
 	int rx_done = 0;
 	u32 cause_rx_tx;
-	unsigned long flags;
+	int rx_queue;
 	struct mvneta_port *pp = netdev_priv(napi->dev);
+	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
 	if (!netif_running(pp->dev)) {
 		napi_complete(napi);
@@ -1829,104 +3293,128 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	}
 
 	/* Read cause register */
-	cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE) &
-		MVNETA_RX_INTR_MASK(rxq_number);
+	cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE);
+	if (cause_rx_tx & MVNETA_MISCINTR_INTR_MASK) {
+		u32 cause_misc = mvreg_read(pp, MVNETA_INTR_MISC_CAUSE);
+
+		mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+
+		if (cause_misc & (MVNETA_CAUSE_PHY_STATUS_CHANGE |
+				  MVNETA_CAUSE_LINK_CHANGE))
+			mvneta_link_change(pp);
+	}
+
+	/* Release Tx descriptors */
+	if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) {
+		mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL));
+		cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL;
+	}
 
 	/* For the case where the last mvneta_poll did not process all
 	 * RX packets
 	 */
-	cause_rx_tx |= pp->cause_rx_tx;
-	if (rxq_number > 1) {
-		while ((cause_rx_tx != 0) && (budget > 0)) {
-			int count;
-			struct mvneta_rx_queue *rxq;
-			/* get rx queue number from cause_rx_tx */
-			rxq = mvneta_rx_policy(pp, cause_rx_tx);
-			if (!rxq)
-				break;
-
-			/* process the packet in that rx queue */
-			count = mvneta_rx(pp, budget, rxq);
-			rx_done += count;
-			budget -= count;
-			if (budget > 0) {
-				/* set off the rx bit of the
-				 * corresponding bit in the cause rx
-				 * tx register, so that next iteration
-				 * will find the next rx queue where
-				 * packets are received on
-				 */
-				cause_rx_tx &= ~((1 << rxq->id) << 8);
-			}
-		}
-	} else {
-		rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]);
-		budget -= rx_done;
+	cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx :
+		port->cause_rx_tx;
+
+	rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
+	if (rx_queue) {
+		rx_queue = rx_queue - 1;
+		if (pp->bm_priv)
+			rx_done = mvneta_rx_hwbm(napi, pp, budget,
+						 &pp->rxqs[rx_queue]);
+		else
+			rx_done = mvneta_rx_swbm(napi, pp, budget,
+						 &pp->rxqs[rx_queue]);
 	}
 
-	if (budget > 0) {
+	if (rx_done < budget) {
 		cause_rx_tx = 0;
-		napi_complete(napi);
-		local_irq_save(flags);
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			    MVNETA_RX_INTR_MASK(rxq_number));
-		local_irq_restore(flags);
+		napi_complete_done(napi, rx_done);
+
+		if (pp->neta_armada3700) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+				    MVNETA_RX_INTR_MASK(rxq_number) |
+				    MVNETA_TX_INTR_MASK(txq_number) |
+				    MVNETA_MISCINTR_INTR_MASK);
+			local_irq_restore(flags);
+		} else {
+			enable_percpu_irq(pp->dev->irq, 0);
+		}
 	}
 
-	pp->cause_rx_tx = cause_rx_tx;
+	if (pp->neta_armada3700)
+		pp->cause_rx_tx = cause_rx_tx;
+	else
+		port->cause_rx_tx = cause_rx_tx;
+
 	return rx_done;
 }
 
-/* tx done timer callback */
-static void mvneta_tx_done_timer_callback(unsigned long data)
+static int mvneta_create_page_pool(struct mvneta_port *pp,
+				   struct mvneta_rx_queue *rxq, int size)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct mvneta_port *pp = netdev_priv(dev);
-	int tx_done = 0, tx_todo = 0;
+	struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog);
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.pool_size = size,
+		.nid = NUMA_NO_NODE,
+		.dev = pp->dev->dev.parent,
+		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+		.offset = pp->rx_offset_correction,
+		.max_len = MVNETA_MAX_RX_BUF_SIZE,
+	};
+	int err;
 
-	if (!netif_running(dev))
-		return ;
+	rxq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rxq->page_pool)) {
+		err = PTR_ERR(rxq->page_pool);
+		rxq->page_pool = NULL;
+		return err;
+	}
+
+	err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+				 PAGE_SIZE);
+	if (err < 0)
+		goto err_free_pp;
+
+	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
+					 rxq->page_pool);
+	if (err)
+		goto err_unregister_rxq;
 
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+	return 0;
 
-	tx_done = mvneta_tx_done_gbe(pp,
-				     (((1 << txq_number) - 1) &
-				      MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK),
-				     &tx_todo);
-	if (tx_todo > 0)
-		mvneta_add_tx_done_timer(pp);
+err_unregister_rxq:
+	xdp_rxq_info_unreg(&rxq->xdp_rxq);
+err_free_pp:
+	page_pool_destroy(rxq->page_pool);
+	rxq->page_pool = NULL;
+	return err;
 }
 
 /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
 static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 			   int num)
 {
-	struct net_device *dev = pp->dev;
-	int i;
+	int i, err;
 
-	for (i = 0; i < num; i++) {
-		struct sk_buff *skb;
-		struct mvneta_rx_desc *rx_desc;
-		unsigned long phys_addr;
-
-		skb = dev_alloc_skb(pp->pkt_size);
-		if (!skb) {
-			netdev_err(dev, "%s:rxq %d, %d of %d buffs  filled\n",
-				__func__, rxq->id, i, num);
-			break;
-		}
+	err = mvneta_create_page_pool(pp, rxq, num);
+	if (err < 0)
+		return err;
 
-		rx_desc = rxq->descs + i;
-		memset(rx_desc, 0, sizeof(struct mvneta_rx_desc));
-		phys_addr = dma_map_single(dev->dev.parent, skb->head,
-					   MVNETA_RX_BUF_SIZE(pp->pkt_size),
-					   DMA_FROM_DEVICE);
-		if (unlikely(dma_mapping_error(dev->dev.parent, phys_addr))) {
-			dev_kfree_skb(skb);
+	for (i = 0; i < num; i++) {
+		memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
+		if (mvneta_rx_refill(pp, rxq->descs + i, rxq,
+				     GFP_KERNEL) != 0) {
+			netdev_err(pp->dev,
+				   "%s:rxq %d, %d of %d buffs  filled\n",
+				   __func__, rxq->id, i, num);
 			break;
 		}
-
-		mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)skb);
 	}
 
 	/* Add this number of RX descriptors as non occupied (ready to
@@ -1942,7 +3430,7 @@ static void mvneta_tx_reset(struct mvneta_port *pp)
 {
 	int queue;
 
-	/* free the skb's in the hal tx ring */
+	/* free the skb's in the tx ring */
 	for (queue = 0; queue < txq_number; queue++)
 		mvneta_txq_done_force(pp, &pp->txqs[queue]);
 
@@ -1958,10 +3446,8 @@ static void mvneta_rx_reset(struct mvneta_port *pp)
 
 /* Rx/Tx queue initialization/cleanup methods */
 
-/* Create a specified RX queue */
-static int mvneta_rxq_init(struct mvneta_port *pp,
-			   struct mvneta_rx_queue *rxq)
-
+static int mvneta_rxq_sw_init(struct mvneta_port *pp,
+			      struct mvneta_rx_queue *rxq)
 {
 	rxq->size = pp->rx_ring_size;
 
@@ -1969,29 +3455,58 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 	rxq->descs = dma_alloc_coherent(pp->dev->dev.parent,
 					rxq->size * MVNETA_DESC_ALIGNED_SIZE,
 					&rxq->descs_phys, GFP_KERNEL);
-	if (rxq->descs == NULL)
+	if (!rxq->descs)
 		return -ENOMEM;
 
-	BUG_ON(rxq->descs !=
-	       PTR_ALIGN(rxq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE));
-
 	rxq->last_desc = rxq->size - 1;
 
+	return 0;
+}
+
+static void mvneta_rxq_hw_init(struct mvneta_port *pp,
+			       struct mvneta_rx_queue *rxq)
+{
 	/* Set Rx descriptors queue starting address */
 	mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
 	mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
 
-	/* Set Offset */
-	mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD);
-
 	/* Set coalescing pkts and time */
 	mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
 	mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
 
-	/* Fill RXQ with buffers from RX pool */
-	mvneta_rxq_buf_size_set(pp, rxq, MVNETA_RX_BUF_SIZE(pp->pkt_size));
-	mvneta_rxq_bm_disable(pp, rxq);
-	mvneta_rxq_fill(pp, rxq, rxq->size);
+	if (!pp->bm_priv) {
+		/* Set Offset */
+		mvneta_rxq_offset_set(pp, rxq, 0);
+		mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ?
+					MVNETA_MAX_RX_BUF_SIZE :
+					MVNETA_RX_BUF_SIZE(pp->pkt_size));
+		mvneta_rxq_bm_disable(pp, rxq);
+		mvneta_rxq_fill(pp, rxq, rxq->size);
+	} else {
+		/* Set Offset */
+		mvneta_rxq_offset_set(pp, rxq,
+				      NET_SKB_PAD - pp->rx_offset_correction);
+
+		mvneta_rxq_bm_enable(pp, rxq);
+		/* Fill RXQ with buffers from RX pool */
+		mvneta_rxq_long_pool_set(pp, rxq);
+		mvneta_rxq_short_pool_set(pp, rxq);
+		mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
+	}
+}
+
+/* Create a specified RX queue */
+static int mvneta_rxq_init(struct mvneta_port *pp,
+			   struct mvneta_rx_queue *rxq)
+
+{
+	int ret;
+
+	ret = mvneta_rxq_sw_init(pp, rxq);
+	if (ret < 0)
+		return ret;
+
+	mvneta_rxq_hw_init(pp, rxq);
 
 	return 0;
 }
@@ -2012,27 +3527,58 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 	rxq->last_desc         = 0;
 	rxq->next_desc_to_proc = 0;
 	rxq->descs_phys        = 0;
+	rxq->first_to_refill   = 0;
+	rxq->refill_num        = 0;
 }
 
-/* Create and initialize a tx queue */
-static int mvneta_txq_init(struct mvneta_port *pp,
-			   struct mvneta_tx_queue *txq)
+static int mvneta_txq_sw_init(struct mvneta_port *pp,
+			      struct mvneta_tx_queue *txq)
 {
+	int cpu, err;
+
 	txq->size = pp->tx_ring_size;
 
+	/* A queue must always have room for at least one skb.
+	 * Therefore, stop the queue when the free entries reaches
+	 * the maximum number of descriptors per skb.
+	 */
+	txq->tx_stop_threshold = txq->size - MVNETA_MAX_SKB_DESCS;
+	txq->tx_wake_threshold = txq->tx_stop_threshold / 2;
+
 	/* Allocate memory for TX descriptors */
 	txq->descs = dma_alloc_coherent(pp->dev->dev.parent,
 					txq->size * MVNETA_DESC_ALIGNED_SIZE,
 					&txq->descs_phys, GFP_KERNEL);
-	if (txq->descs == NULL)
+	if (!txq->descs)
 		return -ENOMEM;
 
-	/* Make sure descriptor address is cache line size aligned  */
-	BUG_ON(txq->descs !=
-	       PTR_ALIGN(txq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE));
-
 	txq->last_desc = txq->size - 1;
 
+	txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
+	if (!txq->buf)
+		return -ENOMEM;
+
+	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+	err = mvneta_alloc_tso_hdrs(pp, txq);
+	if (err)
+		return err;
+
+	/* Setup XPS mapping */
+	if (pp->neta_armada3700)
+		cpu = 0;
+	else if (txq_number > 1)
+		cpu = txq->id % num_present_cpus();
+	else
+		cpu = pp->rxq_def % num_present_cpus();
+	cpumask_set_cpu(cpu, &txq->affinity_mask);
+	netif_set_xps_queue(pp->dev, &txq->affinity_mask, txq->id);
+
+	return 0;
+}
+
+static void mvneta_txq_hw_init(struct mvneta_port *pp,
+			       struct mvneta_tx_queue *txq)
+{
 	/* Set maximum bandwidth for enabled TXQs */
 	mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0x03ffffff);
 	mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0x3fffffff);
@@ -2041,34 +3587,50 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 	mvreg_write(pp, MVNETA_TXQ_BASE_ADDR_REG(txq->id), txq->descs_phys);
 	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), txq->size);
 
-	txq->tx_skb = kmalloc(txq->size * sizeof(*txq->tx_skb), GFP_KERNEL);
-	if (txq->tx_skb == NULL) {
-		dma_free_coherent(pp->dev->dev.parent,
-				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
-				  txq->descs, txq->descs_phys);
-		return -ENOMEM;
-	}
 	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
+}
+
+/* Create and initialize a tx queue */
+static int mvneta_txq_init(struct mvneta_port *pp,
+			   struct mvneta_tx_queue *txq)
+{
+	int ret;
+
+	ret = mvneta_txq_sw_init(pp, txq);
+	if (ret < 0)
+		return ret;
+
+	mvneta_txq_hw_init(pp, txq);
 
 	return 0;
 }
 
 /* Free allocated resources when mvneta_txq_init() fails to allocate memory*/
-static void mvneta_txq_deinit(struct mvneta_port *pp,
-			      struct mvneta_tx_queue *txq)
+static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
 {
-	kfree(txq->tx_skb);
+	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+	kfree(txq->buf);
 
+	mvneta_free_tso_hdrs(pp, txq);
 	if (txq->descs)
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
 				  txq->descs, txq->descs_phys);
 
+	netdev_tx_reset_queue(nq);
+
+	txq->buf               = NULL;
 	txq->descs             = NULL;
 	txq->last_desc         = 0;
 	txq->next_desc_to_proc = 0;
 	txq->descs_phys        = 0;
+}
 
+static void mvneta_txq_hw_deinit(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
+{
 	/* Set minimum bandwidth for disabled TXQs */
 	mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0);
 	mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0);
@@ -2078,6 +3640,13 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), 0);
 }
 
+static void mvneta_txq_deinit(struct mvneta_port *pp,
+			      struct mvneta_tx_queue *txq)
+{
+	mvneta_txq_sw_deinit(pp, txq);
+	mvneta_txq_hw_deinit(pp, txq);
+}
+
 /* Cleanup all Tx queues */
 static void mvneta_cleanup_txqs(struct mvneta_port *pp)
 {
@@ -2104,6 +3673,7 @@ static int mvneta_setup_rxqs(struct mvneta_port *pp)
 
 	for (queue = 0; queue < rxq_number; queue++) {
 		int err = mvneta_rxq_init(pp, &pp->rxqs[queue]);
+
 		if (err) {
 			netdev_err(pp->dev, "%s: can't create rxq=%d\n",
 				   __func__, queue);
@@ -2133,30 +3703,117 @@ static int mvneta_setup_txqs(struct mvneta_port *pp)
 	return 0;
 }
 
+static int mvneta_comphy_init(struct mvneta_port *pp, phy_interface_t interface)
+{
+	int ret;
+
+	ret = phy_set_mode_ext(pp->comphy, PHY_MODE_ETHERNET, interface);
+	if (ret)
+		return ret;
+
+	return phy_power_on(pp->comphy);
+}
+
+static int mvneta_config_interface(struct mvneta_port *pp,
+				   phy_interface_t interface)
+{
+	int ret = 0;
+
+	if (pp->comphy) {
+		if (interface == PHY_INTERFACE_MODE_SGMII ||
+		    interface == PHY_INTERFACE_MODE_1000BASEX ||
+		    interface == PHY_INTERFACE_MODE_2500BASEX) {
+			ret = mvneta_comphy_init(pp, interface);
+		}
+	} else {
+		switch (interface) {
+		case PHY_INTERFACE_MODE_QSGMII:
+			mvreg_write(pp, MVNETA_SERDES_CFG,
+				    MVNETA_QSGMII_SERDES_PROTO);
+			break;
+
+		case PHY_INTERFACE_MODE_SGMII:
+		case PHY_INTERFACE_MODE_1000BASEX:
+			mvreg_write(pp, MVNETA_SERDES_CFG,
+				    MVNETA_SGMII_SERDES_PROTO);
+			break;
+
+		case PHY_INTERFACE_MODE_2500BASEX:
+			mvreg_write(pp, MVNETA_SERDES_CFG,
+				    MVNETA_HSGMII_SERDES_PROTO);
+			break;
+		default:
+			break;
+		}
+	}
+
+	pp->phy_interface = interface;
+
+	return ret;
+}
+
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
+	int cpu;
+
+	WARN_ON(mvneta_config_interface(pp, pp->phy_interface));
+
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
 
 	/* start the Rx/Tx activity */
 	mvneta_port_enable(pp);
 
-	/* Enable polling on the port */
-	napi_enable(&pp->napi);
+	if (!pp->neta_armada3700) {
+		/* Enable polling on the port */
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-	/* Unmask interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-		    MVNETA_RX_INTR_MASK(rxq_number));
+			napi_enable(&port->napi);
+		}
+	} else {
+		napi_enable(&pp->napi);
+	}
+
+	/* Unmask interrupts. It has to be done from each CPU */
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
+		    MVNETA_CAUSE_LINK_CHANGE);
+
+	phylink_start(pp->phylink);
+
+	/* We may have called phylink_speed_down before */
+	phylink_speed_up(pp->phylink);
 
-	phy_start(pp->phy_dev);
 	netif_tx_start_all_queues(pp->dev);
+
+	clear_bit(__MVNETA_DOWN, &pp->state);
 }
 
 static void mvneta_stop_dev(struct mvneta_port *pp)
 {
-	phy_stop(pp->phy_dev);
+	unsigned int cpu;
 
-	napi_disable(&pp->napi);
+	set_bit(__MVNETA_DOWN, &pp->state);
+
+	if (device_may_wakeup(&pp->dev->dev))
+		phylink_speed_down(pp->phylink, false);
+
+	phylink_stop(pp->phylink);
+
+	if (!pp->neta_armada3700) {
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
+
+			napi_disable(&port->napi);
+		}
+	} else {
+		napi_disable(&pp->napi);
+	}
 
 	netif_carrier_off(pp->dev);
 
@@ -2167,90 +3824,111 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 	mvneta_port_disable(pp);
 
 	/* Clear all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
 	mvneta_tx_reset(pp);
 	mvneta_rx_reset(pp);
+
+	WARN_ON(phy_power_off(pp->comphy));
 }
 
-/* tx timeout callback - display a message and stop/start the network device */
-static void mvneta_tx_timeout(struct net_device *dev)
+static void mvneta_percpu_enable(void *arg)
 {
-	struct mvneta_port *pp = netdev_priv(dev);
+	struct mvneta_port *pp = arg;
 
-	netdev_info(dev, "tx timeout\n");
-	mvneta_stop_dev(pp);
-	mvneta_start_dev(pp);
+	enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE);
 }
 
-/* Return positive if MTU is valid */
-static int mvneta_check_mtu_valid(struct net_device *dev, int mtu)
+static void mvneta_percpu_disable(void *arg)
 {
-	if (mtu < 68) {
-		netdev_err(dev, "cannot change mtu to less than 68\n");
-		return -EINVAL;
-	}
-
-	/* 9676 == 9700 - 20 and rounding to 8 */
-	if (mtu > 9676) {
-		netdev_info(dev, "Illegal MTU value %d, round to 9676\n", mtu);
-		mtu = 9676;
-	}
+	struct mvneta_port *pp = arg;
 
-	if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
-		netdev_info(dev, "Illegal MTU value %d, rounding to %d\n",
-			mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8));
-		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
-	}
-
-	return mtu;
+	disable_percpu_irq(pp->dev->irq);
 }
 
 /* Change the device mtu */
 static int mvneta_change_mtu(struct net_device *dev, int mtu)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
+	struct bpf_prog *prog = pp->xdp_prog;
 	int ret;
 
-	mtu = mvneta_check_mtu_valid(dev, mtu);
-	if (mtu < 0)
+	if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
+		netdev_info(dev, "Illegal MTU value %d, rounding to %d\n",
+			    mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8));
+		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
+	}
+
+	if (prog && !prog->aux->xdp_has_frags &&
+	    mtu > MVNETA_MAX_RX_BUF_SIZE) {
+		netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+			    mtu);
+
 		return -EINVAL;
+	}
 
-	dev->mtu = mtu;
+	WRITE_ONCE(dev->mtu, mtu);
 
-	if (!netif_running(dev))
+	if (!netif_running(dev)) {
+		if (pp->bm_priv)
+			mvneta_bm_update_mtu(pp, mtu);
+
+		netdev_update_features(dev);
 		return 0;
+	}
 
 	/* The interface is running, so we have to force a
-	 * reallocation of the RXQs
+	 * reallocation of the queues
 	 */
 	mvneta_stop_dev(pp);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 
 	mvneta_cleanup_txqs(pp);
 	mvneta_cleanup_rxqs(pp);
 
-	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
+	if (pp->bm_priv)
+		mvneta_bm_update_mtu(pp, mtu);
+
+	pp->pkt_size = MVNETA_RX_PKT_SIZE(dev->mtu);
 
 	ret = mvneta_setup_rxqs(pp);
 	if (ret) {
-		netdev_err(pp->dev, "unable to setup rxqs after MTU change\n");
+		netdev_err(dev, "unable to setup rxqs after MTU change\n");
 		return ret;
 	}
 
-	mvneta_setup_txqs(pp);
+	ret = mvneta_setup_txqs(pp);
+	if (ret) {
+		netdev_err(dev, "unable to setup txqs after MTU change\n");
+		return ret;
+	}
 
+	on_each_cpu(mvneta_percpu_enable, pp, true);
 	mvneta_start_dev(pp);
-	mvneta_port_up(pp);
+
+	netdev_update_features(dev);
 
 	return 0;
 }
 
+static netdev_features_t mvneta_fix_features(struct net_device *dev,
+					     netdev_features_t features)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) {
+		features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+		netdev_info(dev,
+			    "Disable IP checksum for MTU greater than %dB\n",
+			    pp->tx_csum_limit);
+	}
+
+	return features;
+}
+
 /* Get mac address */
 static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr)
 {
@@ -2270,107 +3948,583 @@ static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr)
 static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	u8 *mac = addr + 2;
-	int i;
-
-	if (netif_running(dev))
-		return -EBUSY;
+	struct sockaddr *sockaddr = addr;
+	int ret;
 
+	ret = eth_prepare_mac_addr_change(dev, addr);
+	if (ret < 0)
+		return ret;
 	/* Remove previous address table entry */
 	mvneta_mac_addr_set(pp, dev->dev_addr, -1);
 
 	/* Set new addr in hw */
-	mvneta_mac_addr_set(pp, mac, rxq_def);
+	mvneta_mac_addr_set(pp, sockaddr->sa_data, pp->rxq_def);
+
+	eth_commit_mac_addr_change(dev, addr);
+	return 0;
+}
+
+static struct mvneta_port *mvneta_pcs_to_port(struct phylink_pcs *pcs)
+{
+	return container_of(pcs, struct mvneta_port, phylink_pcs);
+}
+
+static unsigned int mvneta_pcs_inband_caps(struct phylink_pcs *pcs,
+					   phy_interface_t interface)
+{
+	/* When operating in an 802.3z mode, we must have AN enabled:
+	 * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+	 * When <PortType> = 1 (1000BASE-X) this field must be set to 1."
+	 * Therefore, inband is "required".
+	 */
+	if (phy_interface_mode_is_8023z(interface))
+		return LINK_INBAND_ENABLE;
+
+	/* QSGMII, SGMII and RGMII can be configured to use inband
+	 * signalling of the AN result. Indicate these as "possible".
+	 */
+	if (interface == PHY_INTERFACE_MODE_SGMII ||
+	    interface == PHY_INTERFACE_MODE_QSGMII ||
+	    phy_interface_mode_is_rgmii(interface))
+		return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+
+	/* For any other modes, indicate that inband is not supported. */
+	return LINK_INBAND_DISABLE;
+}
+
+static void mvneta_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode,
+				 struct phylink_link_state *state)
+{
+	struct mvneta_port *pp = mvneta_pcs_to_port(pcs);
+	u32 gmac_stat;
+
+	gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS);
+
+	if (gmac_stat & MVNETA_GMAC_SPEED_1000)
+		state->speed =
+			state->interface == PHY_INTERFACE_MODE_2500BASEX ?
+			SPEED_2500 : SPEED_1000;
+	else if (gmac_stat & MVNETA_GMAC_SPEED_100)
+		state->speed = SPEED_100;
+	else
+		state->speed = SPEED_10;
+
+	state->an_complete = !!(gmac_stat & MVNETA_GMAC_AN_COMPLETE);
+	state->link = !!(gmac_stat & MVNETA_GMAC_LINK_UP);
+	state->duplex = !!(gmac_stat & MVNETA_GMAC_FULL_DUPLEX);
+
+	if (gmac_stat & MVNETA_GMAC_RX_FLOW_CTRL_ENABLE)
+		state->pause |= MLO_PAUSE_RX;
+	if (gmac_stat & MVNETA_GMAC_TX_FLOW_CTRL_ENABLE)
+		state->pause |= MLO_PAUSE_TX;
+}
+
+static int mvneta_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
+			     phy_interface_t interface,
+			     const unsigned long *advertising,
+			     bool permit_pause_to_mac)
+{
+	struct mvneta_port *pp = mvneta_pcs_to_port(pcs);
+	u32 mask, val, an, old_an, changed;
+
+	mask = MVNETA_GMAC_INBAND_AN_ENABLE |
+	       MVNETA_GMAC_INBAND_RESTART_AN |
+	       MVNETA_GMAC_AN_SPEED_EN |
+	       MVNETA_GMAC_AN_FLOW_CTRL_EN |
+	       MVNETA_GMAC_AN_DUPLEX_EN;
+
+	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) {
+		mask |= MVNETA_GMAC_CONFIG_MII_SPEED |
+			MVNETA_GMAC_CONFIG_GMII_SPEED |
+			MVNETA_GMAC_CONFIG_FULL_DUPLEX;
+		val = MVNETA_GMAC_INBAND_AN_ENABLE;
+
+		if (interface == PHY_INTERFACE_MODE_SGMII) {
+			/* SGMII mode receives the speed and duplex from PHY */
+			val |= MVNETA_GMAC_AN_SPEED_EN |
+			       MVNETA_GMAC_AN_DUPLEX_EN;
+		} else {
+			/* 802.3z mode has fixed speed and duplex */
+			val |= MVNETA_GMAC_CONFIG_GMII_SPEED |
+			       MVNETA_GMAC_CONFIG_FULL_DUPLEX;
+
+			/* The FLOW_CTRL_EN bit selects either the hardware
+			 * automatically or the CONFIG_FLOW_CTRL manually
+			 * controls the GMAC pause mode.
+			 */
+			if (permit_pause_to_mac)
+				val |= MVNETA_GMAC_AN_FLOW_CTRL_EN;
+
+			/* Update the advertisement bits */
+			mask |= MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL;
+			if (phylink_test(advertising, Pause))
+				val |= MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL;
+		}
+	} else {
+		/* Phy or fixed speed - disable in-band AN modes */
+		val = 0;
+	}
+
+	old_an = an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+	an = (an & ~mask) | val;
+	changed = old_an ^ an;
+	if (changed)
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, an);
+
+	/* We are only interested in the advertisement bits changing */
+	return !!(changed & MVNETA_GMAC_ADVERT_SYM_FLOW_CTRL);
+}
+
+static void mvneta_pcs_an_restart(struct phylink_pcs *pcs)
+{
+	struct mvneta_port *pp = mvneta_pcs_to_port(pcs);
+	u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+
+	mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG,
+		    gmac_an | MVNETA_GMAC_INBAND_RESTART_AN);
+	mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG,
+		    gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN);
+}
+
+static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = {
+	.pcs_inband_caps = mvneta_pcs_inband_caps,
+	.pcs_get_state = mvneta_pcs_get_state,
+	.pcs_config = mvneta_pcs_config,
+	.pcs_an_restart = mvneta_pcs_an_restart,
+};
+
+static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config,
+						 phy_interface_t interface)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct mvneta_port *pp = netdev_priv(ndev);
+
+	return &pp->phylink_pcs;
+}
 
-	/* Set addr in the device */
-	for (i = 0; i < ETH_ALEN; i++)
-		dev->dev_addr[i] = mac[i];
+static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode,
+			      phy_interface_t interface)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct mvneta_port *pp = netdev_priv(ndev);
+	u32 val;
+
+	if (pp->phy_interface != interface ||
+	    phylink_autoneg_inband(mode)) {
+		/* Force the link down when changing the interface or if in
+		 * in-band mode. According to Armada 370 documentation, we
+		 * can only change the port mode and in-band enable when the
+		 * link is down.
+		 */
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~MVNETA_GMAC_FORCE_LINK_PASS;
+		val |= MVNETA_GMAC_FORCE_LINK_DOWN;
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+	}
+
+	if (pp->phy_interface != interface)
+		WARN_ON(phy_power_off(pp->comphy));
+
+	/* Enable the 1ms clock */
+	if (phylink_autoneg_inband(mode)) {
+		unsigned long rate = clk_get_rate(pp->clk);
+
+		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER,
+			    MVNETA_GMAC_1MS_CLOCK_ENABLE | (rate / 1000));
+	}
 
 	return 0;
 }
 
-static void mvneta_adjust_link(struct net_device *ndev)
+static void mvneta_mac_config(struct phylink_config *config, unsigned int mode,
+			      const struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
-	struct phy_device *phydev = pp->phy_dev;
-	int status_change = 0;
+	u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0);
+	u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+	u32 new_ctrl4, gmac_ctrl4 = mvreg_read(pp, MVNETA_GMAC_CTRL_4);
 
-	if (phydev->link) {
-		if ((pp->speed != phydev->speed) ||
-		    (pp->duplex != phydev->duplex)) {
-			u32 val;
+	new_ctrl0 = gmac_ctrl0 & ~MVNETA_GMAC0_PORT_1000BASE_X;
+	new_ctrl2 = gmac_ctrl2 & ~(MVNETA_GMAC2_INBAND_AN_ENABLE |
+				   MVNETA_GMAC2_PORT_RESET);
+	new_ctrl4 = gmac_ctrl4 & ~(MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE);
+
+	/* Even though it might look weird, when we're configured in
+	 * SGMII or QSGMII mode, the RGMII bit needs to be set.
+	 */
+	new_ctrl2 |= MVNETA_GMAC2_PORT_RGMII;
+
+	if (state->interface == PHY_INTERFACE_MODE_QSGMII ||
+	    state->interface == PHY_INTERFACE_MODE_SGMII ||
+	    phy_interface_mode_is_8023z(state->interface))
+		new_ctrl2 |= MVNETA_GMAC2_PCS_ENABLE;
+
+	if (!phylink_autoneg_inband(mode)) {
+		/* Phy or fixed speed - nothing to do, leave the
+		 * configured speed, duplex and flow control as-is.
+		 */
+	} else if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+		/* SGMII mode receives the state from the PHY */
+		new_ctrl2 |= MVNETA_GMAC2_INBAND_AN_ENABLE;
+	} else {
+		/* 802.3z negotiation - only 1000base-X */
+		new_ctrl0 |= MVNETA_GMAC0_PORT_1000BASE_X;
+	}
 
-			val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
-			val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
-				 MVNETA_GMAC_CONFIG_GMII_SPEED |
-				 MVNETA_GMAC_CONFIG_FULL_DUPLEX);
+	/* When at 2.5G, the link partner can send frames with shortened
+	 * preambles.
+	 */
+	if (state->interface == PHY_INTERFACE_MODE_2500BASEX)
+		new_ctrl4 |= MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE;
+
+	if (new_ctrl0 != gmac_ctrl0)
+		mvreg_write(pp, MVNETA_GMAC_CTRL_0, new_ctrl0);
+	if (new_ctrl2 != gmac_ctrl2)
+		mvreg_write(pp, MVNETA_GMAC_CTRL_2, new_ctrl2);
+	if (new_ctrl4 != gmac_ctrl4)
+		mvreg_write(pp, MVNETA_GMAC_CTRL_4, new_ctrl4);
+
+	if (gmac_ctrl2 & MVNETA_GMAC2_PORT_RESET) {
+		while ((mvreg_read(pp, MVNETA_GMAC_CTRL_2) &
+			MVNETA_GMAC2_PORT_RESET) != 0)
+			continue;
+	}
+}
 
-			if (phydev->duplex)
-				val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX;
+static int mvneta_mac_finish(struct phylink_config *config, unsigned int mode,
+			     phy_interface_t interface)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct mvneta_port *pp = netdev_priv(ndev);
+	u32 val, clk;
 
-			if (phydev->speed == SPEED_1000)
-				val |= MVNETA_GMAC_CONFIG_GMII_SPEED;
-			else
-				val |= MVNETA_GMAC_CONFIG_MII_SPEED;
+	/* Disable 1ms clock if not in in-band mode */
+	if (!phylink_autoneg_inband(mode)) {
+		clk = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
+		clk &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, clk);
+	}
 
-			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+	if (pp->phy_interface != interface)
+		/* Enable the Serdes PHY */
+		WARN_ON(mvneta_config_interface(pp, interface));
 
-			pp->duplex = phydev->duplex;
-			pp->speed  = phydev->speed;
-		}
+	/* Allow the link to come up if in in-band mode, otherwise the
+	 * link is forced via mac_link_down()/mac_link_up()
+	 */
+	if (phylink_autoneg_inband(mode)) {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~MVNETA_GMAC_FORCE_LINK_DOWN;
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
 	}
 
-	if (phydev->link != pp->link) {
-		if (!phydev->link) {
-			pp->duplex = -1;
-			pp->speed = 0;
-		}
+	return 0;
+}
 
-		pp->link = phydev->link;
-		status_change = 1;
+static void mvneta_mac_link_down(struct phylink_config *config,
+				 unsigned int mode, phy_interface_t interface)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct mvneta_port *pp = netdev_priv(ndev);
+	u32 val;
+
+	mvneta_port_down(pp);
+
+	if (!phylink_autoneg_inband(mode)) {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~MVNETA_GMAC_FORCE_LINK_PASS;
+		val |= MVNETA_GMAC_FORCE_LINK_DOWN;
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
 	}
+}
 
-	if (status_change) {
-		if (phydev->link) {
-			u32 val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
-			val |= (MVNETA_GMAC_FORCE_LINK_PASS |
-				MVNETA_GMAC_FORCE_LINK_DOWN);
-			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
-			mvneta_port_up(pp);
-			netdev_info(pp->dev, "link up\n");
-		} else {
-			mvneta_port_down(pp);
-			netdev_info(pp->dev, "link down\n");
-		}
+static void mvneta_mac_link_up(struct phylink_config *config,
+			       struct phy_device *phy,
+			       unsigned int mode, phy_interface_t interface,
+			       int speed, int duplex,
+			       bool tx_pause, bool rx_pause)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct mvneta_port *pp = netdev_priv(ndev);
+	u32 val;
+
+	if (!phylink_autoneg_inband(mode)) {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~(MVNETA_GMAC_FORCE_LINK_DOWN |
+			 MVNETA_GMAC_CONFIG_MII_SPEED |
+			 MVNETA_GMAC_CONFIG_GMII_SPEED |
+			 MVNETA_GMAC_CONFIG_FLOW_CTRL |
+			 MVNETA_GMAC_CONFIG_FULL_DUPLEX);
+		val |= MVNETA_GMAC_FORCE_LINK_PASS;
+
+		if (speed == SPEED_1000 || speed == SPEED_2500)
+			val |= MVNETA_GMAC_CONFIG_GMII_SPEED;
+		else if (speed == SPEED_100)
+			val |= MVNETA_GMAC_CONFIG_MII_SPEED;
+
+		if (duplex == DUPLEX_FULL)
+			val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX;
+
+		if (tx_pause || rx_pause)
+			val |= MVNETA_GMAC_CONFIG_FLOW_CTRL;
+
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+	} else {
+		/* When inband doesn't cover flow control or flow control is
+		 * disabled, we need to manually configure it. This bit will
+		 * only have effect if MVNETA_GMAC_AN_FLOW_CTRL_EN is unset.
+		 */
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~MVNETA_GMAC_CONFIG_FLOW_CTRL;
+
+		if (tx_pause || rx_pause)
+			val |= MVNETA_GMAC_CONFIG_FLOW_CTRL;
+
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
 	}
+
+	mvneta_port_up(pp);
 }
 
-static int mvneta_mdio_probe(struct mvneta_port *pp)
+static void mvneta_mac_disable_tx_lpi(struct phylink_config *config)
 {
-	struct phy_device *phy_dev;
+	struct mvneta_port *pp = netdev_priv(to_net_dev(config->dev));
+	u32 lpi1;
+
+	lpi1 = mvreg_read(pp, MVNETA_LPI_CTRL_1);
+	lpi1 &= ~(MVNETA_LPI_CTRL_1_REQUEST_ENABLE |
+		  MVNETA_LPI_CTRL_1_REQUEST_FORCE |
+		  MVNETA_LPI_CTRL_1_MANUAL_MODE);
+	mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi1);
+}
 
-	phy_dev = of_phy_connect(pp->dev, pp->phy_node, mvneta_adjust_link, 0,
-				 pp->phy_interface);
-	if (!phy_dev) {
-		netdev_err(pp->dev, "could not find the PHY\n");
-		return -ENODEV;
+static int mvneta_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
+				    bool tx_clk_stop)
+{
+	struct mvneta_port *pp = netdev_priv(to_net_dev(config->dev));
+	u32 ts, tw, lpi0, lpi1, status;
+
+	status = mvreg_read(pp, MVNETA_GMAC_STATUS);
+	if (status & MVNETA_GMAC_SPEED_1000) {
+		/* At 1G speeds, the timer resolution are 1us, and
+		 * 802.3 says tw is 16.5us. Round up to 17us.
+		 */
+		tw = 17;
+		ts = timer;
+	} else {
+		/* At 100M speeds, the timer resolutions are 10us, and
+		 * 802.3 says tw is 30us.
+		 */
+		tw = 3;
+		ts = DIV_ROUND_UP(timer, 10);
 	}
 
-	phy_dev->supported &= PHY_GBIT_FEATURES;
-	phy_dev->advertising = phy_dev->supported;
+	if (ts > 255)
+		ts = 255;
+
+	/* Configure ts */
+	lpi0 = mvreg_read(pp, MVNETA_LPI_CTRL_0);
+	lpi0 = u32_replace_bits(lpi0, ts, MVNETA_LPI_CTRL_0_TS);
+	mvreg_write(pp, MVNETA_LPI_CTRL_0, lpi0);
 
-	pp->phy_dev = phy_dev;
-	pp->link    = 0;
-	pp->duplex  = 0;
-	pp->speed   = 0;
+	/* Configure tw and enable LPI generation */
+	lpi1 = mvreg_read(pp, MVNETA_LPI_CTRL_1);
+	lpi1 = u32_replace_bits(lpi1, tw, MVNETA_LPI_CTRL_1_TW);
+	lpi1 |= MVNETA_LPI_CTRL_1_REQUEST_ENABLE;
+	mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi1);
 
 	return 0;
 }
 
+static const struct phylink_mac_ops mvneta_phylink_ops = {
+	.mac_select_pcs = mvneta_mac_select_pcs,
+	.mac_prepare = mvneta_mac_prepare,
+	.mac_config = mvneta_mac_config,
+	.mac_finish = mvneta_mac_finish,
+	.mac_link_down = mvneta_mac_link_down,
+	.mac_link_up = mvneta_mac_link_up,
+	.mac_disable_tx_lpi = mvneta_mac_disable_tx_lpi,
+	.mac_enable_tx_lpi = mvneta_mac_enable_tx_lpi,
+};
+
+static int mvneta_mdio_probe(struct mvneta_port *pp)
+{
+	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+	int err = phylink_of_phy_connect(pp->phylink, pp->dn, 0);
+
+	if (err)
+		netdev_err(pp->dev, "could not attach PHY: %d\n", err);
+
+	phylink_ethtool_get_wol(pp->phylink, &wol);
+	device_set_wakeup_capable(&pp->dev->dev, !!wol.supported);
+
+	/* PHY WoL may be enabled but device wakeup disabled */
+	if (wol.supported)
+		device_set_wakeup_enable(&pp->dev->dev, !!wol.wolopts);
+
+	return err;
+}
+
 static void mvneta_mdio_remove(struct mvneta_port *pp)
 {
-	phy_disconnect(pp->phy_dev);
-	pp->phy_dev = NULL;
+	phylink_disconnect_phy(pp->phylink);
+}
+
+/* Electing a CPU must be done in an atomic way: it should be done
+ * after or before the removal/insertion of a CPU and this function is
+ * not reentrant.
+ */
+static void mvneta_percpu_elect(struct mvneta_port *pp)
+{
+	int elected_cpu = 0, max_cpu, cpu;
+
+	/* Use the cpu associated to the rxq when it is online, in all
+	 * the other cases, use the cpu 0 which can't be offline.
+	 */
+	if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def))
+		elected_cpu = pp->rxq_def;
+
+	max_cpu = num_present_cpus();
+
+	for_each_online_cpu(cpu) {
+		int rxq_map = 0, txq_map = 0;
+		int rxq;
+
+		for (rxq = 0; rxq < rxq_number; rxq++)
+			if ((rxq % max_cpu) == cpu)
+				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+		if (cpu == elected_cpu)
+			/* Map the default receive queue to the elected CPU */
+			rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def);
+
+		/* We update the TX queue map only if we have one
+		 * queue. In this case we associate the TX queue to
+		 * the CPU bound to the default RX queue
+		 */
+		if (txq_number == 1)
+			txq_map = (cpu == elected_cpu) ?
+				MVNETA_CPU_TXQ_ACCESS(0) : 0;
+		else
+			txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
+				MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+
+		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
+
+		/* Update the interrupt mask on each CPU according the
+		 * new mapping
+		 */
+		smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
+					 pp, true);
+	}
+};
+
+static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	int other_cpu;
+	struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+						  node_online);
+	struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+
+	/* Armada 3700's per-cpu interrupt for mvneta is broken, all interrupts
+	 * are routed to CPU 0, so we don't need all the cpu-hotplug support
+	 */
+	if (pp->neta_armada3700)
+		return 0;
+
+	netdev_lock(port->napi.dev);
+	spin_lock(&pp->lock);
+	/*
+	 * Configuring the driver for a new CPU while the driver is
+	 * stopping is racy, so just avoid it.
+	 */
+	if (pp->is_stopped) {
+		spin_unlock(&pp->lock);
+		netdev_unlock(port->napi.dev);
+		return 0;
+	}
+	netif_tx_stop_all_queues(pp->dev);
+
+	/*
+	 * We have to synchronise on tha napi of each CPU except the one
+	 * just being woken up
+	 */
+	for_each_online_cpu(other_cpu) {
+		if (other_cpu != cpu) {
+			struct mvneta_pcpu_port *other_port =
+				per_cpu_ptr(pp->ports, other_cpu);
+
+			napi_synchronize(&other_port->napi);
+		}
+	}
+
+	/* Mask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+	napi_enable_locked(&port->napi);
+
+	/*
+	 * Enable per-CPU interrupts on the CPU that is
+	 * brought up.
+	 */
+	mvneta_percpu_enable(pp);
+
+	/*
+	 * Enable per-CPU interrupt on the one CPU we care
+	 * about.
+	 */
+	mvneta_percpu_elect(pp);
+
+	/* Unmask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
+		    MVNETA_CAUSE_LINK_CHANGE);
+	netif_tx_start_all_queues(pp->dev);
+	spin_unlock(&pp->lock);
+	netdev_unlock(port->napi.dev);
+
+	return 0;
+}
+
+static int mvneta_cpu_down_prepare(unsigned int cpu, struct hlist_node *node)
+{
+	struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+						  node_online);
+	struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+
+	/*
+	 * Thanks to this lock we are sure that any pending cpu election is
+	 * done.
+	 */
+	spin_lock(&pp->lock);
+	/* Mask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+	spin_unlock(&pp->lock);
+
+	napi_synchronize(&port->napi);
+	napi_disable(&port->napi);
+	/* Disable per-CPU interrupts on the CPU that is brought down. */
+	mvneta_percpu_disable(pp);
+	return 0;
+}
+
+static int mvneta_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+						  node_dead);
+
+	/* Check if a new CPU must be elected now this on is down */
+	spin_lock(&pp->lock);
+	mvneta_percpu_elect(pp);
+	spin_unlock(&pp->lock);
+	/* Unmask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
+		    MVNETA_CAUSE_LINK_CHANGE);
+	netif_tx_start_all_queues(pp->dev);
+	return 0;
 }
 
 static int mvneta_open(struct net_device *dev)
@@ -2378,8 +4532,6 @@ static int mvneta_open(struct net_device *dev)
 	struct mvneta_port *pp = netdev_priv(dev);
 	int ret;
 
-	mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def);
-
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
 
 	ret = mvneta_setup_rxqs(pp);
@@ -2391,28 +4543,63 @@ static int mvneta_open(struct net_device *dev)
 		goto err_cleanup_rxqs;
 
 	/* Connect to port interrupt line */
-	ret = request_irq(pp->dev->irq, mvneta_isr, 0,
-			  MVNETA_DRIVER_NAME, pp);
+	if (pp->neta_armada3700)
+		ret = request_irq(pp->dev->irq, mvneta_isr, 0,
+				  dev->name, pp);
+	else
+		ret = request_percpu_irq(pp->dev->irq, mvneta_percpu_isr,
+					 dev->name, pp->ports);
 	if (ret) {
 		netdev_err(pp->dev, "cannot request irq %d\n", pp->dev->irq);
 		goto err_cleanup_txqs;
 	}
 
-	/* In default link is down */
-	netif_carrier_off(pp->dev);
+	if (!pp->neta_armada3700) {
+		/* Enable per-CPU interrupt on all the CPU to handle our RX
+		 * queue interrupts
+		 */
+		on_each_cpu(mvneta_percpu_enable, pp, true);
+
+		pp->is_stopped = false;
+		/* Register a CPU notifier to handle the case where our CPU
+		 * might be taken offline.
+		 */
+		ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+						       &pp->node_online);
+		if (ret)
+			goto err_free_irq;
+
+		ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						       &pp->node_dead);
+		if (ret)
+			goto err_free_online_hp;
+	}
 
 	ret = mvneta_mdio_probe(pp);
 	if (ret < 0) {
 		netdev_err(dev, "cannot probe MDIO bus\n");
-		goto err_free_irq;
+		goto err_free_dead_hp;
 	}
 
 	mvneta_start_dev(pp);
 
 	return 0;
 
+err_free_dead_hp:
+	if (!pp->neta_armada3700)
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
+err_free_online_hp:
+	if (!pp->neta_armada3700)
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
 err_free_irq:
-	free_irq(pp->dev->irq, pp);
+	if (pp->neta_armada3700) {
+		free_irq(pp->dev->irq, pp);
+	} else {
+		on_each_cpu(mvneta_percpu_disable, pp, true);
+		free_percpu_irq(pp->dev->irq, pp->ports);
+	}
 err_cleanup_txqs:
 	mvneta_cleanup_txqs(pp);
 err_cleanup_rxqs:
@@ -2425,44 +4612,122 @@ static int mvneta_stop(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
-	mvneta_stop_dev(pp);
-	mvneta_mdio_remove(pp);
-	free_irq(dev->irq, pp);
+	if (!pp->neta_armada3700) {
+		/* Inform that we are stopping so we don't want to setup the
+		 * driver for new CPUs in the notifiers. The code of the
+		 * notifier for CPU online is protected by the same spinlock,
+		 * so when we get the lock, the notifier work is done.
+		 */
+		spin_lock(&pp->lock);
+		pp->is_stopped = true;
+		spin_unlock(&pp->lock);
+
+		mvneta_stop_dev(pp);
+		mvneta_mdio_remove(pp);
+
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
+		on_each_cpu(mvneta_percpu_disable, pp, true);
+		free_percpu_irq(dev->irq, pp->ports);
+	} else {
+		mvneta_stop_dev(pp);
+		mvneta_mdio_remove(pp);
+		free_irq(dev->irq, pp);
+	}
+
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
-	del_timer(&pp->tx_done_timer);
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
 
 	return 0;
 }
 
-/* Ethtool methods */
+static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	return phylink_mii_ioctl(pp->phylink, ifr, cmd);
+}
 
-/* Get settings (phy address, speed) for ethtools */
-int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
+			    struct netlink_ext_ack *extack)
 {
+	bool need_update, running = netif_running(dev);
 	struct mvneta_port *pp = netdev_priv(dev);
+	struct bpf_prog *old_prog;
 
-	if (!pp->phy_dev)
-		return -ENODEV;
+	if (prog && !prog->aux->xdp_has_frags &&
+	    dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+		NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
+		return -EOPNOTSUPP;
+	}
+
+	if (pp->bm_priv) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Hardware Buffer Management not supported on XDP");
+		return -EOPNOTSUPP;
+	}
+
+	need_update = !!pp->xdp_prog != !!prog;
+	if (running && need_update)
+		mvneta_stop(dev);
 
-	return phy_ethtool_gset(pp->phy_dev, cmd);
+	old_prog = xchg(&pp->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (running && need_update)
+		return mvneta_open(dev);
+
+	return 0;
 }
 
-/* Set settings (phy address, speed) for ethtools */
-int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
-	struct mvneta_port *pp = netdev_priv(dev);
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Ethtool methods */
+
+/* Set link ksettings (phy address, speed) for ethtools */
+static int
+mvneta_ethtool_set_link_ksettings(struct net_device *ndev,
+				  const struct ethtool_link_ksettings *cmd)
+{
+	struct mvneta_port *pp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_set(pp->phylink, cmd);
+}
 
-	if (!pp->phy_dev)
-		return -ENODEV;
+/* Get link ksettings for ethtools */
+static int
+mvneta_ethtool_get_link_ksettings(struct net_device *ndev,
+				  struct ethtool_link_ksettings *cmd)
+{
+	struct mvneta_port *pp = netdev_priv(ndev);
 
-	return phy_ethtool_sset(pp->phy_dev, cmd);
+	return phylink_ethtool_ksettings_get(pp->phylink, cmd);
+}
+
+static int mvneta_ethtool_nway_reset(struct net_device *dev)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	return phylink_ethtool_nway_reset(pp->phylink);
 }
 
 /* Set interrupt coalescing for ethtools */
-static int mvneta_ethtool_set_coalesce(struct net_device *dev,
-				       struct ethtool_coalesce *c)
+static int
+mvneta_ethtool_set_coalesce(struct net_device *dev,
+			    struct ethtool_coalesce *c,
+			    struct kernel_ethtool_coalesce *kernel_coal,
+			    struct netlink_ext_ack *extack)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	int queue;
@@ -2485,8 +4750,11 @@ static int mvneta_ethtool_set_coalesce(struct net_device *dev,
 }
 
 /* get coalescing for ethtools */
-static int mvneta_ethtool_get_coalesce(struct net_device *dev,
-				       struct ethtool_coalesce *c)
+static int
+mvneta_ethtool_get_coalesce(struct net_device *dev,
+			    struct ethtool_coalesce *c,
+			    struct kernel_ethtool_coalesce *kernel_coal,
+			    struct netlink_ext_ack *extack)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
@@ -2501,17 +4769,20 @@ static int mvneta_ethtool_get_coalesce(struct net_device *dev,
 static void mvneta_ethtool_get_drvinfo(struct net_device *dev,
 				    struct ethtool_drvinfo *drvinfo)
 {
-	strlcpy(drvinfo->driver, MVNETA_DRIVER_NAME,
+	strscpy(drvinfo->driver, MVNETA_DRIVER_NAME,
 		sizeof(drvinfo->driver));
-	strlcpy(drvinfo->version, MVNETA_DRIVER_VERSION,
+	strscpy(drvinfo->version, MVNETA_DRIVER_VERSION,
 		sizeof(drvinfo->version));
-	strlcpy(drvinfo->bus_info, dev_name(&dev->dev),
+	strscpy(drvinfo->bus_info, dev_name(&dev->dev),
 		sizeof(drvinfo->bus_info));
 }
 
 
-static void mvneta_ethtool_get_ringparam(struct net_device *netdev,
-					 struct ethtool_ringparam *ring)
+static void
+mvneta_ethtool_get_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring,
+			     struct kernel_ethtool_ringparam *kernel_ring,
+			     struct netlink_ext_ack *extack)
 {
 	struct mvneta_port *pp = netdev_priv(netdev);
 
@@ -2521,8 +4792,11 @@ static void mvneta_ethtool_get_ringparam(struct net_device *netdev,
 	ring->tx_pending = pp->tx_ring_size;
 }
 
-static int mvneta_ethtool_set_ringparam(struct net_device *dev,
-					struct ethtool_ringparam *ring)
+static int
+mvneta_ethtool_set_ringparam(struct net_device *dev,
+			     struct ethtool_ringparam *ring,
+			     struct kernel_ethtool_ringparam *kernel_ring,
+			     struct netlink_ext_ack *extack)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
@@ -2530,8 +4804,12 @@ static int mvneta_ethtool_set_ringparam(struct net_device *dev,
 		return -EINVAL;
 	pp->rx_ring_size = ring->rx_pending < MVNETA_MAX_RXD ?
 		ring->rx_pending : MVNETA_MAX_RXD;
-	pp->tx_ring_size = ring->tx_pending < MVNETA_MAX_TXD ?
-		ring->tx_pending : MVNETA_MAX_TXD;
+
+	pp->tx_ring_size = clamp_t(u16, ring->tx_pending,
+				   MVNETA_MAX_SKB_DESCS * 2, MVNETA_MAX_TXD);
+	if (pp->tx_ring_size != ring->tx_pending)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    pp->tx_ring_size, ring->tx_pending);
 
 	if (netif_running(dev)) {
 		mvneta_stop(dev);
@@ -2545,6 +4823,500 @@ static int mvneta_ethtool_set_ringparam(struct net_device *dev,
 	return 0;
 }
 
+static void mvneta_ethtool_get_pauseparam(struct net_device *dev,
+					  struct ethtool_pauseparam *pause)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	phylink_ethtool_get_pauseparam(pp->phylink, pause);
+}
+
+static int mvneta_ethtool_set_pauseparam(struct net_device *dev,
+					 struct ethtool_pauseparam *pause)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	return phylink_ethtool_set_pauseparam(pp->phylink, pause);
+}
+
+static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset,
+				       u8 *data)
+{
+	if (sset == ETH_SS_STATS) {
+		struct mvneta_port *pp = netdev_priv(netdev);
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
+			ethtool_puts(&data, mvneta_statistics[i].name);
+
+		if (!pp->bm_priv) {
+			page_pool_ethtool_stats_get_strings(data);
+		}
+	}
+}
+
+static void
+mvneta_ethtool_update_pcpu_stats(struct mvneta_port *pp,
+				 struct mvneta_ethtool_stats *es)
+{
+	unsigned int start;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct mvneta_pcpu_stats *stats;
+		u64 skb_alloc_error;
+		u64 refill_error;
+		u64 xdp_redirect;
+		u64 xdp_xmit_err;
+		u64 xdp_tx_err;
+		u64 xdp_pass;
+		u64 xdp_drop;
+		u64 xdp_xmit;
+		u64 xdp_tx;
+
+		stats = per_cpu_ptr(pp->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin(&stats->syncp);
+			skb_alloc_error = stats->es.skb_alloc_error;
+			refill_error = stats->es.refill_error;
+			xdp_redirect = stats->es.ps.xdp_redirect;
+			xdp_pass = stats->es.ps.xdp_pass;
+			xdp_drop = stats->es.ps.xdp_drop;
+			xdp_xmit = stats->es.ps.xdp_xmit;
+			xdp_xmit_err = stats->es.ps.xdp_xmit_err;
+			xdp_tx = stats->es.ps.xdp_tx;
+			xdp_tx_err = stats->es.ps.xdp_tx_err;
+		} while (u64_stats_fetch_retry(&stats->syncp, start));
+
+		es->skb_alloc_error += skb_alloc_error;
+		es->refill_error += refill_error;
+		es->ps.xdp_redirect += xdp_redirect;
+		es->ps.xdp_pass += xdp_pass;
+		es->ps.xdp_drop += xdp_drop;
+		es->ps.xdp_xmit += xdp_xmit;
+		es->ps.xdp_xmit_err += xdp_xmit_err;
+		es->ps.xdp_tx += xdp_tx;
+		es->ps.xdp_tx_err += xdp_tx_err;
+	}
+}
+
+static void mvneta_ethtool_update_stats(struct mvneta_port *pp)
+{
+	struct mvneta_ethtool_stats stats = {};
+	const struct mvneta_statistic *s;
+	void __iomem *base = pp->base;
+	u32 high, low;
+	u64 val;
+	int i;
+
+	mvneta_ethtool_update_pcpu_stats(pp, &stats);
+	for (i = 0, s = mvneta_statistics;
+	     s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics);
+	     s++, i++) {
+		switch (s->type) {
+		case T_REG_32:
+			val = readl_relaxed(base + s->offset);
+			pp->ethtool_stats[i] += val;
+			break;
+		case T_REG_64:
+			/* Docs say to read low 32-bit then high */
+			low = readl_relaxed(base + s->offset);
+			high = readl_relaxed(base + s->offset + 4);
+			val = (u64)high << 32 | low;
+			pp->ethtool_stats[i] += val;
+			break;
+		case T_SW:
+			switch (s->offset) {
+			case ETHTOOL_STAT_EEE_WAKEUP:
+				val = phylink_get_eee_err(pp->phylink);
+				pp->ethtool_stats[i] += val;
+				break;
+			case ETHTOOL_STAT_SKB_ALLOC_ERR:
+				pp->ethtool_stats[i] = stats.skb_alloc_error;
+				break;
+			case ETHTOOL_STAT_REFILL_ERR:
+				pp->ethtool_stats[i] = stats.refill_error;
+				break;
+			case ETHTOOL_XDP_REDIRECT:
+				pp->ethtool_stats[i] = stats.ps.xdp_redirect;
+				break;
+			case ETHTOOL_XDP_PASS:
+				pp->ethtool_stats[i] = stats.ps.xdp_pass;
+				break;
+			case ETHTOOL_XDP_DROP:
+				pp->ethtool_stats[i] = stats.ps.xdp_drop;
+				break;
+			case ETHTOOL_XDP_TX:
+				pp->ethtool_stats[i] = stats.ps.xdp_tx;
+				break;
+			case ETHTOOL_XDP_TX_ERR:
+				pp->ethtool_stats[i] = stats.ps.xdp_tx_err;
+				break;
+			case ETHTOOL_XDP_XMIT:
+				pp->ethtool_stats[i] = stats.ps.xdp_xmit;
+				break;
+			case ETHTOOL_XDP_XMIT_ERR:
+				pp->ethtool_stats[i] = stats.ps.xdp_xmit_err;
+				break;
+			}
+			break;
+		}
+	}
+}
+
+static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data)
+{
+	struct page_pool_stats stats = {};
+	int i;
+
+	for (i = 0; i < rxq_number; i++) {
+		if (pp->rxqs[i].page_pool)
+			page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+	}
+
+	page_pool_ethtool_stats_get(data, &stats);
+}
+
+static void mvneta_ethtool_get_stats(struct net_device *dev,
+				     struct ethtool_stats *stats, u64 *data)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	int i;
+
+	mvneta_ethtool_update_stats(pp);
+
+	for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
+		*data++ = pp->ethtool_stats[i];
+
+	if (!pp->bm_priv)
+		mvneta_ethtool_pp_stats(pp, data);
+}
+
+static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	if (sset == ETH_SS_STATS) {
+		int count = ARRAY_SIZE(mvneta_statistics);
+		struct mvneta_port *pp = netdev_priv(dev);
+
+		if (!pp->bm_priv)
+			count += page_pool_ethtool_stats_get_count();
+
+		return count;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static u32 mvneta_ethtool_get_rxfh_indir_size(struct net_device *dev)
+{
+	return MVNETA_RSS_LU_TABLE_SIZE;
+}
+
+static u32 mvneta_ethtool_get_rx_ring_count(struct net_device *dev)
+{
+	return rxq_number;
+}
+
+static int  mvneta_config_rss(struct mvneta_port *pp)
+{
+	int cpu;
+	u32 val;
+
+	netif_tx_stop_all_queues(pp->dev);
+
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+
+	if (!pp->neta_armada3700) {
+		/* We have to synchronise on the napi of each CPU */
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *pcpu_port =
+				per_cpu_ptr(pp->ports, cpu);
+
+			napi_synchronize(&pcpu_port->napi);
+			napi_disable(&pcpu_port->napi);
+		}
+	} else {
+		napi_synchronize(&pp->napi);
+		napi_disable(&pp->napi);
+	}
+
+	pp->rxq_def = pp->indir[0];
+
+	/* Update unicast mapping */
+	mvneta_set_rx_mode(pp->dev);
+
+	/* Update val of portCfg register accordingly with all RxQueue types */
+	val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def);
+	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
+
+	/* Update the elected CPU matching the new rxq_def */
+	spin_lock(&pp->lock);
+	mvneta_percpu_elect(pp);
+	spin_unlock(&pp->lock);
+
+	if (!pp->neta_armada3700) {
+		/* We have to synchronise on the napi of each CPU */
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *pcpu_port =
+				per_cpu_ptr(pp->ports, cpu);
+
+			napi_enable(&pcpu_port->napi);
+		}
+	} else {
+		napi_enable(&pp->napi);
+	}
+
+	netif_tx_start_all_queues(pp->dev);
+
+	return 0;
+}
+
+static int mvneta_ethtool_set_rxfh(struct net_device *dev,
+				   struct ethtool_rxfh_param *rxfh,
+				   struct netlink_ext_ack *extack)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	/* Current code for Armada 3700 doesn't support RSS features yet */
+	if (pp->neta_armada3700)
+		return -EOPNOTSUPP;
+
+	/* We require at least one supported parameter to be changed
+	 * and no change in any of the unsupported parameters
+	 */
+	if (rxfh->key ||
+	    (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
+	     rxfh->hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+
+	if (!rxfh->indir)
+		return 0;
+
+	memcpy(pp->indir, rxfh->indir, MVNETA_RSS_LU_TABLE_SIZE);
+
+	return mvneta_config_rss(pp);
+}
+
+static int mvneta_ethtool_get_rxfh(struct net_device *dev,
+				   struct ethtool_rxfh_param *rxfh)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	/* Current code for Armada 3700 doesn't support RSS features yet */
+	if (pp->neta_armada3700)
+		return -EOPNOTSUPP;
+
+	rxfh->hfunc = ETH_RSS_HASH_TOP;
+
+	if (!rxfh->indir)
+		return 0;
+
+	memcpy(rxfh->indir, pp->indir, MVNETA_RSS_LU_TABLE_SIZE);
+
+	return 0;
+}
+
+static void mvneta_ethtool_get_wol(struct net_device *dev,
+				   struct ethtool_wolinfo *wol)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	phylink_ethtool_get_wol(pp->phylink, wol);
+}
+
+static int mvneta_ethtool_set_wol(struct net_device *dev,
+				  struct ethtool_wolinfo *wol)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	int ret;
+
+	ret = phylink_ethtool_set_wol(pp->phylink, wol);
+	if (!ret)
+		device_set_wakeup_enable(&dev->dev, !!wol->wolopts);
+
+	return ret;
+}
+
+static int mvneta_ethtool_get_eee(struct net_device *dev,
+				  struct ethtool_keee *eee)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	return phylink_ethtool_get_eee(pp->phylink, eee);
+}
+
+static int mvneta_ethtool_set_eee(struct net_device *dev,
+				  struct ethtool_keee *eee)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	/* The Armada 37x documents do not give limits for this other than
+	 * it being an 8-bit register.
+	 */
+	if (eee->tx_lpi_enabled && eee->tx_lpi_timer > 255)
+		return -EINVAL;
+
+	return phylink_ethtool_set_eee(pp->phylink, eee);
+}
+
+static void mvneta_clear_rx_prio_map(struct mvneta_port *pp)
+{
+	mvreg_write(pp, MVNETA_VLAN_PRIO_TO_RXQ, 0);
+}
+
+static void mvneta_map_vlan_prio_to_rxq(struct mvneta_port *pp, u8 pri, u8 rxq)
+{
+	u32 val = mvreg_read(pp, MVNETA_VLAN_PRIO_TO_RXQ);
+
+	val &= ~MVNETA_VLAN_PRIO_RXQ_MAP(pri, 0x7);
+	val |= MVNETA_VLAN_PRIO_RXQ_MAP(pri, rxq);
+
+	mvreg_write(pp, MVNETA_VLAN_PRIO_TO_RXQ, val);
+}
+
+static int mvneta_enable_per_queue_rate_limit(struct mvneta_port *pp)
+{
+	unsigned long core_clk_rate;
+	u32 refill_cycles;
+	u32 val;
+
+	core_clk_rate = clk_get_rate(pp->clk);
+	if (!core_clk_rate)
+		return -EINVAL;
+
+	refill_cycles = MVNETA_TXQ_BUCKET_REFILL_BASE_PERIOD_NS /
+			(NSEC_PER_SEC / core_clk_rate);
+
+	if (refill_cycles > MVNETA_REFILL_MAX_NUM_CLK)
+		return -EINVAL;
+
+	/* Enable bw limit algorithm version 3 */
+	val = mvreg_read(pp, MVNETA_TXQ_CMD1_REG);
+	val &= ~(MVNETA_TXQ_CMD1_BW_LIM_SEL_V1 | MVNETA_TXQ_CMD1_BW_LIM_EN);
+	mvreg_write(pp, MVNETA_TXQ_CMD1_REG, val);
+
+	/* Set the base refill rate */
+	mvreg_write(pp, MVNETA_REFILL_NUM_CLK_REG, refill_cycles);
+
+	return 0;
+}
+
+static void mvneta_disable_per_queue_rate_limit(struct mvneta_port *pp)
+{
+	u32 val = mvreg_read(pp, MVNETA_TXQ_CMD1_REG);
+
+	val |= (MVNETA_TXQ_CMD1_BW_LIM_SEL_V1 | MVNETA_TXQ_CMD1_BW_LIM_EN);
+	mvreg_write(pp, MVNETA_TXQ_CMD1_REG, val);
+}
+
+static int mvneta_setup_queue_rates(struct mvneta_port *pp, int queue,
+				    u64 min_rate, u64 max_rate)
+{
+	u32 refill_val, rem;
+	u32 val = 0;
+
+	/* Convert to from Bps to bps */
+	max_rate *= 8;
+
+	if (min_rate)
+		return -EINVAL;
+
+	refill_val = div_u64_rem(max_rate, MVNETA_TXQ_RATE_LIMIT_RESOLUTION,
+				 &rem);
+
+	if (rem || !refill_val ||
+	    refill_val > MVNETA_TXQ_BUCKET_REFILL_VALUE_MAX)
+		return -EINVAL;
+
+	val = refill_val;
+	val |= (MVNETA_TXQ_BUCKET_REFILL_PERIOD <<
+		MVNETA_TXQ_BUCKET_REFILL_PERIOD_SHIFT);
+
+	mvreg_write(pp, MVNETA_TXQ_BUCKET_REFILL_REG(queue), val);
+
+	return 0;
+}
+
+static int mvneta_setup_mqprio(struct net_device *dev,
+			       struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	int rxq, txq, tc, ret;
+	u8 num_tc;
+
+	if (mqprio->qopt.hw != TC_MQPRIO_HW_OFFLOAD_TCS)
+		return 0;
+
+	num_tc = mqprio->qopt.num_tc;
+
+	if (num_tc > rxq_number)
+		return -EINVAL;
+
+	mvneta_clear_rx_prio_map(pp);
+
+	if (!num_tc) {
+		mvneta_disable_per_queue_rate_limit(pp);
+		netdev_reset_tc(dev);
+		return 0;
+	}
+
+	netdev_set_num_tc(dev, mqprio->qopt.num_tc);
+
+	for (tc = 0; tc < mqprio->qopt.num_tc; tc++) {
+		netdev_set_tc_queue(dev, tc, mqprio->qopt.count[tc],
+				    mqprio->qopt.offset[tc]);
+
+		for (rxq = mqprio->qopt.offset[tc];
+		     rxq < mqprio->qopt.count[tc] + mqprio->qopt.offset[tc];
+		     rxq++) {
+			if (rxq >= rxq_number)
+				return -EINVAL;
+
+			mvneta_map_vlan_prio_to_rxq(pp, tc, rxq);
+		}
+	}
+
+	if (mqprio->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+		mvneta_disable_per_queue_rate_limit(pp);
+		return 0;
+	}
+
+	if (mqprio->qopt.num_tc > txq_number)
+		return -EINVAL;
+
+	ret = mvneta_enable_per_queue_rate_limit(pp);
+	if (ret)
+		return ret;
+
+	for (tc = 0; tc < mqprio->qopt.num_tc; tc++) {
+		for (txq = mqprio->qopt.offset[tc];
+		     txq < mqprio->qopt.count[tc] + mqprio->qopt.offset[tc];
+		     txq++) {
+			if (txq >= txq_number)
+				return -EINVAL;
+
+			ret = mvneta_setup_queue_rates(pp, txq,
+						       mqprio->min_rate[tc],
+						       mqprio->max_rate[tc]);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int mvneta_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			   void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return mvneta_setup_mqprio(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops mvneta_netdev_ops = {
 	.ndo_open            = mvneta_open,
 	.ndo_stop            = mvneta_stop,
@@ -2552,23 +5324,44 @@ static const struct net_device_ops mvneta_netdev_ops = {
 	.ndo_set_rx_mode     = mvneta_set_rx_mode,
 	.ndo_set_mac_address = mvneta_set_mac_addr,
 	.ndo_change_mtu      = mvneta_change_mtu,
-	.ndo_tx_timeout      = mvneta_tx_timeout,
+	.ndo_fix_features    = mvneta_fix_features,
 	.ndo_get_stats64     = mvneta_get_stats64,
+	.ndo_eth_ioctl        = mvneta_ioctl,
+	.ndo_bpf	     = mvneta_xdp,
+	.ndo_xdp_xmit        = mvneta_xdp_xmit,
+	.ndo_setup_tc	     = mvneta_setup_tc,
 };
 
-const struct ethtool_ops mvneta_eth_tool_ops = {
+static const struct ethtool_ops mvneta_eth_tool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
+				     ETHTOOL_COALESCE_MAX_FRAMES,
+	.nway_reset	= mvneta_ethtool_nway_reset,
 	.get_link       = ethtool_op_get_link,
-	.get_settings   = mvneta_ethtool_get_settings,
-	.set_settings   = mvneta_ethtool_set_settings,
 	.set_coalesce   = mvneta_ethtool_set_coalesce,
 	.get_coalesce   = mvneta_ethtool_get_coalesce,
 	.get_drvinfo    = mvneta_ethtool_get_drvinfo,
 	.get_ringparam  = mvneta_ethtool_get_ringparam,
 	.set_ringparam	= mvneta_ethtool_set_ringparam,
+	.get_pauseparam	= mvneta_ethtool_get_pauseparam,
+	.set_pauseparam	= mvneta_ethtool_set_pauseparam,
+	.get_strings	= mvneta_ethtool_get_strings,
+	.get_ethtool_stats = mvneta_ethtool_get_stats,
+	.get_sset_count	= mvneta_ethtool_get_sset_count,
+	.get_rxfh_indir_size = mvneta_ethtool_get_rxfh_indir_size,
+	.get_rx_ring_count = mvneta_ethtool_get_rx_ring_count,
+	.get_rxfh	= mvneta_ethtool_get_rxfh,
+	.set_rxfh	= mvneta_ethtool_set_rxfh,
+	.get_link_ksettings = mvneta_ethtool_get_link_ksettings,
+	.set_link_ksettings = mvneta_ethtool_set_link_ksettings,
+	.get_wol        = mvneta_ethtool_get_wol,
+	.set_wol        = mvneta_ethtool_set_wol,
+	.get_ts_info	= ethtool_op_get_ts_info,
+	.get_eee	= mvneta_ethtool_get_eee,
+	.set_eee	= mvneta_ethtool_set_eee,
 };
 
 /* Initialize hw */
-static int mvneta_init(struct mvneta_port *pp, int phy_addr)
+static int mvneta_init(struct device *dev, struct mvneta_port *pp)
 {
 	int queue;
 
@@ -2578,8 +5371,7 @@ static int mvneta_init(struct mvneta_port *pp, int phy_addr)
 	/* Set port default values */
 	mvneta_defaults_set(pp);
 
-	pp->txqs = kzalloc(txq_number * sizeof(struct mvneta_tx_queue),
-			   GFP_KERNEL);
+	pp->txqs = devm_kcalloc(dev, txq_number, sizeof(*pp->txqs), GFP_KERNEL);
 	if (!pp->txqs)
 		return -ENOMEM;
 
@@ -2591,12 +5383,9 @@ static int mvneta_init(struct mvneta_port *pp, int phy_addr)
 		txq->done_pkts_coal = MVNETA_TXDONE_COAL_PKTS;
 	}
 
-	pp->rxqs = kzalloc(rxq_number * sizeof(struct mvneta_rx_queue),
-			   GFP_KERNEL);
-	if (!pp->rxqs) {
-		kfree(pp->txqs);
+	pp->rxqs = devm_kcalloc(dev, rxq_number, sizeof(*pp->rxqs), GFP_KERNEL);
+	if (!pp->rxqs)
 		return -ENOMEM;
-	}
 
 	/* Create Rx descriptor rings */
 	for (queue = 0; queue < rxq_number; queue++) {
@@ -2605,17 +5394,18 @@ static int mvneta_init(struct mvneta_port *pp, int phy_addr)
 		rxq->size = pp->rx_ring_size;
 		rxq->pkts_coal = MVNETA_RX_COAL_PKTS;
 		rxq->time_coal = MVNETA_RX_COAL_USEC;
+		rxq->buf_virt_addr
+			= devm_kmalloc_array(pp->dev->dev.parent,
+					     rxq->size,
+					     sizeof(*rxq->buf_virt_addr),
+					     GFP_KERNEL);
+		if (!rxq->buf_virt_addr)
+			return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static void mvneta_deinit(struct mvneta_port *pp)
-{
-	kfree(pp->txqs);
-	kfree(pp->rxqs);
-}
-
 /* platform glue : initialize decoding windows */
 static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
 				     const struct mbus_dram_target_info *dram)
@@ -2635,137 +5425,237 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
 	win_enable = 0x3f;
 	win_protect = 0;
 
-	for (i = 0; i < dram->num_cs; i++) {
-		const struct mbus_dram_window *cs = dram->cs + i;
-		mvreg_write(pp, MVNETA_WIN_BASE(i), (cs->base & 0xffff0000) |
-			    (cs->mbus_attr << 8) | dram->mbus_dram_target_id);
+	if (dram) {
+		for (i = 0; i < dram->num_cs; i++) {
+			const struct mbus_dram_window *cs = dram->cs + i;
 
-		mvreg_write(pp, MVNETA_WIN_SIZE(i),
-			    (cs->size - 1) & 0xffff0000);
+			mvreg_write(pp, MVNETA_WIN_BASE(i),
+				    (cs->base & 0xffff0000) |
+				    (cs->mbus_attr << 8) |
+				    dram->mbus_dram_target_id);
 
-		win_enable &= ~(1 << i);
-		win_protect |= 3 << (2 * i);
+			mvreg_write(pp, MVNETA_WIN_SIZE(i),
+				    (cs->size - 1) & 0xffff0000);
+
+			win_enable &= ~(1 << i);
+			win_protect |= 3 << (2 * i);
+		}
+	} else {
+		if (pp->neta_ac5)
+			mvreg_write(pp, MVNETA_WIN_BASE(0),
+				    (MVNETA_AC5_CNM_DDR_ATTR << 8) |
+				    MVNETA_AC5_CNM_DDR_TARGET);
+		/* For Armada3700 open default 4GB Mbus window, leaving
+		 * arbitration of target/attribute to a different layer
+		 * of configuration.
+		 */
+		mvreg_write(pp, MVNETA_WIN_SIZE(0), 0xffff0000);
+		win_enable &= ~BIT(0);
+		win_protect = 3;
 	}
 
 	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
+	mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect);
 }
 
 /* Power up the port */
-static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
+static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
 {
-	u32 val;
-
 	/* MAC Cause register should be cleared */
 	mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0);
 
-	if (phy_mode == PHY_INTERFACE_MODE_SGMII)
-		mvneta_port_sgmii_config(pp);
-
-	mvneta_gmac_rgmii_set(pp, 1);
+	if (phy_mode != PHY_INTERFACE_MODE_QSGMII &&
+	    phy_mode != PHY_INTERFACE_MODE_SGMII &&
+	    !phy_interface_mode_is_8023z(phy_mode) &&
+	    !phy_interface_mode_is_rgmii(phy_mode))
+		return -EINVAL;
 
-	/* Cancel Port Reset */
-	val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
-	val &= ~MVNETA_GMAC2_PORT_RESET;
-	mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	/* Ensure LPI is disabled */
+	mvneta_mac_disable_tx_lpi(&pp->phylink_config);
 
-	while ((mvreg_read(pp, MVNETA_GMAC_CTRL_2) &
-		MVNETA_GMAC2_PORT_RESET) != 0)
-		continue;
+	return 0;
 }
 
 /* Device initialization routine */
 static int mvneta_probe(struct platform_device *pdev)
 {
-	const struct mbus_dram_target_info *dram_target_info;
 	struct device_node *dn = pdev->dev.of_node;
-	struct device_node *phy_node;
-	u32 phy_addr;
+	struct device_node *bm_node;
 	struct mvneta_port *pp;
 	struct net_device *dev;
-	const char *dt_mac_addr;
+	struct phylink *phylink;
+	struct phy *comphy;
 	char hw_mac_addr[ETH_ALEN];
+	phy_interface_t phy_mode;
 	const char *mac_from;
-	int phy_mode;
+	int tx_csum_limit;
 	int err;
+	int cpu;
 
-	/* Our multiqueue support is not complete, so for now, only
-	 * allow the usage of the first RX queue
-	 */
-	if (rxq_def != 0) {
-		dev_err(&pdev->dev, "Invalid rxq_def argument: %d\n", rxq_def);
-		return -EINVAL;
-	}
-
-	dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number);
+	dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(struct mvneta_port),
+				      txq_number, rxq_number);
 	if (!dev)
 		return -ENOMEM;
 
-	dev->irq = irq_of_parse_and_map(dn, 0);
-	if (dev->irq == 0) {
-		err = -EINVAL;
-		goto err_free_netdev;
-	}
+	dev->tx_queue_len = MVNETA_MAX_TXD;
+	dev->watchdog_timeo = 5 * HZ;
+	dev->netdev_ops = &mvneta_netdev_ops;
+	dev->ethtool_ops = &mvneta_eth_tool_ops;
 
-	phy_node = of_parse_phandle(dn, "phy", 0);
-	if (!phy_node) {
-		dev_err(&pdev->dev, "no associated PHY\n");
-		err = -ENODEV;
-		goto err_free_irq;
-	}
+	pp = netdev_priv(dev);
+	spin_lock_init(&pp->lock);
+	pp->dn = dn;
 
-	phy_mode = of_get_phy_mode(dn);
-	if (phy_mode < 0) {
+	pp->rxq_def = rxq_def;
+	pp->indir[0] = rxq_def;
+
+	err = of_get_phy_mode(dn, &phy_mode);
+	if (err) {
 		dev_err(&pdev->dev, "incorrect phy-mode\n");
-		err = -EINVAL;
-		goto err_free_irq;
+		return err;
 	}
 
-	dev->tx_queue_len = MVNETA_MAX_TXD;
-	dev->watchdog_timeo = 5 * HZ;
-	dev->netdev_ops = &mvneta_netdev_ops;
+	pp->phy_interface = phy_mode;
 
-	SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops);
+	comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
+	if (comphy == ERR_PTR(-EPROBE_DEFER))
+		return -EPROBE_DEFER;
 
-	pp = netdev_priv(dev);
+	if (IS_ERR(comphy))
+		comphy = NULL;
 
-	pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
-	init_timer(&pp->tx_done_timer);
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
+	pp->comphy = comphy;
 
-	pp->weight = MVNETA_RX_POLL_WEIGHT;
-	pp->phy_node = phy_node;
-	pp->phy_interface = phy_mode;
+	pp->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pp->base))
+		return PTR_ERR(pp->base);
 
-	pp->base = of_iomap(dn, 0);
-	if (pp->base == NULL) {
-		err = -ENOMEM;
-		goto err_free_irq;
+	/* Get special SoC configurations */
+	if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+		pp->neta_armada3700 = true;
+	if (of_device_is_compatible(dn, "marvell,armada-ac5-neta")) {
+		pp->neta_armada3700 = true;
+		pp->neta_ac5 = true;
 	}
 
-	pp->clk = devm_clk_get(&pdev->dev, NULL);
+	dev->irq = irq_of_parse_and_map(dn, 0);
+	if (dev->irq == 0)
+		return -EINVAL;
+
+	pp->clk = devm_clk_get(&pdev->dev, "core");
+	if (IS_ERR(pp->clk))
+		pp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pp->clk)) {
 		err = PTR_ERR(pp->clk);
-		goto err_unmap;
+		goto err_free_irq;
 	}
 
 	clk_prepare_enable(pp->clk);
 
-	dt_mac_addr = of_get_mac_address(dn);
-	if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) {
+	pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+	if (!IS_ERR(pp->clk_bus))
+		clk_prepare_enable(pp->clk_bus);
+
+	pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
+
+	pp->phylink_config.dev = &dev->dev;
+	pp->phylink_config.type = PHYLINK_NETDEV;
+	pp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 |
+		MAC_100 | MAC_1000FD | MAC_2500FD;
+
+	/* Setup EEE. Choose 250us idle. Only supported in SGMII modes. */
+	__set_bit(PHY_INTERFACE_MODE_QSGMII, pp->phylink_config.lpi_interfaces);
+	__set_bit(PHY_INTERFACE_MODE_SGMII, pp->phylink_config.lpi_interfaces);
+	pp->phylink_config.lpi_capabilities = MAC_100FD | MAC_1000FD;
+	pp->phylink_config.lpi_timer_default = 250;
+	pp->phylink_config.eee_enabled_default = true;
+
+	phy_interface_set_rgmii(pp->phylink_config.supported_interfaces);
+	__set_bit(PHY_INTERFACE_MODE_QSGMII,
+		  pp->phylink_config.supported_interfaces);
+	if (comphy) {
+		/* If a COMPHY is present, we can support any of the serdes
+		 * modes and switch between them.
+		 */
+		__set_bit(PHY_INTERFACE_MODE_SGMII,
+			  pp->phylink_config.supported_interfaces);
+		__set_bit(PHY_INTERFACE_MODE_1000BASEX,
+			  pp->phylink_config.supported_interfaces);
+		__set_bit(PHY_INTERFACE_MODE_2500BASEX,
+			  pp->phylink_config.supported_interfaces);
+	} else if (phy_mode == PHY_INTERFACE_MODE_2500BASEX) {
+		/* No COMPHY, with only 2500BASE-X mode supported */
+		__set_bit(PHY_INTERFACE_MODE_2500BASEX,
+			  pp->phylink_config.supported_interfaces);
+	} else if (phy_mode == PHY_INTERFACE_MODE_1000BASEX ||
+		   phy_mode == PHY_INTERFACE_MODE_SGMII) {
+		/* No COMPHY, we can switch between 1000BASE-X and SGMII */
+		__set_bit(PHY_INTERFACE_MODE_1000BASEX,
+			  pp->phylink_config.supported_interfaces);
+		__set_bit(PHY_INTERFACE_MODE_SGMII,
+			  pp->phylink_config.supported_interfaces);
+	}
+
+	phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode,
+				 phy_mode, &mvneta_phylink_ops);
+	if (IS_ERR(phylink)) {
+		err = PTR_ERR(phylink);
+		goto err_clk;
+	}
+
+	pp->phylink = phylink;
+
+	/* Alloc per-cpu port structure */
+	pp->ports = alloc_percpu(struct mvneta_pcpu_port);
+	if (!pp->ports) {
+		err = -ENOMEM;
+		goto err_free_phylink;
+	}
+
+	/* Alloc per-cpu stats */
+	pp->stats = netdev_alloc_pcpu_stats(struct mvneta_pcpu_stats);
+	if (!pp->stats) {
+		err = -ENOMEM;
+		goto err_free_ports;
+	}
+
+	err = of_get_ethdev_address(dn, dev);
+	if (!err) {
 		mac_from = "device tree";
-		memcpy(dev->dev_addr, dt_mac_addr, ETH_ALEN);
 	} else {
 		mvneta_get_mac_addr(pp, hw_mac_addr);
 		if (is_valid_ether_addr(hw_mac_addr)) {
 			mac_from = "hardware";
-			memcpy(dev->dev_addr, hw_mac_addr, ETH_ALEN);
+			eth_hw_addr_set(dev, hw_mac_addr);
 		} else {
 			mac_from = "random";
 			eth_hw_addr_random(dev);
 		}
 	}
 
-	pp->tx_done_timer.data = (unsigned long)dev;
+	if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) {
+		if (tx_csum_limit < 0 ||
+		    tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) {
+			tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+			dev_info(&pdev->dev,
+				 "Wrong TX csum limit in DT, set to %dB\n",
+				 MVNETA_TX_CSUM_DEF_SIZE);
+		}
+	} else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) {
+		tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+	} else {
+		tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE;
+	}
+
+	pp->tx_csum_limit = tx_csum_limit;
+
+	pp->dram_target_info = mv_mbus_dram_info();
+	/* Armada3700 requires setting default configuration of Mbus
+	 * windows, however without using filled mbus_dram_target_info
+	 * structure.
+	 */
+	if (pp->dram_target_info || pp->neta_armada3700)
+		mvneta_conf_mbus_windows(pp, pp->dram_target_info);
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
 	pp->rx_ring_size = MVNETA_MAX_RXD;
@@ -2773,28 +5663,82 @@ static int mvneta_probe(struct platform_device *pdev)
 	pp->dev = dev;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
-	err = mvneta_init(pp, phy_addr);
+	pp->id = global_port_id++;
+
+	/* Obtain access to BM resources if enabled and already initialized */
+	bm_node = of_parse_phandle(dn, "buffer-manager", 0);
+	if (bm_node) {
+		pp->bm_priv = mvneta_bm_get(bm_node);
+		if (pp->bm_priv) {
+			err = mvneta_bm_port_init(pdev, pp);
+			if (err < 0) {
+				dev_info(&pdev->dev,
+					 "use SW buffer management\n");
+				mvneta_bm_put(pp->bm_priv);
+				pp->bm_priv = NULL;
+			}
+		}
+		/* Set RX packet offset correction for platforms, whose
+		 * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
+		 * platforms and 0B for 32-bit ones.
+		 */
+		pp->rx_offset_correction = max(0,
+					       NET_SKB_PAD -
+					       MVNETA_RX_PKT_OFFSET_CORRECTION);
+	}
+	of_node_put(bm_node);
+
+	/* sw buffer management */
+	if (!pp->bm_priv)
+		pp->rx_offset_correction = MVNETA_SKB_HEADROOM;
+
+	err = mvneta_init(&pdev->dev, pp);
+	if (err < 0)
+		goto err_netdev;
+
+	err = mvneta_port_power_up(pp, pp->phy_interface);
 	if (err < 0) {
-		dev_err(&pdev->dev, "can't init eth hal\n");
-		goto err_clk;
+		dev_err(&pdev->dev, "can't power up port\n");
+		goto err_netdev;
 	}
-	mvneta_port_power_up(pp, phy_mode);
 
-	dram_target_info = mv_mbus_dram_info();
-	if (dram_target_info)
-		mvneta_conf_mbus_windows(pp, dram_target_info);
+	/* Armada3700 network controller does not support per-cpu
+	 * operation, so only single NAPI should be initialized.
+	 */
+	if (pp->neta_armada3700) {
+		netif_napi_add(dev, &pp->napi, mvneta_poll);
+	} else {
+		for_each_present_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-	netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
+			netif_napi_add(dev, &port->napi, mvneta_poll);
+			port->pp = pp;
+		}
+	}
 
-	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
-	dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
-	dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
-	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+			NETIF_F_TSO | NETIF_F_RXCSUM;
+	dev->hw_features |= dev->features;
+	dev->vlan_features |= dev->features;
+	if (!pp->bm_priv)
+		dev->xdp_features = NETDEV_XDP_ACT_BASIC |
+				    NETDEV_XDP_ACT_REDIRECT |
+				    NETDEV_XDP_ACT_NDO_XMIT |
+				    NETDEV_XDP_ACT_RX_SG |
+				    NETDEV_XDP_ACT_NDO_XMIT_SG;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netif_set_tso_max_segs(dev, MVNETA_MAX_TSO_SEGS);
+
+	/* MTU range: 68 - 9676 */
+	dev->min_mtu = ETH_MIN_MTU;
+	/* 9676 == 9700 - 20 and rounding to 8 */
+	dev->max_mtu = 9676;
 
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register\n");
-		goto err_deinit;
+		goto err_netdev;
 	}
 
 	netdev_info(dev, "Using %s mac address %pM\n", mac_from,
@@ -2804,37 +5748,166 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_deinit:
-	mvneta_deinit(pp);
+err_netdev:
+	if (pp->bm_priv) {
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
+				       1 << pp->id);
+		mvneta_bm_put(pp->bm_priv);
+	}
+	free_percpu(pp->stats);
+err_free_ports:
+	free_percpu(pp->ports);
+err_free_phylink:
+	if (pp->phylink)
+		phylink_destroy(pp->phylink);
 err_clk:
+	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
-err_unmap:
-	iounmap(pp->base);
 err_free_irq:
 	irq_dispose_mapping(dev->irq);
-err_free_netdev:
-	free_netdev(dev);
 	return err;
 }
 
 /* Device removal routine */
-static int mvneta_remove(struct platform_device *pdev)
+static void mvneta_remove(struct platform_device *pdev)
 {
 	struct net_device  *dev = platform_get_drvdata(pdev);
 	struct mvneta_port *pp = netdev_priv(dev);
 
 	unregister_netdev(dev);
-	mvneta_deinit(pp);
+	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
-	iounmap(pp->base);
+	free_percpu(pp->ports);
+	free_percpu(pp->stats);
 	irq_dispose_mapping(dev->irq);
-	free_netdev(dev);
+	phylink_destroy(pp->phylink);
+
+	if (pp->bm_priv) {
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
+				       1 << pp->id);
+		mvneta_bm_put(pp->bm_priv);
+	}
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mvneta_suspend(struct device *device)
+{
+	int queue;
+	struct net_device *dev = dev_get_drvdata(device);
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		goto clean_exit;
+
+	if (!pp->neta_armada3700) {
+		spin_lock(&pp->lock);
+		pp->is_stopped = true;
+		spin_unlock(&pp->lock);
+
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
+	}
+
+	rtnl_lock();
+	mvneta_stop_dev(pp);
+	rtnl_unlock();
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
+
+		mvneta_rxq_drop_pkts(pp, rxq);
+	}
+
+	for (queue = 0; queue < txq_number; queue++) {
+		struct mvneta_tx_queue *txq = &pp->txqs[queue];
+
+		mvneta_txq_hw_deinit(pp, txq);
+	}
+
+clean_exit:
+	netif_device_detach(dev);
+	clk_disable_unprepare(pp->clk_bus);
+	clk_disable_unprepare(pp->clk);
 
 	return 0;
 }
 
+static int mvneta_resume(struct device *device)
+{
+	struct platform_device *pdev = to_platform_device(device);
+	struct net_device *dev = dev_get_drvdata(device);
+	struct mvneta_port *pp = netdev_priv(dev);
+	int err, queue;
+
+	clk_prepare_enable(pp->clk);
+	if (!IS_ERR(pp->clk_bus))
+		clk_prepare_enable(pp->clk_bus);
+	if (pp->dram_target_info || pp->neta_armada3700)
+		mvneta_conf_mbus_windows(pp, pp->dram_target_info);
+	if (pp->bm_priv) {
+		err = mvneta_bm_port_init(pdev, pp);
+		if (err < 0) {
+			dev_info(&pdev->dev, "use SW buffer management\n");
+			pp->rx_offset_correction = MVNETA_SKB_HEADROOM;
+			pp->bm_priv = NULL;
+		}
+	}
+	mvneta_defaults_set(pp);
+	err = mvneta_port_power_up(pp, pp->phy_interface);
+	if (err < 0) {
+		dev_err(device, "can't power up port\n");
+		return err;
+	}
+
+	netif_device_attach(dev);
+
+	if (!netif_running(dev))
+		return 0;
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
+
+		rxq->next_desc_to_proc = 0;
+		mvneta_rxq_hw_init(pp, rxq);
+	}
+
+	for (queue = 0; queue < txq_number; queue++) {
+		struct mvneta_tx_queue *txq = &pp->txqs[queue];
+
+		txq->next_desc_to_proc = 0;
+		mvneta_txq_hw_init(pp, txq);
+	}
+
+	if (!pp->neta_armada3700) {
+		spin_lock(&pp->lock);
+		pp->is_stopped = false;
+		spin_unlock(&pp->lock);
+		cpuhp_state_add_instance_nocalls(online_hpstate,
+						 &pp->node_online);
+		cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						 &pp->node_dead);
+	}
+
+	rtnl_lock();
+	mvneta_start_dev(pp);
+	rtnl_unlock();
+	mvneta_set_rx_mode(dev);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(mvneta_pm_ops, mvneta_suspend, mvneta_resume);
+
 static const struct of_device_id mvneta_match[] = {
 	{ .compatible = "marvell,armada-370-neta" },
+	{ .compatible = "marvell,armada-xp-neta" },
+	{ .compatible = "marvell,armada-3700-neta" },
+	{ .compatible = "marvell,armada-ac5-neta" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, mvneta_match);
@@ -2845,16 +5918,55 @@ static struct platform_driver mvneta_driver = {
 	.driver = {
 		.name = MVNETA_DRIVER_NAME,
 		.of_match_table = mvneta_match,
+		.pm = &mvneta_pm_ops,
 	},
 };
 
-module_platform_driver(mvneta_driver);
+static int __init mvneta_driver_init(void)
+{
+	int ret;
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE);
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online",
+				      mvneta_cpu_online,
+				      mvneta_cpu_down_prepare);
+	if (ret < 0)
+		goto out;
+	online_hpstate = ret;
+	ret = cpuhp_setup_state_multi(CPUHP_NET_MVNETA_DEAD, "net/mvneta:dead",
+				      NULL, mvneta_cpu_dead);
+	if (ret)
+		goto err_dead;
+
+	ret = platform_driver_register(&mvneta_driver);
+	if (ret)
+		goto err;
+	return 0;
+
+err:
+	cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+err_dead:
+	cpuhp_remove_multi_state(online_hpstate);
+out:
+	return ret;
+}
+module_init(mvneta_driver_init);
+
+static void __exit mvneta_driver_exit(void)
+{
+	platform_driver_unregister(&mvneta_driver);
+	cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+	cpuhp_remove_multi_state(online_hpstate);
+}
+module_exit(mvneta_driver_exit);
 
 MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com");
 MODULE_AUTHOR("Rami Rosen <rosenr@marvell.com>, Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
 MODULE_LICENSE("GPL");
 
-module_param(rxq_number, int, S_IRUGO);
-module_param(txq_number, int, S_IRUGO);
+module_param(rxq_number, int, 0444);
+module_param(txq_number, int, 0444);
 
-module_param(rxq_def, int, S_IRUGO);
+module_param(rxq_def, int, 0444);
+module_param(rx_copybreak, int, 0644);