349 files changed, 21980 insertions, 4119 deletions
diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst index d351e880a2f6..a2738050c4f0 100644 --- a/Documentation/core-api/idr.rst +++ b/Documentation/core-api/idr.rst @@ -1,4 +1,4 @@ -.. SPDX-License-Identifier: CC-BY-SA-4.0 +.. SPDX-License-Identifier: GPL-2.0+ ============= ID Allocation diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index e22d8cfea687..5100358177c9 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -1,4 +1,4 @@ -Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY +Micrel KSZ9021/KSZ9031/KSZ9131 Gigabit Ethernet PHY Some boards require special tuning values, particularly when it comes to clock delays. You can specify clock delay values in the PHY OF @@ -64,6 +64,32 @@ KSZ9031: Attention: The link partner must be configurable as slave otherwise no link will be established. +KSZ9131: + + All skew control options are specified in picoseconds. The increment + step is 100ps. Unlike KSZ9031, the values represent picosecond delays. + A negative value can be assigned as rxc-skew-psec = <(-100)>;. + + Optional properties: + + Range of the value -700 to 2400, default value 0: + + - rxc-skew-psec : Skew control of RX clock pad + - txc-skew-psec : Skew control of TX clock pad + + Range of the value -700 to 800, default value 0: + + - rxdv-skew-psec : Skew control of RX CTL pad + - txen-skew-psec : Skew control of TX CTL pad + - rxd0-skew-psec : Skew control of RX data 0 pad + - rxd1-skew-psec : Skew control of RX data 1 pad + - rxd2-skew-psec : Skew control of RX data 2 pad + - rxd3-skew-psec : Skew control of RX data 3 pad + - txd0-skew-psec : Skew control of TX data 0 pad + - txd1-skew-psec : Skew control of TX data 1 pad + - txd2-skew-psec : Skew control of TX data 2 pad + - txd3-skew-psec : Skew control of TX data 3 pad + Examples: mdio { diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 2d239770b95f..eb0754475dd0 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -70,12 +70,6 @@ driver.txt - Softnet driver issues. ena.txt - info on Amazon's Elastic Network Adapter (ENA) -e100.txt - - info on Intel's EtherExpress PRO/100 line of 10/100 boards -e1000.txt - - info on Intel's E1000 line of gigabit ethernet boards -e1000e.txt - - README for the Intel Gigabit Ethernet Driver (e1000e). eql.txt - serial IP load balancing fib_trie.txt @@ -94,16 +88,8 @@ generic_netlink.txt - info on Generic Netlink gianfar.txt - Gianfar Ethernet Driver. -i40e.txt - - README for the Intel Ethernet Controller XL710 Driver (i40e). -iavf.txt - - README for the Intel Ethernet Adaptive Virtual Function Driver (iavf). ieee802154.txt - Linux IEEE 802.15.4 implementation, API and drivers -igb.txt - - README for the Intel Gigabit Ethernet Driver (igb). -igbvf.txt - - README for the Intel Gigabit Ethernet Driver (igbvf). ip-sysctl.txt - /proc/sys/net/ipv4/* variables ip_dynaddr.txt @@ -120,12 +106,6 @@ ipvs-sysctl.txt - Per-inode explanation of the /proc/sys/net/ipv4/vs interface. irda.txt - where to get IrDA (infrared) utilities and info for Linux. -ixgb.txt - - README for the Intel 10 Gigabit Ethernet Driver (ixgb). -ixgbe.txt - - README for the Intel 10 Gigabit Ethernet Driver (ixgbe). -ixgbevf.txt - - README for the Intel Virtual Function (VF) Driver (ixgbevf). l2tp.txt - User guide to the L2TP tunnel protocol.
lapb-module.txt diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst index f81111eba9c5..5e2839b4ec92 100644 --- a/Documentation/networking/e100.rst +++ b/Documentation/networking/e100.rst @@ -1,4 +1,5 @@ -============================================================== +.. SPDX-License-Identifier: GPL-2.0+ + Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters ============================================================== diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst index f10dd4086921..6379d4d20771 100644 --- a/Documentation/networking/e1000.rst +++ b/Documentation/networking/e1000.rst @@ -1,4 +1,5 @@ -=========================================================== +.. SPDX-License-Identifier: GPL-2.0+ + Linux* Base Driver for Intel(R) Ethernet Network Connection =========================================================== diff --git a/Documentation/networking/e1000e.rst b/Documentation/networking/e1000e.rst new file mode 100644 index 000000000000..33554e5416c5 --- /dev/null +++ b/Documentation/networking/e1000e.rst @@ -0,0 +1,382 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Driver for Intel(R) Ethernet Network Connection +====================================================== + +Intel Gigabit Linux driver. +Copyright(c) 2008-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Additional Configurations +- Support + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +https://www.intel.com/support + + +Command Line Parameters +======================= +If the driver is built as a module, the following optional parameters can be +set by entering them on the command line with the modprobe command using this +syntax:: + + modprobe e1000e [<option>=<VAL1>,<VAL2>,...] + +There needs to be a <VAL#> for each network port in the system supported by +this driver. The values will be applied to each instance, in function order. +For example:: + + modprobe e1000e InterruptThrottleRate=16000,16000 + +In this case, there are two network ports supported by e1000e in the system. +The default value for each parameter is generally the recommended setting, +unless otherwise noted. + +NOTE: A descriptor describes a data buffer and attributes related to the data +buffer. This information is accessed by the hardware. + +InterruptThrottleRate +--------------------- +:Valid Range: 0,1,3,4,100-100000 +:Default Value: 3 + +Interrupt Throttle Rate controls the number of interrupts each interrupt +vector can generate per second. Increasing ITR lowers latency at the cost of +increased CPU utilization, though it may help throughput in some circumstances. + +Setting InterruptThrottleRate to a value greater than or equal to 100 +will program the adapter to send out a maximum of that many interrupts +per second, even if more packets have come in. This reduces interrupt +load on the system and can lower CPU utilization under heavy load, +but will increase latency as packets are not processed as quickly. + +The default behaviour of the driver previously assumed a static +InterruptThrottleRate value of 8000, providing a good fallback value for +all traffic types, but lacking in small packet performance and latency. +The hardware can handle many more small packets per second however, and +for this reason an adaptive interrupt moderation algorithm was implemented.
+ +The driver has two adaptive modes (setting 1 or 3) in which +it dynamically adjusts the InterruptThrottleRate value based on the traffic +that it receives. After determining the type of incoming traffic in the last +timeframe, it will adjust the InterruptThrottleRate to an appropriate value +for that traffic. + +The algorithm classifies the incoming traffic every interval into +classes. Once the class is determined, the InterruptThrottleRate value is +adjusted to suit that traffic type the best. There are three classes defined: +"Bulk traffic", for large amounts of packets of normal size; "Low latency", +for small amounts of traffic and/or a significant percentage of small +packets; and "Lowest latency", for almost completely small packets or +minimal traffic. + + - 0: Off + Turns off any interrupt moderation and may improve small packet latency. + However, this is generally not suitable for bulk throughput traffic due + to the increased CPU utilization of the higher interrupt rate. + - 1: Dynamic mode + This mode attempts to moderate interrupts per vector while maintaining + very low latency. This can sometimes cause extra CPU utilization. If + planning on deploying e1000e in a latency-sensitive environment, this + parameter should be considered. + - 3: Dynamic Conservative mode (default) + In dynamic conservative mode, the InterruptThrottleRate value is set to + 4000 for traffic that falls in class "Bulk traffic". If traffic falls in + the "Low latency" or "Lowest latency" class, the InterruptThrottleRate is + increased stepwise to 20000. This default mode is suitable for most + applications. + - 4: Simplified Balancing mode + In simplified mode the interrupt rate is based on the ratio of TX and + RX traffic. If the bytes per second rate is approximately equal, the + interrupt rate will drop as low as 2000 interrupts per second. If the + traffic is mostly transmit or mostly receive, the interrupt rate could + be as high as 8000. + - 100-100000: + Setting InterruptThrottleRate to a value greater than or equal to 100 + will program the adapter to send at most that many interrupts per second, + even if more packets have come in. This reduces interrupt load on the + system and can lower CPU utilization under heavy load, but will increase + latency as packets are not processed as quickly. + +NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and +RxAbsIntDelay parameters. In other words, minimizing the receive and/or +transmit absolute delays does not force the controller to generate more +interrupts than what the Interrupt Throttle Rate allows. + +RxIntDelay +---------- +:Valid Range: 0-65535 (0=off) +:Default Value: 0 + +This value delays the generation of receive interrupts in units of 1.024 +microseconds. Receive interrupt reduction can improve CPU efficiency if +properly tuned for specific network traffic. Increasing this value adds extra +latency to frame reception and can end up decreasing the throughput of TCP +traffic. If the system is reporting dropped receives, this value may be set +too high, causing the driver to run out of available receive descriptors. + +CAUTION: When setting RxIntDelay to a value other than 0, adapters may hang +(stop transmitting) under certain network conditions. If this occurs a NETDEV +WATCHDOG message is logged in the system event log. In addition, the +controller is automatically reset, restoring the network connection. To +eliminate the potential for the hang ensure that RxIntDelay is set to 0.
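+ +As with the other parameters, RxIntDelay takes one comma-separated value per +network port. A minimal sketch, assuming a hypothetical two-port system and +keeping the recommended value of 0 explicit at load time:: + + modprobe e1000e RxIntDelay=0,0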
+ +RxAbsIntDelay +------------- +:Valid Range: 0-65535 (0=off) +:Default Value: 8 + +This value, in units of 1.024 microseconds, limits the delay in which a +receive interrupt is generated. This value ensures that an interrupt is +generated after the initial packet is received within the set amount of time, +which is useful only if RxIntDelay is non-zero. Proper tuning, along with +RxIntDelay, may improve traffic throughput in specific network conditions. + +TxIntDelay +---------- +:Valid Range: 0-65535 (0=off) +:Default Value: 8 + +This value delays the generation of transmit interrupts in units of 1.024 +microseconds. Transmit interrupt reduction can improve CPU efficiency if +properly tuned for specific network traffic. If the system is reporting +dropped transmits, this value may be set too high, causing the driver to run +out of available transmit descriptors. + +TxAbsIntDelay +------------- +:Valid Range: 0-65535 (0=off) +:Default Value: 32 + +This value, in units of 1.024 microseconds, limits the delay in which a +transmit interrupt is generated. It is useful only if TxIntDelay is non-zero. +It ensures that an interrupt is generated after the initial packet is sent on +the wire within the set amount of time. Proper tuning, along with TxIntDelay, +may improve traffic throughput in specific network conditions. + +copybreak +--------- +:Valid Range: 0-xxxxxxx (0=off) +:Default Value: 256 + +The driver copies all packets below or equaling this size to a fresh receive +buffer before handing them up the stack. +This parameter differs from other parameters because it is a single (not 1,1,1 +etc.) parameter applied to all driver instances and it is also available +during runtime at /sys/module/e1000e/parameters/copybreak. + +To use copybreak, type:: + + modprobe e1000e copybreak=128 + +SmartPowerDownEnable +-------------------- +:Valid Range: 0,1 +:Default Value: 0 (disabled) + +Allows the PHY to turn off in lower power states. The user can set this +parameter on supported chipsets. + +KumeranLockLoss +--------------- +:Valid Range: 0,1 +:Default Value: 1 (enabled) + +This workaround skips resetting the PHY at shutdown for the initial silicon +releases of ICH8 systems. + +IntMode +------- +:Valid Range: 0-2 +:Default Value: 0 + + +-------+----------------+ + | Value | Interrupt Mode | + +=======+================+ + | 0 | Legacy | + +-------+----------------+ + | 1 | MSI | + +-------+----------------+ + | 2 | MSI-X | + +-------+----------------+ + +IntMode allows load-time control over the type of interrupt registered for by +the driver. MSI-X is required for multiple queue support, and some kernels and +combinations of kernel .config options will force a lower level of interrupt +support. + +This command will show different values for each type of interrupt:: + + cat /proc/interrupts + +CrcStripping +------------ +:Valid Range: 0,1 +:Default Value: 1 (enabled) + +Strip the CRC from received packets before sending up the network stack. If +you have a machine with a BMC enabled but cannot receive IPMI traffic after +loading or enabling the driver, try disabling this feature. + +WriteProtectNVM +--------------- +:Valid Range: 0,1 +:Default Value: 1 (enabled) + +If set to 1, configure the hardware to ignore all write/erase cycles to the +GbE region in the ICHx NVM (in order to prevent accidental corruption of the +NVM). This feature can be disabled by setting the parameter to 0 during initial +driver load.
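+ +For example, a hypothetical initial load that leaves the NVM writable (use +with care; note the power-cycle requirement in the NOTE below):: + + modprobe e1000e WriteProtectNVM=0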
+ +NOTE: The machine must be power cycled (full off/on) when enabling NVM writes +via setting the parameter to zero. Once the NVM has been locked (via the +parameter at 1 when the driver loads) it cannot be unlocked except via power +cycle. + +Debug +----- +:Valid Range: 0-16 (0=none,...,16=all) +:Default Value: 0 + +This parameter adjusts the level of debug messages displayed in the system logs. + + +Additional Features and Configurations +====================================== + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <x> is the interface number:: + + ifconfig eth<x> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + ip link set mtu 9000 dev eth<x> + ip link set up dev eth<x> + +This setting is not saved across reboots. The setting change can be made +permanent by adding 'MTU=9000' to the file: + +- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x> +- For SLES: /etc/sysconfig/network/<config_file> + +NOTE: The maximum MTU setting for Jumbo Frames is 8996. This value coincides +with the maximum Jumbo Frames size of 9018 bytes. + +NOTE: Using Jumbo frames at 10 or 100 Mbps is not supported and may result in +poor performance or loss of link. + +NOTE: The following adapters limit Jumbo Frames sized packets to a maximum of +4088 bytes: + + - Intel(R) 82578DM Gigabit Network Connection + - Intel(R) 82577LM Gigabit Network Connection + +The following adapters do not support Jumbo Frames: + + - Intel(R) PRO/1000 Gigabit Server Adapter + - Intel(R) PRO/1000 PM Network Connection + - Intel(R) 82562G 10/100 Network Connection + - Intel(R) 82562G-2 10/100 Network Connection + - Intel(R) 82562GT 10/100 Network Connection + - Intel(R) 82562GT-2 10/100 Network Connection + - Intel(R) 82562V 10/100 Network Connection + - Intel(R) 82562V-2 10/100 Network Connection + - Intel(R) 82566DC Gigabit Network Connection + - Intel(R) 82566DC-2 Gigabit Network Connection + - Intel(R) 82566DM Gigabit Network Connection + - Intel(R) 82566MC Gigabit Network Connection + - Intel(R) 82566MM Gigabit Network Connection + - Intel(R) 82567V-3 Gigabit Network Connection + - Intel(R) 82577LC Gigabit Network Connection + - Intel(R) 82578DC Gigabit Network Connection + +NOTE: Jumbo Frames cannot be configured on an 82579-based Network device if +MACSec is enabled on the system. + + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: + +https://www.kernel.org/pub/software/network/ethtool/ + +NOTE: When validating enable/disable tests on some parts (for example, 82578), +it is necessary to add a few seconds between tests when working with ethtool. + + +Speed and Duplex Configuration +------------------------------ +In addressing speed and duplex configuration issues, you need to distinguish +between copper-based adapters and fiber-based adapters. + +In the default mode, an Intel(R) Ethernet Network Adapter using copper +connections will attempt to auto-negotiate with its link partner to determine +the best setting. 
If the adapter cannot establish link with the link partner +using auto-negotiation, you may need to manually configure the adapter and link +partner to identical settings to establish link and pass packets. This should +only be needed when attempting to link with an older switch that does not +support auto-negotiation or one that has been forced to a specific speed or +duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds +and higher cannot be forced. Use the autonegotiation advertising setting to +manually set devices for 1 Gbps and higher. + +Speed, duplex, and autonegotiation advertising are configured through the +ethtool* utility. + +Caution: Only experienced network administrators should force speed and duplex +or change autonegotiation advertising manually. The settings at the switch must +always match the adapter settings. Adapter performance may suffer or your +adapter may not operate if you configure the adapter differently from your +switch. + +An Intel(R) Ethernet Network Adapter using fiber-based connections, however, +will not attempt to auto-negotiate with its link partner since those adapters +operate only in full duplex and only at their native speed. + + +Enabling Wake on LAN* (WoL) +--------------------------- +WoL is configured through the ethtool* utility. + +WoL will be enabled on the system during the next shut down or reboot. For +this driver version, in order to enable WoL, the e1000e driver must be loaded +prior to shutting down or suspending the system. + +NOTE: Wake on LAN is only supported on port A for the following devices: +- Intel(R) PRO/1000 PT Dual Port Network Connection +- Intel(R) PRO/1000 PT Dual Port Server Connection +- Intel(R) PRO/1000 PT Dual Port Server Adapter +- Intel(R) PRO/1000 PF Dual Port Server Adapter +- Intel(R) PRO/1000 PT Quad Port Server Adapter +- Intel(R) Gigabit PT Quad Port Server ExpressModule + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/e1000e.txt b/Documentation/networking/e1000e.txt deleted file mode 100644 index 12089547baed..000000000000 --- a/Documentation/networking/e1000e.txt +++ /dev/null @@ -1,312 +0,0 @@ -Linux* Driver for Intel(R) Ethernet Network Connection -====================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Command Line Parameters -- Additional Configurations -- Support - -Identifying Your Adapter -======================== - -The e1000e driver supports all PCI Express Intel(R) Gigabit Network -Connections, except those that are 82575, 82576 and 82580-based*. - -* NOTE: The Intel(R) PRO/1000 P Dual Port Server Adapter is supported by - the e1000 driver, not the e1000e driver due to the 82546 part being used - behind a PCI Express bridge. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -For the latest Intel network drivers for Linux, refer to the following -website. 
In the search field, enter your adapter name or type, or use the -networking link on the left to search for your adapter: - - http://support.intel.com/support/go/network/adapter/home.htm - -Command Line Parameters -======================= - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -NOTES: For more information about the InterruptThrottleRate, - RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay - parameters, see the application note at: - http://www.intel.com/design/network/applnots/ap450.htm - -InterruptThrottleRate ---------------------- -Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative, - 4=simplified balancing) -Default Value: 3 - -The driver can limit the amount of interrupts per second that the adapter -will generate for incoming packets. It does this by writing a value to the -adapter that is based on the maximum amount of interrupts that the adapter -will generate per second. - -Setting InterruptThrottleRate to a value greater or equal to 100 -will program the adapter to send out a maximum of that many interrupts -per second, even if more packets have come in. This reduces interrupt -load on the system and can lower CPU utilization under heavy load, -but will increase latency as packets are not processed as quickly. - -The default behaviour of the driver previously assumed a static -InterruptThrottleRate value of 8000, providing a good fallback value for -all traffic types, but lacking in small packet performance and latency. -The hardware can handle many more small packets per second however, and -for this reason an adaptive interrupt moderation algorithm was implemented. - -The driver has two adaptive modes (setting 1 or 3) in which -it dynamically adjusts the InterruptThrottleRate value based on the traffic -that it receives. After determining the type of incoming traffic in the last -timeframe, it will adjust the InterruptThrottleRate to an appropriate value -for that traffic. - -The algorithm classifies the incoming traffic every interval into -classes. Once the class is determined, the InterruptThrottleRate value is -adjusted to suit that traffic type the best. There are three classes defined: -"Bulk traffic", for large amounts of packets of normal size; "Low latency", -for small amounts of traffic and/or a significant percentage of small -packets; and "Lowest latency", for almost completely small packets or -minimal traffic. - -In dynamic conservative mode, the InterruptThrottleRate value is set to 4000 -for traffic that falls in class "Bulk traffic". If traffic falls in the "Low -latency" or "Lowest latency" class, the InterruptThrottleRate is increased -stepwise to 20000. This default mode is suitable for most applications. - -For situations where low latency is vital such as cluster or -grid computing, the algorithm can reduce latency even more when -InterruptThrottleRate is set to mode 1. In this mode, which operates -the same as mode 3, the InterruptThrottleRate will be increased stepwise to -70000 for traffic in class "Lowest latency". - -In simplified mode the interrupt rate is based on the ratio of TX and -RX traffic. If the bytes per second rate is approximately equal, the -interrupt rate will drop as low as 2000 interrupts per second. If the -traffic is mostly transmit or mostly receive, the interrupt rate could -be as high as 8000. 
- -Setting InterruptThrottleRate to 0 turns off any interrupt moderation -and may improve small packet latency, but is generally not suitable -for bulk throughput traffic. - -NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and - RxAbsIntDelay parameters. In other words, minimizing the receive - and/or transmit absolute delays does not force the controller to - generate more interrupts than what the Interrupt Throttle Rate - allows. - -NOTE: When e1000e is loaded with default settings and multiple adapters - are in use simultaneously, the CPU utilization may increase non- - linearly. In order to limit the CPU utilization without impacting - the overall throughput, we recommend that you load the driver as - follows: - - modprobe e1000e InterruptThrottleRate=3000,3000,3000 - - This sets the InterruptThrottleRate to 3000 interrupts/sec for - the first, second, and third instances of the driver. The range - of 2000 to 3000 interrupts per second works on a majority of - systems and is a good starting point, but the optimal value will - be platform-specific. If CPU utilization is not a concern, use - RX_POLLING (NAPI) and default driver settings. - -RxIntDelay ----------- -Valid Range: 0-65535 (0=off) -Default Value: 0 - -This value delays the generation of receive interrupts in units of 1.024 -microseconds. Receive interrupt reduction can improve CPU efficiency if -properly tuned for specific network traffic. Increasing this value adds -extra latency to frame reception and can end up decreasing the throughput -of TCP traffic. If the system is reporting dropped receives, this value -may be set too high, causing the driver to run out of available receive -descriptors. - -CAUTION: When setting RxIntDelay to a value other than 0, adapters may - hang (stop transmitting) under certain network conditions. If - this occurs a NETDEV WATCHDOG message is logged in the system - event log. In addition, the controller is automatically reset, - restoring the network connection. To eliminate the potential - for the hang ensure that RxIntDelay is set to 0. - -RxAbsIntDelay -------------- -Valid Range: 0-65535 (0=off) -Default Value: 8 - -This value, in units of 1.024 microseconds, limits the delay in which a -receive interrupt is generated. Useful only if RxIntDelay is non-zero, -this value ensures that an interrupt is generated after the initial -packet is received within the set amount of time. Proper tuning, -along with RxIntDelay, may improve traffic throughput in specific network -conditions. - -TxIntDelay ----------- -Valid Range: 0-65535 (0=off) -Default Value: 8 - -This value delays the generation of transmit interrupts in units of -1.024 microseconds. Transmit interrupt reduction can improve CPU -efficiency if properly tuned for specific network traffic. If the -system is reporting dropped transmits, this value may be set too high -causing the driver to run out of available transmit descriptors. - -TxAbsIntDelay -------------- -Valid Range: 0-65535 (0=off) -Default Value: 32 - -This value, in units of 1.024 microseconds, limits the delay in which a -transmit interrupt is generated. Useful only if TxIntDelay is non-zero, -this value ensures that an interrupt is generated after the initial -packet is sent on the wire within the set amount of time. Proper tuning, -along with TxIntDelay, may improve traffic throughput in specific -network conditions. 
- -Copybreak ---------- -Valid Range: 0-xxxxxxx (0=off) -Default Value: 256 - -Driver copies all packets below or equaling this size to a fresh RX -buffer before handing it up the stack. - -This parameter is different than other parameters, in that it is a -single (not 1,1,1 etc.) parameter applied to all driver instances and -it is also available during runtime at -/sys/module/e1000e/parameters/copybreak - -SmartPowerDownEnable --------------------- -Valid Range: 0-1 -Default Value: 0 (disabled) - -Allows PHY to turn off in lower power states. The user can set this parameter -in supported chipsets. - -KumeranLockLoss ---------------- -Valid Range: 0-1 -Default Value: 1 (enabled) - -This workaround skips resetting the PHY at shutdown for the initial -silicon releases of ICH8 systems. - -IntMode -------- -Valid Range: 0-2 (0=legacy, 1=MSI, 2=MSI-X) -Default Value: 2 - -Allows changing the interrupt mode at module load time, without requiring a -recompile. If the driver load fails to enable a specific interrupt mode, the -driver will try other interrupt modes, from least to most compatible. The -interrupt order is MSI-X, MSI, Legacy. If specifying MSI (IntMode=1) -interrupts, only MSI and Legacy will be attempted. - -CrcStripping ------------- -Valid Range: 0-1 -Default Value: 1 (enabled) - -Strip the CRC from received packets before sending up the network stack. If -you have a machine with a BMC enabled but cannot receive IPMI traffic after -loading or enabling the driver, try disabling this feature. - -WriteProtectNVM ---------------- -Valid Range: 0,1 -Default Value: 1 - -If set to 1, configure the hardware to ignore all write/erase cycles to the -GbE region in the ICHx NVM (in order to prevent accidental corruption of the -NVM). This feature can be disabled by setting the parameter to 0 during initial -driver load. -NOTE: The machine must be power cycled (full off/on) when enabling NVM writes -via setting the parameter to zero. Once the NVM has been locked (via the -parameter at 1 when the driver loads) it cannot be unlocked except via power -cycle. - -Additional Configurations -========================= - - Jumbo Frames - ------------ - Jumbo Frames support is enabled by changing the MTU to a value larger than - the default of 1500. Use the ifconfig command to increase the MTU size. - For example: - - ifconfig eth<x> mtu 9000 up - - This setting is not saved across reboots. - - Notes: - - - The maximum MTU setting for Jumbo Frames is 9216. This value coincides - with the maximum Jumbo Frames size of 9234 bytes. - - - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in - poor performance or loss of link. - - - Some adapters limit Jumbo Frames sized packets to a maximum of - 4096 bytes and some adapters do not support Jumbo Frames. - - - Jumbo Frames cannot be configured on an 82579-based Network device, if - MACSec is enabled on the system. - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. We - strongly recommend downloading the latest version of ethtool at: - - https://kernel.org/pub/software/network/ethtool/ - - NOTE: When validating enable/disable tests on some parts (82578, for example) - you need to add a few seconds between tests when working with ethtool. - - Speed and Duplex - ---------------- - Speed and Duplex are configured through the ethtool* utility. For - instructions, refer to the ethtool man page. 
- - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is configured through the ethtool* utility. For instructions on - enabling WoL with ethtool, refer to the ethtool man page. - - WoL will be enabled on the system during the next shut down or reboot. - For this driver version, in order to enable WoL, the e1000e driver must be - loaded when shutting down or rebooting the system. - - In most cases Wake On LAN is only supported on port A for multiple port - adapters. To verify if a port supports Wake on Lan run ethtool eth<X>. - -Support -======= - -For general information, go to the Intel support website at: - - www.intel.com/support/ - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/fm10k.rst b/Documentation/networking/fm10k.rst new file mode 100644 index 000000000000..bf5e5942f28d --- /dev/null +++ b/Documentation/networking/fm10k.rst @@ -0,0 +1,141 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for Intel(R) Ethernet Multi-host Controller +============================================================== + +August 20, 2018 +Copyright(c) 2015-2018 Intel Corporation. + +Contents +======== +- Identifying Your Adapter +- Additional Configurations +- Performance Tuning +- Known Issues +- Support + +Identifying Your Adapter +======================== +The driver in this release is compatible with devices based on the Intel(R) +Ethernet Multi-host Controller. + +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Flow Control +------------ +The Intel(R) Ethernet Switch Host Interface Driver does not support Flow +Control. It will not send pause frames. This may result in dropped frames. + + +Virtual Functions (VFs) +----------------------- +Use sysfs to enable VFs. +Valid Range: 0-64 + +For example:: + + echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs //enable VFs + echo 0 > /sys/class/net/$dev/device/sriov_numvfs //disable VFs + +NOTE: Neither the device nor the driver controls how VFs are mapped into config +space. Bus layout will vary by operating system. On operating systems that +support it, you can check sysfs to find the mapping. + +NOTE: When SR-IOV mode is enabled, hardware VLAN filtering and VLAN tag +stripping/insertion will remain enabled. Please remove the old VLAN filter +before the new VLAN filter is added. For example:: + + ip link set eth0 vf 0 vlan 100 // set vlan 100 for VF 0 + ip link set eth0 vf 0 vlan 0 // Delete vlan 100 + ip link set eth0 vf 0 vlan 200 // set a new vlan 200 for VF 0 + + +Additional Features and Configurations +====================================== + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <x> is the interface number:: + + ifconfig eth<x> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + ip link set mtu 9000 dev eth<x> + ip link set up dev eth<x> + +This setting is not saved across reboots.
The setting change can be made +permanent by adding 'MTU=9000' to the file: + +- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x> +- For SLES: /etc/sysconfig/network/<config_file> + +NOTE: The maximum MTU setting for Jumbo Frames is 15342. This value coincides +with the maximum Jumbo Frames size of 15364 bytes. + +NOTE: This driver will attempt to use multiple page sized buffers to receive +each jumbo packet. This should help to avoid buffer starvation issues when +allocating receive packets. + + +Generic Receive Offload, aka GRO +-------------------------------- +The driver supports the in-kernel software implementation of GRO. GRO has +shown that by coalescing Rx traffic into larger chunks of data, CPU +utilization can be significantly reduced when under large Rx load. GRO is an +evolution of the previously-used LRO interface. GRO is able to coalesce +other protocols besides TCP. It's also safe to use with configurations that +are problematic for LRO, namely bridging and iSCSI. + + + +Supported ethtool Commands and Options for Filtering +---------------------------------------------------- +-n --show-nfc + Retrieves the receive network flow classification configurations. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 + Retrieves the hash options for the specified network traffic type. + +-N --config-nfc + Configures the receive network flow classification. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r + Configures the hash options for the specified network traffic type. + +- udp4: UDP over IPv4 +- udp6: UDP over IPv6 +- f Hash on bytes 0 and 1 of the Layer 4 header of the rx packet. +- n Hash on bytes 2 and 3 of the Layer 4 header of the rx packet. + + +Known Issues/Troubleshooting +============================ + +Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS under Linux KVM +--------------------------------------------------------------------------------------- +KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This +includes traditional PCIe devices, as well as SR-IOV-capable devices based on +the Intel Ethernet Controller XL710. + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/i40e.rst b/Documentation/networking/i40e.rst new file mode 100644 index 000000000000..0cc16c525d10 --- /dev/null +++ b/Documentation/networking/i40e.rst @@ -0,0 +1,770 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series +================================================================== + +Intel 40 Gigabit Linux driver. +Copyright(c) 1999-2018 Intel Corporation. + +Contents +======== + +- Overview +- Identifying Your Adapter +- Intel(R) Ethernet Flow Director +- Additional Configurations +- Known Issues +- Support + + +Driver information can be obtained using ethtool, lspci, and ifconfig. +Instructions on updating ethtool can be found in the section Additional +Configurations later in this document. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel adapter. 
All hardware requirements listed apply to use +with Linux. + + +Identifying Your Adapter +======================== +The driver is compatible with devices based on the following: + + * Intel(R) Ethernet Controller X710 + * Intel(R) Ethernet Controller XL710 + * Intel(R) Ethernet Network Connection X722 + * Intel(R) Ethernet Controller XXV710 + +For the best performance, make sure the latest NVM/FW is installed on your +device. + +For information on how to identify your adapter, and for the latest NVM/FW +images and Intel network drivers, refer to the Intel Support website: +https://www.intel.com/support + +SFP+ and QSFP+ Devices +---------------------- +For information about supported media, refer to this document: +https://www.intel.com/content/dam/www/public/us/en/documents/release-notes/xl710-ethernet-controller-feature-matrix.pdf + +NOTE: Some adapters based on the Intel(R) Ethernet Controller 700 Series only +support Intel Ethernet Optics modules. On these adapters, other modules are not +supported and will not function. In all cases Intel recommends using Intel +Ethernet Optics; other modules may function but are not validated by Intel. +Contact Intel for supported media types. + +NOTE: For connections based on Intel(R) Ethernet Controller 700 Series, support +is dependent on your system board. Please see your vendor for details. + +NOTE: In systems that do not have adequate airflow to cool the adapter and +optical modules, you must use high temperature optical modules. + +Virtual Functions (VFs) +----------------------- +Use sysfs to enable VFs. For example:: + + #echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs #enable VFs + #echo 0 > /sys/class/net/$dev/device/sriov_numvfs #disable VFs + +For example, the following instructions will configure PF eth0 and the first VF +on VLAN 10:: + + $ ip link set dev eth0 vf 0 vlan 10 + +VLAN Tag Packet Steering +------------------------ +Allows you to send all packets with a specific VLAN tag to a particular SR-IOV +virtual function (VF). Further, this feature allows you to designate a +particular VF as trusted, and allows that trusted VF to request selective +promiscuous mode on the Physical Function (PF). + +To set a VF as trusted or untrusted, enter the following command in the +Hypervisor:: + + # ip link set dev eth0 vf 1 trust [on|off] + +Once the VF is designated as trusted, use the following commands in the VM to +set the VF to promiscuous mode. + +:: + + For promiscuous all: + #ip link set eth2 promisc on + Where eth2 is a VF interface in the VM + + For promiscuous Multicast: + #ip link set eth2 allmulticast on + Where eth2 is a VF interface in the VM + +NOTE: By default, the ethtool priv-flag vf-true-promisc-support is set to +"off", meaning that promiscuous mode for the VF will be limited. To set the +promiscuous mode for the VF to true promiscuous and allow the VF to see all +ingress traffic, use the following command:: + + #ethtool --set-priv-flags p261p1 vf-true-promisc-support on + +The vf-true-promisc-support priv-flag does not enable promiscuous mode; rather, +it designates which type of promiscuous mode (limited or true) you will get +when you enable promiscuous mode using the ip link commands above. Note that +this is a global setting that affects the entire device. However, the +vf-true-promisc-support priv-flag is only exposed to the first PF of the +device. The PF remains in limited promiscuous mode (unless it is in MFP mode) +regardless of the vf-true-promisc-support setting.
+ +Now add a VLAN interface on the VF interface:: + + #ip link add link eth2 name eth2.100 type vlan id 100 + +Note that the order in which you set the VF to promiscuous mode and add the +VLAN interface does not matter (you can do either first). The end result in +this example is that the VF will get all traffic that is tagged with VLAN 100. + +Intel(R) Ethernet Flow Director +------------------------------- +The Intel Ethernet Flow Director performs the following tasks: + +- Directs receive packets according to their flows to different queues. +- Enables tight control on routing a flow in the platform. +- Matches flows and CPU cores for flow affinity. +- Supports multiple parameters for flexible flow classification and load + balancing (in SFP mode only). + +NOTE: The Linux i40e driver supports the following flow types: IPv4, TCPv4, and +UDPv4. For a given flow type, it supports valid combinations of IP addresses +(source or destination) and UDP/TCP ports (source and destination). For +example, you can supply only a source IP address, a source IP address and a +destination port, or any combination of one or more of these four parameters. + +NOTE: The Linux i40e driver allows you to filter traffic based on a +user-defined flexible two-byte pattern and offset by using the ethtool user-def +and mask fields. Only L3 and L4 flow types are supported for user-defined +flexible filters. For a given flow type, you must clear all Intel Ethernet Flow +Director filters before changing the input set (for that flow type). + +To enable or disable the Intel Ethernet Flow Director:: + + # ethtool -K ethX ntuple <on|off> + +When disabling ntuple filters, all the user programmed filters are flushed from +the driver cache and hardware. All needed filters must be re-added when ntuple +is re-enabled. + +To add a filter that directs packets to queue 2, use the -U or -N switch:: + + # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1] + +To set a filter using only the source and destination IP address:: + + # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 action 2 [loc 1] + +To see the list of filters currently present:: + + # ethtool <-u|-n> ethX + +Application Targeted Routing (ATR) Perfect Filters +-------------------------------------------------- +ATR is enabled by default when the kernel is in multiple transmit queue mode. +An ATR Intel Ethernet Flow Director filter rule is added when a TCP-IP flow +starts and is deleted when the flow ends. When a TCP-IP Intel Ethernet Flow +Director rule is added from ethtool (Sideband filter), ATR is turned off by the +driver. To re-enable ATR, the sideband can be disabled with the ethtool -K +option. For example:: + + ethtool -K [adapter] ntuple [off|on] + +If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a +TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is +automatically re-enabled. + +Packets that match the ATR rules are counted in fdir_atr_match stats in +ethtool, which also can be used to verify whether ATR rules still exist. + +Sideband Perfect Filters +------------------------ +Sideband Perfect Filters are used to direct traffic that matches specified +characteristics. They are enabled through ethtool's ntuple interface.
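+ +Since Sideband Perfect Filters use the ntuple interface described above, make +sure ntuple is enabled first (ethX is a placeholder interface name):: + + # ethtool -K ethX ntuple on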
To add a +new filter use the following command:: + + ethtool -U <device> flow-type <type> src-ip <ip> dst-ip <ip> src-port <port> \ + dst-port <port> action <queue> + +Where: + <device> - the ethernet device to program + <type> - can be ip4, tcp4, udp4, or sctp4 + <ip> - the ip address to match on + <port> - the port number to match on + <queue> - the queue to direct traffic towards (-1 discards matching traffic) + +Use the following command to display all of the active filters:: + + ethtool -u <device> + +Use the following command to delete a filter:: + + ethtool -U <device> delete <N> + +Where <N> is the filter id displayed when printing all the active filters, and +may also have been specified using "loc <N>" when adding the filter. + +The following example matches TCP traffic sent from 192.168.0.1, port 5300, +directed to 192.168.0.5, port 80, and sends it to queue 7:: + + ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 \ + src-port 5300 dst-port 80 action 7 + +For each flow-type, the programmed filters must all have the same matching +input set. For example, issuing the following two commands is acceptable:: + + ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7 + ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10 + +Issuing the next two commands, however, is not acceptable, since the first +specifies src-ip and the second specifies dst-ip:: + + ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7 + ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10 + +The second command will fail with an error. You may program multiple filters +with the same fields, using different values, but, on one device, you may not +program two tcp4 filters with different matching fields. + +Matching on a sub-portion of a field is not supported by the i40e driver, thus +partial mask fields are not supported. + +The driver also supports matching user-defined data within the packet payload. +This flexible data is specified using the "user-def" field of the ethtool +command in the following way: + ++----------------------------+--------------------------+ +| 31 28 24 20 16 | 15 12 8 4 0 | ++----------------------------+--------------------------+ +| offset into packet payload | 2 bytes of flexible data | ++----------------------------+--------------------------+ + +For example, + +:: + + ... user-def 0x4FFFF ... + +tells the filter to look 4 bytes into the payload and match that value against +0xFFFF. The offset is based on the beginning of the payload, and not the +beginning of the packet. Thus + +:: + + flow-type tcp4 ... user-def 0x8BEAF ... + +would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the +TCP/IPv4 payload. + +Note that ICMP headers are parsed as 4 bytes of header and 4 bytes of payload. +Thus to match the first byte of the payload, you must actually add 4 bytes to +the offset. Also note that ip4 filters match both ICMP frames as well as raw +(unknown) ip4 frames, where the payload will be the L3 payload of the IP4 frame. + +The maximum offset is 64. The hardware will only read up to 64 bytes of data +from the payload. The offset must be even because the flexible data is 2 bytes +long and must be aligned to byte 0 of the packet payload. + +The user-defined flexible offset is also considered part of the input set and +cannot be programmed separately for multiple filters of the same type. 
However, +the flexible data is not part of the input set and multiple filters may use the +same offset but match against different data. + +To create filters that direct traffic to a specific Virtual Function, use the +"action" parameter. Specify the action as a 64-bit value, where the lower 32 +bits represent the queue number, while the next 8 bits represent which VF. +Note that 0 is the PF, so the VF identifier is offset by 1. For example:: + + ... action 0x800000002 ... + +specifies to direct traffic to Virtual Function 7 (8 minus 1) into queue 2 of +that VF. + +Note that these filters will not break internal routing rules, and will not +route traffic that otherwise would not have been sent to the specified Virtual +Function. + +Setting the link-down-on-close Private Flag +------------------------------------------- +When the link-down-on-close private flag is set to "on", the port's link will +go down when the interface is brought down using the ifconfig ethX down command. + +Use ethtool to view and set link-down-on-close, as follows:: + + ethtool --show-priv-flags ethX + ethtool --set-priv-flags ethX link-down-on-close [on|off] + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + dmesg -n 8 + +NOTE: This setting is not saved across reboots. + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <x> is the interface number:: + + ifconfig eth<x> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + ip link set mtu 9000 dev eth<x> + ip link set up dev eth<x> + +This setting is not saved across reboots. The setting change can be made +permanent by adding 'MTU=9000' to the file:: + + /etc/sysconfig/network-scripts/ifcfg-eth<x> // for RHEL + /etc/sysconfig/network/<config_file> // for SLES + +NOTE: The maximum MTU setting for Jumbo Frames is 9702. This value coincides +with the maximum Jumbo Frames size of 9728 bytes. + +NOTE: This driver will attempt to use multiple page sized buffers to receive +each jumbo packet. This should help to avoid buffer starvation issues when +allocating receive packets. + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: +https://www.kernel.org/pub/software/network/ethtool/ + +Supported ethtool Commands and Options for Filtering +---------------------------------------------------- +-n --show-nfc + Retrieves the receive network flow classification configurations. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 + Retrieves the hash options for the specified network traffic type. + +-N --config-nfc + Configures the receive network flow classification. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r... + Configures the hash options for the specified network traffic type. + +udp4 UDP over IPv4 +udp6 UDP over IPv6 + +f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet. +n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
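+ +For example, a hypothetical invocation (eth0 is a placeholder) that hashes +UDP-over-IPv4 flows on the source and destination IP addresses plus both +halves of the Layer 4 header:: + + # ethtool -N eth0 rx-flow-hash udp4 sdfn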
+ +Speed and Duplex Configuration +------------------------------ +In addressing speed and duplex configuration issues, you need to distinguish +between copper-based adapters and fiber-based adapters. + +In the default mode, an Intel(R) Ethernet Network Adapter using copper +connections will attempt to auto-negotiate with its link partner to determine +the best setting. If the adapter cannot establish link with the link partner +using auto-negotiation, you may need to manually configure the adapter and link +partner to identical settings to establish link and pass packets. This should +only be needed when attempting to link with an older switch that does not +support auto-negotiation or one that has been forced to a specific speed or +duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds +and higher cannot be forced. Use the autonegotiation advertising setting to +manually set devices for 1 Gbps and higher. + +NOTE: You cannot set the speed for Intel(R) Ethernet Network Adapter XXV710 +based devices. + +Speed, duplex, and autonegotiation advertising are configured through the +ethtool* utility. + +Caution: Only experienced network administrators should force speed and duplex +or change autonegotiation advertising manually. The settings at the switch must +always match the adapter settings. Adapter performance may suffer or your +adapter may not operate if you configure the adapter differently from your +switch. + +An Intel(R) Ethernet Network Adapter using fiber-based connections, however, +will not attempt to auto-negotiate with its link partner since those adapters +operate only in full duplex and only at their native speed. + +NAPI +---- +NAPI (Rx polling mode) is supported in the i40e driver. +For more information on NAPI, see +https://wiki.linuxfoundation.org/networking/napi + +Flow Control +------------ +Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable +receiving and transmitting pause frames for i40e. When transmit is enabled, +pause frames are generated when the receive packet buffer crosses a predefined +threshold. When receive is enabled, the transmit unit will halt for the time +delay specified when a pause frame is received. + +NOTE: You must have a flow control capable link partner. + +Flow Control is on by default. + +Use ethtool to change the flow control settings. + +To enable or disable Rx or Tx Flow Control:: + + ethtool -A eth? rx <on|off> tx <on|off> + +Note: This command only enables or disables Flow Control if auto-negotiation is +disabled. If auto-negotiation is enabled, this command changes the parameters +used for auto-negotiation with the link partner. + +To enable or disable auto-negotiation:: + + ethtool -s eth? autoneg <on|off> + +Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending +on your device, you may not be able to change the auto-negotiation setting. + +RSS Hash Flow +------------- +Allows you to set the hash bytes per flow type and any combination of one or +more options for Receive Side Scaling (RSS) hash byte configuration. + +:: + + # ethtool -N <dev> rx-flow-hash <type> <option> + +Where <type> is: + tcp4 signifying TCP over IPv4 + udp4 signifying UDP over IPv4 + tcp6 signifying TCP over IPv6 + udp6 signifying UDP over IPv6 +And <option> is one or more of: + s Hash on the IP source address of the Rx packet. + d Hash on the IP destination address of the Rx packet. + f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
+ n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet. + +MAC and VLAN anti-spoofing feature +---------------------------------- +When a malicious driver attempts to send a spoofed packet, it is dropped by the +hardware and not transmitted. +NOTE: This feature can be disabled for a specific Virtual Function (VF):: + + ip link set <pf dev> vf <vf id> spoofchk {off|on} + +IEEE 1588 Precision Time Protocol (PTP) Hardware Clock (PHC) +------------------------------------------------------------ +Precision Time Protocol (PTP) is used to synchronize clocks in a computer +network. PTP support varies among Intel devices that support this driver. Use +"ethtool -T <netdev name>" to get a definitive list of PTP capabilities +supported by the device. + +IEEE 802.1ad (QinQ) Support +--------------------------- +The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN +IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as +"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks +allow L2 tunneling and the ability to segregate traffic within a particular +VLAN ID, among other uses. + +The following are examples of how to configure 802.1ad (QinQ):: + + ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24 + ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371 + +Where "24" and "371" are example VLAN IDs. + +NOTES: + Receive checksum offloads, cloud filters, and VLAN acceleration are not + supported for 802.1ad (QinQ) packets. + +VXLAN and GENEVE Overlay HW Offloading +-------------------------------------- +Virtual Extensible LAN (VXLAN) allows you to extend an L2 network over an L3 +network, which may be useful in a virtualized or cloud environment. Some +Intel(R) Ethernet Network devices perform VXLAN processing, offloading it from +the operating system. This reduces CPU utilization. + +VXLAN offloading is controlled by the Tx and Rx checksum offload options +provided by ethtool. That is, if Tx checksum offload is enabled, and the +adapter has the capability, VXLAN offloading is also enabled. + +Support for VXLAN and GENEVE HW offloading is dependent on kernel support of +the HW offloading features. + +Multiple Functions per Port +--------------------------- +Some adapters based on the Intel Ethernet Controller X710/XL710 support +multiple functions on a single physical port. Configure these functions through +the System Setup/BIOS. + +Minimum TX Bandwidth is the guaranteed minimum data transmission bandwidth, as +a percentage of the full physical port link speed, that the partition will +receive. The bandwidth the partition is awarded will never fall below the level +you specify. + +The range for the minimum bandwidth values is: +1 to ((100 minus # of partitions on the physical port) plus 1) +For example, if a physical port has 4 partitions, the range would be: +1 to ((100 - 4) + 1 = 97) + +The Maximum Bandwidth percentage represents the maximum transmit bandwidth +allocated to the partition as a percentage of the full physical port link +speed. The accepted range of values is 1-100. The value is used as a limiter, +should you choose that any one particular function not be able to consume 100% +of a port's bandwidth (should it be available). The sum of all the values for +Maximum Bandwidth is not restricted, because no more than 100% of a port's +bandwidth can ever be used. + +NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions +per Port (MFP) and SR-IOV are enabled.
+
+Multiple Functions per Port
+---------------------------
+Some adapters based on the Intel Ethernet Controller X710/XL710 support
+multiple functions on a single physical port. Configure these functions through
+the System Setup/BIOS.
+
+Minimum TX Bandwidth is the guaranteed minimum data transmission bandwidth, as
+a percentage of the full physical port link speed, that the partition will
+receive. The bandwidth the partition is awarded will never fall below the level
+you specify.
+
+The range for the minimum bandwidth values is:
+1 to ((100 minus # of partitions on the physical port) plus 1)
+For example, if a physical port has 4 partitions, the range would be:
+1 to ((100 - 4) + 1 = 97)
+
+The Maximum Bandwidth percentage represents the maximum transmit bandwidth
+allocated to the partition as a percentage of the full physical port link
+speed. The accepted range of values is 1-100. The value acts as a limiter: set
+it if you want to prevent a particular function from consuming 100% of a
+port's bandwidth (should that bandwidth be available). The sum of all the
+values for Maximum Bandwidth is not restricted, because no more than 100% of a
+port's bandwidth can ever be used.
+
+NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions
+per Port (MFP) and SR-IOV are enabled. An error from i40e is logged that says
+"add vsi failed for VF N, aq_err 16". To work around the issue, enable fewer
+than 64 virtual functions (VFs).
+
+Data Center Bridging (DCB)
+--------------------------
+DCB is a Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+Adapter firmware implements LLDP and DCBX protocol agents as per 802.1AB and
+802.1Qaz respectively. The firmware-based DCBX agent runs in willing mode only
+and can accept settings from a DCBX capable peer. Software configuration of
+DCBX parameters via dcbtool/lldptool is not supported.
+
+NOTE: Firmware LLDP can be disabled by setting the private flag disable-fw-lldp.
+
+The i40e driver implements the DCB netlink interface layer to allow user-space
+to communicate with the driver and query DCB configuration for the port.
+
+NOTE:
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+Interrupt Rate Limiting
+-----------------------
+:Valid Range: 0-235 (0=no limit)
+
+The Intel(R) Ethernet Controller XL710 family supports an interrupt rate
+limiting mechanism. The user can control, via ethtool, the number of
+microseconds between interrupts.
+
+Syntax::
+
+  # ethtool -C ethX rx-usecs-high N
+
+The range of 0-235 microseconds provides an effective range of 4,310 to 250,000
+interrupts per second. The value of rx-usecs-high can be set independently of
+rx-usecs and tx-usecs in the same ethtool command, and is also independent of
+the adaptive interrupt moderation algorithm. The underlying hardware supports
+granularity in 4-microsecond intervals, so adjacent values may result in the
+same interrupt rate.
+
+One possible use case is the following::
+
+  # ethtool -C ethX adaptive-rx off adaptive-tx off rx-usecs-high 20 rx-usecs \
+    5 tx-usecs 5
+
+The above command would disable adaptive interrupt moderation, and allow a
+maximum of 5 microseconds before indicating a receive or transmit was complete.
+However, instead of resulting in as many as 200,000 interrupts per second, it
+limits total interrupts per second to 50,000 via the rx-usecs-high parameter.
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required we recommend experimenting with the following settings.
+
+NOTE: For better performance when processing small (64B) frame sizes, try
+enabling hyper-threading in the BIOS in order to increase the number of logical
+cores in the system and subsequently increase the number of queues available to
+the adapter.
+
+Virtualized Environments
+------------------------
+1. Disable XPS on both ends by using the included virt_perf_default script
+or by running the following command as root::
+
+  for file in `ls /sys/class/net/<ethX>/queues/tx-*/xps_cpus`;
+  do echo 0 > $file; done
+
+2. Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to
+individual logical CPUs, making sure to use a set of CPUs included in the
+device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist.
+
+3. Configure as many Rx/Tx queues in the VM as available. Do not rely on
+the default setting of 1.
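+
+For example, to expose four queue pairs inside the VM (a sketch; assumes the
+VF netdev is eth0 and that the VF driver supports channel configuration)::
+
+  # ethtool -L eth0 combined 4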
+
+
+Non-virtualized Environments
+----------------------------
+Pin the adapter's IRQs to specific cores by disabling the irqbalance service
+and using the included set_irq_affinity script. Please see the script's help
+text for further options.
+
+- The following settings will distribute the IRQs across all the cores evenly::
+
+  # scripts/set_irq_affinity -x all <interface1> [ <interface2> ... ]
+
+- The following settings will distribute the IRQs across all the cores that are
+  local to the adapter (same NUMA node)::
+
+  # scripts/set_irq_affinity -x local <interface1> [ <interface2> ... ]
+
+For very CPU-intensive workloads, we recommend pinning the IRQs to all cores.
+
+For IP Forwarding: Disable Adaptive ITR and lower Rx and Tx interrupts per
+queue using ethtool.
+
+- Setting rx-usecs and tx-usecs to 125 will limit interrupts to about 8000
+  interrupts per second per queue.
+
+::
+
+  # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 125 \
+    tx-usecs 125
+
+For lower CPU utilization: Disable Adaptive ITR and lower Rx and Tx interrupts
+per queue using ethtool.
+
+- Setting rx-usecs and tx-usecs to 250 will limit interrupts to about 4000
+  interrupts per second per queue.
+
+::
+
+  # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 250 \
+    tx-usecs 250
+
+For lower latency: Disable Adaptive ITR and ITR by setting Rx and Tx to 0 using
+ethtool.
+
+::
+
+  # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 0 \
+    tx-usecs 0
+
+Application Device Queues (ADq)
+-------------------------------
+Application Device Queues (ADq) allows you to dedicate one or more queues to a
+specific application. This can reduce latency for the specified application,
+and allow Tx traffic to be rate limited per application. Follow the steps below
+to set ADq.
+
+1. Create traffic classes (TCs). A maximum of 8 TCs can be created per
+interface. The shaper bw_rlimit parameter is optional.
+
+Example: Sets up two tcs, tc0 and tc1, with 16 queues each and max tx rate set
+to 1Gbit for tc0 and 3Gbit for tc1.
+
+::
+
+  # tc qdisc add dev <interface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1
+    queues 16@0 16@16 hw 1 mode channel shaper bw_rlimit min_rate 1Gbit 2Gbit
+    max_rate 1Gbit 3Gbit
+
+map: priority mapping for up to 16 priorities to tcs (e.g. map 0 0 0 0 1 1 1 1
+sets priorities 0-3 to use tc0 and 4-7 to use tc1)
+
+queues: for each tc, <num queues>@<offset> (e.g. queues 16@0 16@16 assigns
+16 queues to tc0 at offset 0 and 16 queues to tc1 at offset 16. Max total
+number of queues for all tcs is 64 or number of cores, whichever is lower.)
+
+hw 1 mode channel: 'channel' with 'hw' set to 1 is a new hardware
+offload mode in mqprio that makes full use of the mqprio options, the
+TCs, the queue configurations, and the QoS parameters.
+
+shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
+Totals must be equal to or less than port speed.
+
+For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
+monitoring tools such as ifstat or sar -n DEV [interval] [number of samples]
+
+2. Enable HW TC offload on interface::
+
+  # ethtool -K <interface> hw-tc-offload on
+
+3. Apply TCs to ingress (RX) flow of interface::
+
+  # tc qdisc add dev <interface> ingress
+
+NOTES:
+  - Run all tc commands from the iproute2 <pathtoiproute2>/tc/ directory.
+  - ADq is not compatible with cloud filters.
+  - Setting up channels via ethtool (ethtool -L) is not supported when the
+    TCs are configured using mqprio.
+  - You must have the latest version of iproute2.
+  - NVM version 6.01 or later is required.
+  - ADq cannot be enabled when any of the following features are enabled: Data
+    Center Bridging (DCB), Multiple Functions per Port (MFP), or Sideband
+    Filters.
+  - If another driver (for example, DPDK) has set cloud filters, you cannot
+    enable ADq.
+  - Tunnel filters are not supported in ADq. If encapsulated packets do
+    arrive in non-tunnel mode, filtering will be done on the inner headers.
+    For example, for VXLAN traffic in non-tunnel mode, PCTYPE is identified
+    as a VXLAN encapsulated packet, outer headers are ignored. Therefore,
+    inner headers are matched.
+  - If a TC filter on a PF matches traffic over a VF (on the PF), that
+    traffic will be routed to the appropriate queue of the PF, and will
+    not be passed on to the VF. Such traffic will end up getting dropped higher
+    up in the TCP/IP stack as it does not match PF address data.
+  - If traffic matches multiple TC filters that point to different TCs,
+    that traffic will be duplicated and sent to all matching TC queues.
+    The hardware switch mirrors the packet to a VSI list when multiple
+    filters are matched.
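+
+Once the TCs are configured, traffic can be steered to them with TC filters.
+As a sketch (the IP address, port, and interface are placeholders), the
+following directs inbound TCP port 80 traffic for 192.168.1.1 to tc1's queues::
+
+  # tc filter add dev <interface> protocol ip parent ffff: prio 1 flower \
+    dst_ip 192.168.1.1/32 ip_proto tcp dst_port 80 skip_sw hw_tc 1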
+
+
+Known Issues/Troubleshooting
+============================
+
+NOTE: 1 Gb devices based on the Intel(R) Ethernet Network Connection X722 do
+not support the following features:
+
+  * Data Center Bridging (DCB)
+  * QoS
+  * VMQ
+  * SR-IOV
+  * Task Encapsulation offload (VXLAN, NVGRE)
+  * Energy Efficient Ethernet (EEE)
+  * Auto-media detect
+
+Unexpected Issues when the device driver and DPDK share a device
+----------------------------------------------------------------
+Unexpected issues may result when an i40e device is in multi driver mode and
+the kernel driver and DPDK driver are sharing the device. This is because
+access to the global NIC resources is not synchronized between multiple
+drivers. Any change to the global NIC configuration (writing to a global
+register, setting global configuration by AQ, or changing switch modes) will
+affect all ports and drivers on the device. Loading DPDK with the
+"multi-driver" module parameter may mitigate some of the issues.
+
+TC0 must be enabled when setting up DCB on a switch
+---------------------------------------------------
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt
deleted file mode 100644
index c2d6e1824b29..000000000000
--- a/Documentation/networking/i40e.txt
+++ /dev/null
@@ -1,190 +0,0 @@
-Linux Base Driver for the Intel(R) Ethernet Controller XL710 Family
-===================================================================
-
-Intel i40e Linux driver.
-Copyright(c) 2013 Intel Corporation.
- -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Performance Tuning -- Known Issues -- Support - - -Identifying Your Adapter -======================== - -The driver in this release is compatible with the Intel Ethernet -Controller XL710 Family. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/sb/CS-012904.htm - - -Enabling the driver -=================== - -The driver is enabled via the standard kernel configuration system, -using the make command: - - make config/oldconfig/menuconfig/etc. - -The driver is located in the menu structure at: - - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet driver support - -> Intel devices - -> Intel(R) Ethernet Controller XL710 Family - -Additional Configurations -========================= - - Generic Receive Offload (GRO) - ----------------------------- - The driver supports the in-kernel software implementation of GRO. GRO has - shown that by coalescing Rx traffic into larger chunks of data, CPU - utilization can be significantly reduced when under large Rx load. GRO is - an evolution of the previously-used LRO interface. GRO is able to coalesce - other protocols besides TCP. It's also safe to use with configurations that - are problematic for LRO, namely bridging and iSCSI. - - Ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The latest - ethtool version is required for this functionality. - - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool - - - Flow Director n-ntuple traffic filters (FDir) - --------------------------------------------- - The driver utilizes the ethtool interface for configuring ntuple filters, - via "ethtool -N <device> <filter>". - - The sctp4, ip4, udp4, and tcp4 flow types are supported with the standard - fields including src-ip, dst-ip, src-port and dst-port. The driver only - supports fully enabling or fully masking the fields, so use of the mask - fields for partial matches is not supported. - - Additionally, the driver supports using the action to specify filters for a - Virtual Function. You can specify the action as a 64bit value, where the - lower 32 bits represents the queue number, while the next 8 bits represent - which VF. Note that 0 is the PF, so the VF identifier is offset by 1. For - example: - - ... action 0x800000002 ... - - Would indicate to direct traffic for Virtual Function 7 (8 minus 1) on queue - 2 of that VF. - - The driver also supports using the user-defined field to specify 2 bytes of - arbitrary data to match within the packet payload in addition to the regular - fields. The data is specified in the lower 32bits of the user-def field in - the following way: - - +----------------------------+---------------------------+ - | 31 28 24 20 16 | 15 12 8 4 0| - +----------------------------+---------------------------+ - | offset into packet payload | 2 bytes of flexible data | - +----------------------------+---------------------------+ - - As an example, - - ... user-def 0x4FFFF .... - - means to match the value 0xFFFF 4 bytes into the packet payload. Note that - the offset is based on the beginning of the payload, and not the beginning - of the packet. Thus - - flow-type tcp4 ... user-def 0x8BEAF .... 
- - would match TCP/IPv4 packets which have the value 0xBEAF 8bytes into the - TCP/IPv4 payload. - - For ICMP, the hardware parses the ICMP header as 4 bytes of header and 4 - bytes of payload, so if you want to match an ICMP frames payload you may need - to add 4 to the offset in order to match the data. - - Furthermore, the offset can only be up to a value of 64, as the hardware - will only read up to 64 bytes of data from the payload. It must also be even - as the flexible data is 2 bytes long and must be aligned to byte 0 of the - packet payload. - - When programming filters, the hardware is limited to using a single input - set for each flow type. This means that it is an error to program two - different filters with the same type that don't match on the same fields. - Thus the second of the following two commands will fail: - - ethtool -N <device> flow-type tcp4 src-ip 192.168.0.7 action 5 - ethtool -N <device> flow-type tcp4 dst-ip 192.168.15.18 action 1 - - This is because the first filter will be accepted and reprogram the input - set for TCPv4 filters, but the second filter will be unable to reprogram the - input set until all the conflicting TCPv4 filters are first removed. - - Note that the user-defined flexible offset is also considered part of the - input set and cannot be programmed separately for multiple filters of the - same type. However, the flexible data is not part of the input set and - multiple filters may use the same offset but match against different data. - - Data Center Bridging (DCB) - -------------------------- - DCB configuration is not currently supported. - - FCoE - ---- - The driver supports Fiber Channel over Ethernet (FCoE) and Data Center - Bridging (DCB) functionality. Configuring DCB and FCoE is outside the scope - of this driver doc. Refer to http://www.open-fcoe.org/ for FCoE project - information and http://www.open-lldp.org/ or email list - e1000-eedc@lists.sourceforge.net for DCB information. - - MAC and VLAN anti-spoofing feature - ---------------------------------- - When a malicious driver attempts to send a spoofed packet, it is dropped by - the hardware and not transmitted. An interrupt is sent to the PF driver - notifying it of the spoof attempt. - - When a spoofed packet is detected the PF driver will send the following - message to the system log (displayed by the "dmesg" command): - - Spoof event(s) detected on VF (n) - - Where n=the VF that attempted to do the spoofing. - - -Performance Tuning -================== - -An excellent article on performance tuning can be found at: - -http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf - - -Known Issues -============ - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://e1000.sourceforge.net - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sourceforge.net and copy -netdev@vger.kernel.org. diff --git a/Documentation/networking/iavf.rst b/Documentation/networking/iavf.rst new file mode 100644 index 000000000000..f8b42b64eb28 --- /dev/null +++ b/Documentation/networking/iavf.rst @@ -0,0 +1,281 @@ +.. 
SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function +================================================================== + +Intel Ethernet Adaptive Virtual Function Linux driver. +Copyright(c) 2013-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Additional Configurations +- Known Issues/Troubleshooting +- Support + +This file describes the iavf Linux* Base Driver. This driver was formerly +called i40evf. + +The iavf driver supports the below mentioned virtual function devices and +can only be activated on kernels running the i40e or newer Physical Function +(PF) driver compiled with CONFIG_PCI_IOV. The iavf driver requires +CONFIG_PCI_MSI to be enabled. + +The guest OS loading the iavf driver must support MSI-X interrupts. + +Identifying Your Adapter +======================== +The driver in this kernel is compatible with devices based on the following: + * Intel(R) XL710 X710 Virtual Function + * Intel(R) X722 Virtual Function + * Intel(R) XXV710 Virtual Function + * Intel(R) Ethernet Adaptive Virtual Function + +For the best performance, make sure the latest NVM/FW is installed on your +device. + +For information on how to identify your adapter, and for the latest NVM/FW +images and Intel network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + dmesg -n 8 + +NOTE: This setting is not saved across reboots. + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: +https://www.kernel.org/pub/software/network/ethtool/ + +Setting VLAN Tag Stripping +-------------------------- +If you have applications that require Virtual Functions (VFs) to receive +packets with VLAN tags, you can disable VLAN tag stripping for the VF. The +Physical Function (PF) processes requests issued from the VF to enable or +disable VLAN tag stripping. Note that if the PF has assigned a VLAN to a VF, +then requests from that VF to set VLAN tag stripping will be ignored. + +To enable/disable VLAN tag stripping for a VF, issue the following command +from inside the VM in which you are running the VF:: + + ethtool -K <if_name> rxvlan on/off + +or alternatively:: + + ethtool --offload <if_name> rxvlan on/off + +Adaptive Virtual Function +------------------------- +Adaptive Virtual Function (AVF) allows the virtual function driver, or VF, to +adapt to changing feature sets of the physical function driver (PF) with which +it is associated. This allows system administrators to update a PF without +having to update all the VFs associated with it. All AVFs have a single common +device ID and branding string. + +AVFs have a minimum set of features known as "base mode," but may provide +additional features depending on what features are available in the PF with +which the AVF is associated. The following are base mode features: + +- 4 Queue Pairs (QP) and associated Configuration Status Registers (CSRs) + for Tx/Rx. +- i40e descriptors and ring format. +- Descriptor write-back completion. 
+- 1 control queue, with i40e descriptors, CSRs and ring format.
+- 5 MSI-X interrupt vectors and corresponding i40e CSRs.
+- 1 Interrupt Throttle Rate (ITR) index.
+- 1 Virtual Station Interface (VSI) per VF.
+- 1 Traffic Class (TC), TC0
+- Receive Side Scaling (RSS) with 64 entry indirection table and key,
+  configured through the PF.
+- 1 unicast MAC address reserved per VF.
+- 16 MAC address filters for each VF.
+- Stateless offloads - non-tunneled checksums.
+- AVF device ID.
+- HW mailbox is used for VF to PF communications (including on Windows).
+
+IEEE 802.1ad (QinQ) Support
+---------------------------
+The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
+IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
+"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks
+allow L2 tunneling and the ability to segregate traffic within a particular
+VLAN ID, among other uses.
+
+The following are examples of how to configure 802.1ad (QinQ)::
+
+  ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24
+  ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371
+
+Where "24" and "371" are example VLAN IDs.
+
+NOTES:
+  Receive checksum offloads, cloud filters, and VLAN acceleration are not
+  supported for 802.1ad (QinQ) packets.
+
+Application Device Queues (ADq)
+-------------------------------
+Application Device Queues (ADq) allows you to dedicate one or more queues to a
+specific application. This can reduce latency for the specified application,
+and allow Tx traffic to be rate limited per application. Follow the steps below
+to set ADq.
+
+1. Create traffic classes (TCs). A maximum of 8 TCs can be created per
+interface. The shaper bw_rlimit parameter is optional.
+
+Example: Sets up two tcs, tc0 and tc1, with 16 queues each and max tx rate set
+to 1Gbit for tc0 and 3Gbit for tc1.
+
+::
+
+  # tc qdisc add dev <interface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1
+    queues 16@0 16@16 hw 1 mode channel shaper bw_rlimit min_rate 1Gbit 2Gbit
+    max_rate 1Gbit 3Gbit
+
+map: priority mapping for up to 16 priorities to tcs (e.g. map 0 0 0 0 1 1 1 1
+sets priorities 0-3 to use tc0 and 4-7 to use tc1)
+
+queues: for each tc, <num queues>@<offset> (e.g. queues 16@0 16@16 assigns
+16 queues to tc0 at offset 0 and 16 queues to tc1 at offset 16. Max total
+number of queues for all tcs is 64 or number of cores, whichever is lower.)
+
+hw 1 mode channel: 'channel' with 'hw' set to 1 is a new hardware
+offload mode in mqprio that makes full use of the mqprio options, the
+TCs, the queue configurations, and the QoS parameters.
+
+shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
+Totals must be equal to or less than port speed.
+
+For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
+monitoring tools such as ifstat or sar -n DEV [interval] [number of samples]
+
+2. Enable HW TC offload on interface::
+
+  # ethtool -K <interface> hw-tc-offload on
+
+3. Apply TCs to ingress (RX) flow of interface::
+
+  # tc qdisc add dev <interface> ingress
+
+NOTES:
+  - Run all tc commands from the iproute2 <pathtoiproute2>/tc/ directory.
+  - ADq is not compatible with cloud filters.
+  - Setting up channels via ethtool (ethtool -L) is not supported when the TCs
+    are configured using mqprio.
+  - You must have the latest version of iproute2.
+  - NVM version 6.01 or later is required.
+  - ADq cannot be enabled when any of the following features are enabled: Data
+    Center Bridging (DCB), Multiple Functions per Port (MFP), or Sideband Filters.
+  - If another driver (for example, DPDK) has set cloud filters, you cannot
+    enable ADq.
+  - Tunnel filters are not supported in ADq. If encapsulated packets do arrive
+    in non-tunnel mode, filtering will be done on the inner headers. For example,
+    for VXLAN traffic in non-tunnel mode, PCTYPE is identified as a VXLAN
+    encapsulated packet, outer headers are ignored. Therefore, inner headers are
+    matched.
+  - If a TC filter on a PF matches traffic over a VF (on the PF), that traffic
+    will be routed to the appropriate queue of the PF, and will not be passed on
+    to the VF. Such traffic will end up getting dropped higher up in the TCP/IP
+    stack as it does not match PF address data.
+  - If traffic matches multiple TC filters that point to different TCs, that
+    traffic will be duplicated and sent to all matching TC queues. The hardware
+    switch mirrors the packet to a VSI list when multiple filters are matched.
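+
+Once the TCs exist, traffic can be steered to them with TC filters. As a
+sketch (the IP address, port, and interface are placeholders), the following
+sends inbound TCP port 80 traffic for 192.168.1.1 to tc1's queues::
+
+  # tc filter add dev <interface> protocol ip parent ffff: prio 1 flower \
+    dst_ip 192.168.1.1/32 ip_proto tcp dst_port 80 skip_sw hw_tc 1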
+
+
+Known Issues/Troubleshooting
+============================
+
+Traffic Is Not Being Passed Between VM and Client
+-------------------------------------------------
+You may not be able to pass traffic between a client system and a
+Virtual Machine (VM) running on a separate host if the Virtual Function
+(VF, or Virtual NIC) is not in trusted mode and spoof checking is enabled
+on the VF. Note that this situation can occur in any combination of client,
+host, and guest operating system. For information on how to set the VF to
+trusted mode, refer to the section "VLAN Tag Packet Steering" in this
+readme document. For information on setting spoof checking, refer to the
+section "MAC and VLAN anti-spoofing feature" in this readme document.
+
+Do not unload port driver if VF with active VM is bound to it
+-------------------------------------------------------------
+Do not unload a port's driver if a Virtual Function (VF) with an active Virtual
+Machine (VM) is bound to it. Doing so will cause the port to appear to hang.
+Once the VM shuts down, or otherwise releases the VF, the command will complete.
+
+Virtual machine does not get link
+---------------------------------
+If the virtual machine has more than one virtual port assigned to it, and those
+virtual ports are bound to different physical ports, you may not get link on
+all of the virtual ports. The following command may work around the issue::
+
+  ethtool -r <PF>
+
+Where <PF> is the PF interface in the host, for example: p5p1. You may need to
+run the command more than once to get link on all virtual ports.
+
+MAC address of Virtual Function changes unexpectedly
+----------------------------------------------------
+If a Virtual Function's MAC address is not assigned in the host, then the VF
+(virtual function) driver will use a random MAC address. This random MAC
+address may change each time the VF driver is reloaded. You can assign a static
+MAC address in the host machine. This static MAC address will survive
+a VF driver reload.
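+
+A minimal sketch of assigning such an address from the host (the PF name, VF
+index, and MAC address are placeholders)::
+
+  # ip link set <PF> vf 0 mac 02:aa:bb:cc:dd:ee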
+
+Driver Buffer Overflow Fix
+--------------------------
+The fix to resolve CVE-2016-8105, referenced in Intel SA-00069
+https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00069.html
+is included in this and future versions of the driver.
+
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
+Due to the default ARP behavior on Linux, it is not possible to have one system
+on two IP networks in the same Ethernet broadcast domain (non-partitioned
+switch) behave as expected. All Ethernet interfaces will respond to IP traffic
+for any IP address assigned to the system. This results in unbalanced receive
+traffic.
+
+If you have multiple interfaces in a server, turn on ARP filtering by
+entering::
+
+  echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+NOTE: This setting is not saved across reboots. The configuration change can be
+made permanent by adding the following line to the file /etc/sysctl.conf::
+
+  net.ipv4.conf.all.arp_filter = 1
+
+Alternatively, you can install the interfaces in separate broadcast domains
+(either in different switches or in a switch partitioned to VLANs).
+
+Rx Page Allocation Errors
+-------------------------
+'Page allocation failure. order:0' errors may occur under stress.
+This is caused by the way the Linux kernel reports this stressed condition.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/iavf.txt b/Documentation/networking/iavf.txt
deleted file mode 100644
index cc902a2369d6..000000000000
--- a/Documentation/networking/iavf.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Linux* Base Driver for Intel(R) Network Connection
-==================================================
-
-Intel Ethernet Adaptive Virtual Function Linux driver.
-Copyright(c) 2013-2018 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Known Issues/Troubleshooting
-- Support
-
-This file describes the iavf Linux* Base Driver. This driver
-was formerly called i40evf.
-
-The iavf driver supports the below mentioned virtual function
-devices and can only be activated on kernels running the i40e or
-newer Physical Function (PF) driver compiled with CONFIG_PCI_IOV.
-The iavf driver requires CONFIG_PCI_MSI to be enabled.
-
-The guest OS loading the iavf driver must support MSI-X interrupts.
- -Supported Hardware -================== -Intel XL710 X710 Virtual Function -Intel X722 Virtual Function -Intel Ethernet Adaptive Virtual Function - -Identifying Your Adapter -======================== - -For more information on how to identify your adapter, go to the -Adapter & Driver ID Guide at: - - https://www.intel.com/content/www/us/en/support/articles/000005584/network-and-i-o/ethernet-products.html - - -Known Issues/Troubleshooting -============================ - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/ice.rst b/Documentation/networking/ice.rst new file mode 100644 index 000000000000..1e4948c9e989 --- /dev/null +++ b/Documentation/networking/ice.rst @@ -0,0 +1,45 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series +=================================================================== + +Intel ice Linux driver. +Copyright(c) 2018 Intel Corporation. + +Contents +======== + +- Enabling the driver +- Support + +The driver in this release supports Intel's E800 Series of products. For +more information, visit Intel's support page at https://support.intel.com. + +Enabling the driver +=================== +The driver is enabled via the standard kernel configuration system, +using the make command:: + + make oldconfig/silentoldconfig/menuconfig/etc. + +The driver is located in the menu structure at: + + -> Device Drivers + -> Network device support (NETDEVICES [=y]) + -> Ethernet driver support + -> Intel devices + -> Intel(R) Ethernet Connection E800 Series Support + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/ice.txt b/Documentation/networking/ice.txt deleted file mode 100644 index 6261c46378e1..000000000000 --- a/Documentation/networking/ice.txt +++ /dev/null @@ -1,39 +0,0 @@ -Intel(R) Ethernet Connection E800 Series Linux Driver -=================================================================== - -Intel ice Linux driver. -Copyright(c) 2018 Intel Corporation. - -Contents -======== -- Enabling the driver -- Support - -The driver in this release supports Intel's E800 Series of products. For -more information, visit Intel's support page at http://support.intel.com. - -Enabling the driver -=================== - -The driver is enabled via the standard kernel configuration system, -using the make command: - - Make oldconfig/silentoldconfig/menuconfig/etc. 
- -The driver is located in the menu structure at: - - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet driver support - -> Intel devices - -> Intel(R) Ethernet Connection E800 Series Support - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -If an issue is identified with the released source code, please email -the maintainer listed in the MAINTAINERS file. diff --git a/Documentation/networking/igb.rst b/Documentation/networking/igb.rst new file mode 100644 index 000000000000..ba16b86d5593 --- /dev/null +++ b/Documentation/networking/igb.rst @@ -0,0 +1,193 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for Intel(R) Ethernet Network Connection +=========================================================== + +Intel Gigabit Linux driver. +Copyright(c) 1999-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Additional Configurations +- Support + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Command Line Parameters +======================== +If the driver is built as a module, the following optional parameters are used +by entering them on the command line with the modprobe command using this +syntax:: + + modprobe igb [<option>=<VAL1>,<VAL2>,...] + +There needs to be a <VAL#> for each network port in the system supported by +this driver. The values will be applied to each instance, in function order. +For example:: + + modprobe igb max_vfs=2,4 + +In this case, there are two network ports supported by igb in the system. + +NOTE: A descriptor describes a data buffer and attributes related to the data +buffer. This information is accessed by the hardware. + +max_vfs +------- +:Valid Range: 0-7 + +This parameter adds support for SR-IOV. It causes the driver to spawn up to +max_vfs worth of virtual functions. If the value is greater than 0 it will +also force the VMDq parameter to be 1 or more. + +The parameters for the driver are referenced by position. Thus, if you have a +dual port adapter, or more than one adapter in your system, and want N virtual +functions per port, you must specify a number for each port with each parameter +separated by a comma. For example:: + + modprobe igb max_vfs=4 + +This will spawn 4 VFs on the first port. + +:: + + modprobe igb max_vfs=2,4 + +This will spawn 2 VFs on the first port and 4 VFs on the second port. + +NOTE: Caution must be used in loading the driver with these parameters. +Depending on your system configuration, number of slots, etc., it is impossible +to predict in all cases where the positions would be on the command line. + +NOTE: Neither the device nor the driver control how VFs are mapped into config +space. Bus layout will vary by operating system. On operating systems that +support it, you can check sysfs to find the mapping. + +NOTE: When either SR-IOV mode or VMDq mode is enabled, hardware VLAN filtering +and VLAN tag stripping/insertion will remain enabled. Please remove the old +VLAN filter before the new VLAN filter is added. 
For example::
+
+  ip link set eth0 vf 0 vlan 100	// set vlan 100 for VF 0
+  ip link set eth0 vf 0 vlan 0	// Delete vlan 100
+  ip link set eth0 vf 0 vlan 200	// set a new vlan 200 for VF 0
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system logs.
+
+
+Additional Features and Configurations
+======================================
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+  ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+  ip link set mtu 9000 dev eth<x>
+  ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file:
+
+- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x>
+- For SLES: /etc/sysconfig/network/<config_file>
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9216. This value coincides
+with the maximum Jumbo Frames size of 9234 bytes.
+
+NOTE: Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+poor performance or loss of link.
+
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+
+https://www.kernel.org/pub/software/network/ethtool/
+
+
+Enabling Wake on LAN* (WoL)
+---------------------------
+WoL is configured through the ethtool* utility.
+
+WoL will be enabled on the system during the next shut down or reboot. For
+this driver version, in order to enable WoL, the igb driver must be loaded
+prior to shutting down or suspending the system.
+
+NOTE: Wake on LAN is only supported on port A of multi-port devices. Also
+Wake On LAN is not supported for the following device:
+- Intel(R) Gigabit VT Quad Port Server Adapter
+
+
+Multiqueue
+----------
+In this mode, a separate MSI-X vector is allocated for each queue and one for
+"other" interrupts such as link status change and errors. All interrupts are
+throttled via interrupt moderation. Interrupt moderation must be used to avoid
+interrupt storms while the driver is processing one interrupt. The moderation
+value should be at least as large as the expected time for the driver to
+process an interrupt. Multiqueue is off by default.
+
+REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not found,
+the system will fall back to MSI or to Legacy interrupts. This driver supports
+receive multiqueue on all kernels that support MSI-X.
+
+NOTE: On some kernels a reboot is required to switch between single queue mode
+and multiqueue mode or vice-versa.
+
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+
+An interrupt is sent to the PF driver notifying it of the spoof attempt.
When a +spoofed packet is detected, the PF driver will send the following message to +the system log (displayed by the "dmesg" command): +Spoof event(s) detected on VF(n), where n = the VF that attempted to do the +spoofing + + +Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool +------------------------------------------------------------ +You can set a MAC address of a Virtual Function (VF), a default VLAN and the +rate limit using the IProute2 tool. Download the latest version of the +IProute2 tool from Sourceforge if your version does not have all the features +you require. + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt deleted file mode 100644 index f90643ef39c9..000000000000 --- a/Documentation/networking/igb.txt +++ /dev/null @@ -1,129 +0,0 @@ -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Support - -Identifying Your Adapter -======================== - -This driver supports all 82575, 82576 and 82580-based Intel (R) gigabit network -connections. - -For specific information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -Command Line Parameters -======================= - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -max_vfs -------- -Valid Range: 0-7 -Default Value: 0 - -This parameter adds support for SR-IOV. It causes the driver to spawn up to -max_vfs worth of virtual function. - -Additional Configurations -========================= - - Jumbo Frames - ------------ - Jumbo Frames support is enabled by changing the MTU to a value larger than - the default of 1500. Use the ip command to increase the MTU size. - For example: - - ip link set dev eth<x> mtu 9000 - - This setting is not saved across reboots. - - Notes: - - - The maximum MTU setting for Jumbo Frames is 9216. This value coincides - with the maximum Jumbo Frames size of 9234 bytes. - - - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in - poor performance or loss of link. - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The latest - version of ethtool can be found at: - - https://www.kernel.org/pub/software/network/ethtool/ - - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is configured through the ethtool* utility. - - For instructions on enabling WoL with ethtool, refer to the ethtool man page. - - WoL will be enabled on the system during the next shut down or reboot. - For this driver version, in order to enable WoL, the igb driver must be - loaded when shutting down or rebooting the system. - - Wake On LAN is only supported on port A of multi-port adapters. 
-
-  Wake On LAN is not supported for the Intel(R) Gigabit VT Quad Port Server
-  Adapter.
-
-  Multiqueue
-  ----------
-  In this mode, a separate MSI-X vector is allocated for each queue and one
-  for "other" interrupts such as link status change and errors. All
-  interrupts are throttled via interrupt moderation. Interrupt moderation
-  must be used to avoid interrupt storms while the driver is processing one
-  interrupt. The moderation value should be at least as large as the expected
-  time for the driver to process an interrupt. Multiqueue is off by default.
-
-  REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not
-  found, the system will fallback to MSI or to Legacy interrupts.
-
-  MAC and VLAN anti-spoofing feature
-  ----------------------------------
-  When a malicious driver attempts to send a spoofed packet, it is dropped by
-  the hardware and not transmitted. An interrupt is sent to the PF driver
-  notifying it of the spoof attempt.
-
-  When a spoofed packet is detected the PF driver will send the following
-  message to the system log (displayed by the "dmesg" command):
-
-  Spoof event(s) detected on VF(n)
-
-  Where n=the VF that attempted to do the spoofing.
-
-  Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool
-  ------------------------------------------------------------
-  You can set a MAC address of a Virtual Function (VF), a default VLAN and the
-  rate limit using the IProute2 tool. Download the latest version of the
-  iproute2 tool from Sourceforge if your version does not have all the
-  features you require.
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
-    www.intel.com/support/
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-    http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/igbvf.rst b/Documentation/networking/igbvf.rst
new file mode 100644
index 000000000000..a8a9ffa4f8d3
--- /dev/null
+++ b/Documentation/networking/igbvf.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet
+============================================================
+
+Intel Gigabit Virtual Function Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+- Identifying Your Adapter
+- Additional Configurations
+- Support
+
+This driver supports Intel 82576-based virtual function devices that can only
+be activated on kernels that support SR-IOV.
+
+SR-IOV requires the correct platform and OS support.
+
+The guest OS loading this driver must support MSI-X interrupts.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+Driver information can be obtained using ethtool, lspci, and ifconfig.
+Instructions on updating ethtool can be found in the section Additional
+Configurations later in this document.
+
+NOTE: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
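+
+For instance, the name and version of the driver bound to an interface can be
+checked with ethtool's driver-info option (a sketch; eth0 is a placeholder)::
+
+  ethtool -i eth0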
+ + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: + +https://www.kernel.org/pub/software/network/ethtool/ + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/igbvf.txt b/Documentation/networking/igbvf.txt deleted file mode 100644 index bd404735fb46..000000000000 --- a/Documentation/networking/igbvf.txt +++ /dev/null @@ -1,80 +0,0 @@ -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Support - -This file describes the igbvf Linux* Base Driver for Intel Network Connection. - -The igbvf driver supports 82576-based virtual function devices that can only -be activated on kernels that support SR-IOV. SR-IOV requires the correct -platform and OS support. - -The igbvf driver requires the igb driver, version 2.0 or later. The igbvf -driver supports virtual functions generated by the igb driver with a max_vfs -value of 1 or greater. For more information on the max_vfs parameter refer -to the README included with the igb driver. - -The guest OS loading the igbvf driver must support MSI-X interrupts. - -This driver is only supported as a loadable module at this time. Intel is -not supplying patches against the kernel source to allow for static linking -of the driver. For questions related to hardware requirements, refer to the -documentation supplied with your Intel Gigabit adapter. All hardware -requirements listed apply to use with Linux. - -Instructions on updating ethtool can be found in the section "Additional -Configurations" later in this document. - -VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs. - -Identifying Your Adapter -======================== - -The igbvf driver supports 82576-based virtual function devices that can only -be activated on kernels that support SR-IOV. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -For the latest Intel network drivers for Linux, refer to the following -website. In the search field, enter your adapter name or type, or use the -networking link on the left to search for your adapter: - - http://downloadcenter.intel.com/scripts-df-external/Support_Intel.aspx - -Additional Configurations -========================= - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. 
The ethtool - version 3.0 or later is required for this functionality, although we - strongly recommend downloading the latest version at: - - https://www.kernel.org/pub/software/network/ethtool/ - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index fcd710f2cc7a..bd89dae8d578 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -14,6 +14,16 @@ Contents: dpaa2/index e100 e1000 + e1000e + fm10k + igb + igbvf + ixgb + ixgbe + ixgbevf + i40e + iavf + ice kapi z8530book msg_zerocopy diff --git a/Documentation/networking/ixgb.rst b/Documentation/networking/ixgb.rst new file mode 100644 index 000000000000..8bd80e27843d --- /dev/null +++ b/Documentation/networking/ixgb.rst @@ -0,0 +1,467 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection +===================================================================== + +October 1, 2018 + + +Contents +======== + +- In This Release +- Identifying Your Adapter +- Command Line Parameters +- Improving Performance +- Additional Configurations +- Known Issues/Troubleshooting +- Support + + + +In This Release +=============== + +This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R) +Network Connection. This driver includes support for Itanium(R)2-based +systems. + +For questions related to hardware requirements, refer to the documentation +supplied with your 10 Gigabit adapter. All hardware requirements listed apply +to use with Linux. + +The following features are available in this kernel: + - Native VLANs + - Channel Bonding (teaming) + - SNMP + +Channel Bonding documentation can be found in the Linux kernel source: +/Documentation/networking/bonding.txt + +The driver information previously displayed in the /proc filesystem is not +supported in this release. Alternatively, you can use ethtool (version 1.6 +or later), lspci, and iproute2 to obtain the same information. + +Instructions on updating ethtool can be found in the section "Additional +Configurations" later in this document. + + +Identifying Your Adapter +======================== + +The following Intel network adapters are compatible with the drivers in this +release: + ++------------+------------------------------+----------------------------------+ +| Controller | Adapter Name | Physical Layer | ++============+==============================+==================================+ +| 82597EX | Intel(R) PRO/10GbE LR/SR/CX4 | - 10G Base-LR (fiber) | +| | Server Adapters | - 10G Base-SR (fiber) | +| | | - 10G Base-CX4 (copper) | ++------------+------------------------------+----------------------------------+ + +For more information on how to identify your adapter, go to the Adapter & +Driver ID Guide at: + + https://support.intel.com + + +Command Line Parameters +======================= + +If the driver is built as a module, the following optional parameters are +used by entering them on the command line with the modprobe command using +this syntax:: + + modprobe ixgb [<option>=<VAL1>,<VAL2>,...] 
+
+For example, with two 10GbE PCI adapters, entering::
+
+  modprobe ixgb TxDescriptors=80,128
+
+loads the ixgb driver with 80 TX resources for the first adapter and 128 TX
+resources for the second adapter.
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+Copybreak
+---------
+:Valid Range: 0-XXXX
+:Default Value: 256
+
+  This is the maximum size of packet that is copied to a new buffer on
+  receive.
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+  This parameter adjusts the level of debug messages displayed in the
+  system logs.
+
+FlowControl
+-----------
+:Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+:Default Value: 1 if no EEPROM, otherwise read from EEPROM
+
+  This parameter controls the automatic generation (Tx) of and response (Rx)
+  to Ethernet PAUSE frames. There are hardware bugs associated with enabling
+  Tx flow control, so beware.
+
+RxDescriptors
+-------------
+:Valid Range: 64-4096
+:Default Value: 1024
+
+  This value is the number of receive descriptors allocated by the driver.
+  Increasing this value allows the driver to buffer more incoming packets.
+  Each descriptor is 16 bytes. A receive buffer is also allocated for
+  each descriptor and can be either 2048, 4056, 8192, or 16384 bytes,
+  depending on the MTU setting. When the MTU size is 1500 or less, the
+  receive buffer size is 2048 bytes. When the MTU is greater than 1500 the
+  receive buffer size will be either 4056, 8192, or 16384 bytes. The
+  maximum MTU size is 16114.
+
+TxDescriptors
+-------------
+:Valid Range: 64-4096
+:Default Value: 256
+
+  This value is the number of transmit descriptors allocated by the driver.
+  Increasing this value allows the driver to queue more transmits. Each
+  descriptor is 16 bytes.
+
+RxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 72
+
+  This value delays the generation of receive interrupts in units of
+  0.8192 microseconds. Receive interrupt reduction can improve CPU
+  efficiency if properly tuned for specific network traffic. Increasing
+  this value adds extra latency to frame reception and can end up
+  decreasing the throughput of TCP traffic. If the system is reporting
+  dropped receives, this value may be set too high, causing the driver to
+  run out of available receive descriptors.
+
+TxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 32
+
+  This value delays the generation of transmit interrupts in units of
+  0.8192 microseconds. Transmit interrupt reduction can improve CPU
+  efficiency if properly tuned for specific network traffic. Increasing
+  this value adds extra latency to frame transmission and can end up
+  decreasing the throughput of TCP traffic. If this value is set too high,
+  it will cause the driver to run out of available transmit descriptors.
+
+XsumRX
+------
+:Valid Range: 0-1
+:Default Value: 1
+
+  A value of '1' indicates that the driver should enable IP checksum
+  offload for received packets (both UDP and TCP) to the adapter hardware.
+
+RxFCHighThresh
+--------------
+:Valid Range: 1,536-262,136 (0x600 - 0x3FFF8, 8 byte granularity)
+:Default Value: 196,608 (0x30000)
+
+  Receive Flow control high threshold (when we send a pause frame)
+
+RxFCLowThresh
+-------------
+:Valid Range: 64-262,136 (0x40 - 0x3FFF8, 8 byte granularity)
+:Default Value: 163,840 (0x28000)
+
+  Receive Flow control low threshold (when we send a resume frame)
+
+FCReqTimeout
+------------
+:Valid Range: 1-65535
+:Default Value: 65535
+
+  Flow control request timeout (how long to pause the link partner's tx)
+
+IntDelayEnable
+--------------
+:Valid Range: 0,1
+:Default Value: 1
+
+  Interrupt Delay, 0 disables transmit interrupt delay and 1 enables it.
+
+
+Improving Performance
+=====================
+
+With the 10 Gigabit server adapters, the default Linux configuration will
+very likely limit the total available throughput artificially. There is a set
+of configuration changes that, when applied together, will increase the ability
+of Linux to transmit and receive data. The following enhancements were
+originally acquired from settings published at http://www.spec.org/web99/ for
+various submitted results using Linux.
+
+NOTE:
+  These changes are only suggestions, and serve as a starting point for
+  tuning your network performance.
+
+The changes are made in three major ways, listed in order of greatest effect:
+
+- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
+  parameter.
+- Use sysctl to modify /proc parameters (essentially kernel tuning)
+- Use setpci to modify the MMRBC field in PCI-X configuration space to increase
+  transmit burst lengths on the bus.
+
+NOTE:
+  setpci modifies the adapter's configuration registers to allow it to read
+  up to 4k bytes at a time (for transmits). However, for some systems the
+  behavior after modifying this register may be undefined (possibly errors of
+  some kind). A power-cycle, hard reset or explicitly setting the e6 register
+  back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a
+  stable configuration.
+
+- COPY these lines and paste them into ixgb_perf.sh:
+
+::
+
+  #!/bin/bash
+  echo "configuring network performance, edit this file to change the interface
+  or device ID of 10GbE card"
+  # set mmrbc to 4k reads, modify only Intel 10GbE device IDs
+  # replace 1a48 with appropriate 10GbE device's ID installed on the system,
+  # if needed.
+  setpci -d 8086:1a48 e6.b=2e
+  # set the MTU (max transmission unit) - it requires your switch and clients
+  # to change as well.
+    # set the txqueuelen
+    # your ixgb adapter should be loaded as eth1 for this to work, change if needed
+    ip li set dev eth1 mtu 9000 txqueuelen 1000 up
+    # call the sysctl utility to modify /proc/sys entries
+    sysctl -p ./sysctl_ixgb.conf
+
+- COPY these lines and paste them into sysctl_ixgb.conf:
+
+::
+
+    # some of the defaults may be different for your kernel
+    # call this file with sysctl -p <this file>
+    # these are just suggested values that worked well to increase throughput in
+    # several network benchmark tests, your mileage may vary
+
+    ### IPV4 specific settings
+    # turn TCP timestamp support off, default 1, reduces CPU use
+    net.ipv4.tcp_timestamps = 0
+    # turn SACK support off, default on
+    # on systems with a VERY fast bus -> memory interface this is the big gainer
+    net.ipv4.tcp_sack = 0
+    # set min/default/max TCP read buffer, default 4096 87380 174760
+    net.ipv4.tcp_rmem = 10000000 10000000 10000000
+    # set min/pressure/max TCP write buffer, default 4096 16384 131072
+    net.ipv4.tcp_wmem = 10000000 10000000 10000000
+    # set min/pressure/max TCP buffer space, default 31744 32256 32768
+    net.ipv4.tcp_mem = 10000000 10000000 10000000
+
+    ### CORE settings (mostly for socket and UDP effect)
+    # set maximum receive socket buffer size, default 131071
+    net.core.rmem_max = 524287
+    # set maximum send socket buffer size, default 131071
+    net.core.wmem_max = 524287
+    # set default receive socket buffer size, default 65535
+    net.core.rmem_default = 524287
+    # set default send socket buffer size, default 65535
+    net.core.wmem_default = 524287
+    # set maximum amount of option memory buffers, default 10240
+    net.core.optmem_max = 524287
+    # set number of unprocessed input packets before kernel starts dropping them; default 300
+    net.core.netdev_max_backlog = 300000
+
+Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface
+your ixgb driver is using and/or replace '1a48' with the appropriate 10GbE
+device ID installed on the system.
+
+NOTE:
+  Unless these scripts are added to the boot process, these changes will
+  last only until the next system reboot.
+
+
+Resolving Slow UDP Traffic
+--------------------------
+If your server does not seem to be able to receive UDP traffic as fast as it
+can receive TCP traffic, it could be because Linux, by default, does not set
+the network stack buffers as large as they need to be to support high UDP
+transfer rates. One way to alleviate this problem is to allow more memory to
+be used by the IP stack to store incoming data.
+
+For instance, use the commands::
+
+    sysctl -w net.core.rmem_max=262143
+
+and::
+
+    sysctl -w net.core.rmem_default=262143
+
+to increase the read buffer memory max and default to 262143 (256k - 1) from
+defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables
+will increase the amount of memory used by the network stack for receives, and
+can be increased significantly more if necessary for your application.
+
+
+Additional Configurations
+=========================
+
+Configuring the Driver on Different Distributions
+-------------------------------------------------
+Configuring a network driver to load properly when the system is started is
+distribution dependent. Typically, the configuration process involves adding
+an alias line to /etc/modprobe.conf as well as editing other system startup
+scripts and/or configuration files. Many popular Linux distributions ship
+with tools to make these changes for you. 
To learn the proper way to
+configure a network device for your system, refer to your distribution
+documentation. If during this process you are asked for the driver or module
+name, the name for the Linux Base Driver for the Intel 10GbE Family of
+Adapters is ixgb.
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set the console log level to eight by entering the following::
+
+    dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
+
+Jumbo Frames
+------------
+The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+enabled by changing the MTU to a value larger than the default of 1500.
+The maximum value for the MTU is 16114. Use the ip command to
+increase the MTU size. For example::
+
+    ip li set dev ethx mtu 9000
+
+The maximum MTU setting for Jumbo Frames is 16114. This value coincides
+with the maximum Jumbo Frames size of 16128.
+
+Ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. Ethtool
+version 1.6 or later is required for this functionality.
+
+The latest release of ethtool can be found at
+https://www.kernel.org/pub/software/network/ethtool/
+
+NOTE:
+  Ethtool version 1.6 supports only a limited set of ethtool options.
+  Support for a more complete ethtool feature set can be enabled by
+  upgrading to the latest version.
+
+NAPI
+----
+NAPI (Rx polling mode) is supported in the ixgb driver.
+
+See https://wiki.linuxfoundation.org/networking/napi for more information on
+NAPI.
+
+
+Known Issues/Troubleshooting
+============================
+
+NOTE:
+  After installing the driver, if your Intel Network Connection is not
+  working, verify in the "In This Release" section of the readme that you have
+  installed the correct driver.
+
+Cable Interoperability Issue with Fujitsu XENPAK Module in SmartBits Chassis
+----------------------------------------------------------------------------
+Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4
+Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits
+chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni.
+The CRC errors may be received either by the Intel(R) PRO/10GbE CX4
+Server adapter or the SmartBits. If this situation occurs, using a different
+cable assembly may resolve the issue.
+
+Cable Interoperability Issues with HP Procurve 3400cl Switch Port
+-----------------------------------------------------------------
+Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server
+adapter is connected to an HP Procurve 3400cl switch port using short cables
+(1 m or shorter). If this situation occurs, using a longer cable may resolve
+the issue.
+
+Excessive CRC errors may be observed when using Fujitsu 24AWG cable assemblies
+that are 10 m or longer, or when using a Leoni 15 m/24AWG cable assembly. The
+CRC errors may be received either by the CX4 Server adapter or at the switch.
+If this situation occurs, using a different cable assembly may resolve the
+issue.
+
+Jumbo Frames System Requirement
+-------------------------------
+Memory allocation failures have been observed on Linux systems with 64 MB
+of RAM or less that are running Jumbo Frames. If you are using Jumbo
+Frames, your system may require more than the advertised minimum
+requirement of 64 MB of system memory. 
+
+Performance Degradation with Jumbo Frames
+-----------------------------------------
+Degradation in throughput performance may be observed in some Jumbo frames
+environments. If this is observed, increasing the application's socket buffer
+size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
+See the specific application manual and /usr/src/linux*/Documentation/
+networking/ip-sysctl.txt for more details.
+
+Allocating Rx Buffers when Using Jumbo Frames
+---------------------------------------------
+Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if
+the available memory is heavily fragmented. This issue may be seen with PCI-X
+adapters or with packet split disabled. This can be reduced or eliminated
+by changing the amount of available memory for receive buffer allocation, by
+increasing /proc/sys/vm/min_free_kbytes.
+
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
+Due to the default ARP behavior on Linux, it is not possible to have
+one system on two IP networks in the same Ethernet broadcast domain
+(non-partitioned switch) behave as expected. All Ethernet interfaces
+will respond to IP traffic for any IP address assigned to the system.
+This results in unbalanced receive traffic.
+
+If you have multiple interfaces in a server, do either of the following:
+
+  - Turn on ARP filtering by entering::
+
+      echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+  - Install the interfaces in separate broadcast domains - either in
+    different switches or in a switch partitioned to VLANs.
+
+UDP Stress Test Dropped Packet Issue
+--------------------------------------
+Under a small-packet UDP stress test with the 10GbE driver, the Linux system
+may drop UDP packets because the socket buffers are full. You may want to
+change the driver's Flow Control variables to the minimum value for
+controlling packet reception.
+
+Tx Hangs Possible Under Stress
+------------------------------
+Under stress conditions, if TX hangs occur, turning off TSO with
+"ethtool -K eth0 tso off" may resolve the problem.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt
deleted file mode 100644
index 09f71d71920a..000000000000
--- a/Documentation/networking/ixgb.txt
+++ /dev/null
@@ -1,433 +0,0 @@
-Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
-=====================================================================
-
-March 14, 2011
-
-
-Contents
-========
-
-- In This Release
-- Identifying Your Adapter
-- Building and Installation
-- Command Line Parameters
-- Improving Performance
-- Additional Configurations
-- Known Issues/Troubleshooting
-- Support
-
-
-
-In This Release
-===============
-
-This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R)
-Network Connection. This driver includes support for Itanium(R)2-based
-systems.
-
-For questions related to hardware requirements, refer to the documentation
-supplied with your 10 Gigabit adapter. All hardware requirements listed apply
-to use with Linux. 
- -The following features are available in this kernel: - - Native VLANs - - Channel Bonding (teaming) - - SNMP - -Channel Bonding documentation can be found in the Linux kernel source: -/Documentation/networking/bonding.txt - -The driver information previously displayed in the /proc filesystem is not -supported in this release. Alternatively, you can use ethtool (version 1.6 -or later), lspci, and iproute2 to obtain the same information. - -Instructions on updating ethtool can be found in the section "Additional -Configurations" later in this document. - - -Identifying Your Adapter -======================== - -The following Intel network adapters are compatible with the drivers in this -release: - -Controller Adapter Name Physical Layer ----------- ------------ -------------- -82597EX Intel(R) PRO/10GbE LR/SR/CX4 10G Base-LR (1310 nm optical fiber) - Server Adapters 10G Base-SR (850 nm optical fiber) - 10G Base-CX4(twin-axial copper cabling) - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/sb/CS-012904.htm - - -Building and Installation -========================= - -select m for "Intel(R) PRO/10GbE support" located at: - Location: - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet (10000 Mbit) (NETDEV_10000 [=y]) -1. make modules && make modules_install - -2. Load the module: - -    modprobe ixgb <parameter>=<value> - - The insmod command can be used if the full - path to the driver module is specified. For example: - - insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgb/ixgb.ko - - With 2.6 based kernels also make sure that older ixgb drivers are - removed from the kernel, before loading the new module: - - rmmod ixgb; modprobe ixgb - -3. Assign an IP address to the interface by entering the following, where - x is the interface number: - - ip addr add ethx <IP_address> - -4. Verify that the interface works. Enter the following, where <IP_address> - is the IP address for another machine on the same subnet as the interface - that is being tested: - - ping <IP_address> - - -Command Line Parameters -======================= - -If the driver is built as a module, the following optional parameters are -used by entering them on the command line with the modprobe command using -this syntax: - - modprobe ixgb [<option>=<VAL1>,<VAL2>,...] - -For example, with two 10GbE PCI adapters, entering: - - modprobe ixgb TxDescriptors=80,128 - -loads the ixgb driver with 80 TX resources for the first adapter and 128 TX -resources for the second adapter. - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -FlowControl -Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) -Default: Read from the EEPROM - If EEPROM is not detected, default is 1 - This parameter controls the automatic generation(Tx) and response(Rx) to - Ethernet PAUSE frames. There are hardware bugs associated with enabling - Tx flow control so beware. - -RxDescriptors -Valid Range: 64-512 -Default Value: 512 - This value is the number of receive descriptors allocated by the driver. - Increasing this value allows the driver to buffer more incoming packets. - Each descriptor is 16 bytes. A receive buffer is also allocated for - each descriptor and can be either 2048, 4056, 8192, or 16384 bytes, - depending on the MTU setting. When the MTU size is 1500 or less, the - receive buffer size is 2048 bytes. 
When the MTU is greater than 1500 the - receive buffer size will be either 4056, 8192, or 16384 bytes. The - maximum MTU size is 16114. - -RxIntDelay -Valid Range: 0-65535 (0=off) -Default Value: 72 - This value delays the generation of receive interrupts in units of - 0.8192 microseconds. Receive interrupt reduction can improve CPU - efficiency if properly tuned for specific network traffic. Increasing - this value adds extra latency to frame reception and can end up - decreasing the throughput of TCP traffic. If the system is reporting - dropped receives, this value may be set too high, causing the driver to - run out of available receive descriptors. - -TxDescriptors -Valid Range: 64-4096 -Default Value: 256 - This value is the number of transmit descriptors allocated by the driver. - Increasing this value allows the driver to queue more transmits. Each - descriptor is 16 bytes. - -XsumRX -Valid Range: 0-1 -Default Value: 1 - A value of '1' indicates that the driver should enable IP checksum - offload for received packets (both UDP and TCP) to the adapter hardware. - - -Improving Performance -===================== - -With the 10 Gigabit server adapters, the default Linux configuration will -very likely limit the total available throughput artificially. There is a set -of configuration changes that, when applied together, will increase the ability -of Linux to transmit and receive data. The following enhancements were -originally acquired from settings published at http://www.spec.org/web99/ for -various submitted results using Linux. - -NOTE: These changes are only suggestions, and serve as a starting point for - tuning your network performance. - -The changes are made in three major ways, listed in order of greatest effect: -- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen - parameter. -- Use sysctl to modify /proc parameters (essentially kernel tuning) -- Use setpci to modify the MMRBC field in PCI-X configuration space to increase - transmit burst lengths on the bus. - -NOTE: setpci modifies the adapter's configuration registers to allow it to read -up to 4k bytes at a time (for transmits). However, for some systems the -behavior after modifying this register may be undefined (possibly errors of -some kind). A power-cycle, hard reset or explicitly setting the e6 register -back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a -stable configuration. - -- COPY these lines and paste them into ixgb_perf.sh: -#!/bin/bash -echo "configuring network performance , edit this file to change the interface -or device ID of 10GbE card" -# set mmrbc to 4k reads, modify only Intel 10GbE device IDs -# replace 1a48 with appropriate 10GbE device's ID installed on the system, -# if needed. -setpci -d 8086:1a48 e6.b=2e -# set the MTU (max transmission unit) - it requires your switch and clients -# to change as well. 
-# set the txqueuelen -# your ixgb adapter should be loaded as eth1 for this to work, change if needed -ip li set dev eth1 mtu 9000 txqueuelen 1000 up -# call the sysctl utility to modify /proc/sys entries -sysctl -p ./sysctl_ixgb.conf -- END ixgb_perf.sh - -- COPY these lines and paste them into sysctl_ixgb.conf: -# some of the defaults may be different for your kernel -# call this file with sysctl -p <this file> -# these are just suggested values that worked well to increase throughput in -# several network benchmark tests, your mileage may vary - -### IPV4 specific settings -# turn TCP timestamp support off, default 1, reduces CPU use -net.ipv4.tcp_timestamps = 0 -# turn SACK support off, default on -# on systems with a VERY fast bus -> memory interface this is the big gainer -net.ipv4.tcp_sack = 0 -# set min/default/max TCP read buffer, default 4096 87380 174760 -net.ipv4.tcp_rmem = 10000000 10000000 10000000 -# set min/pressure/max TCP write buffer, default 4096 16384 131072 -net.ipv4.tcp_wmem = 10000000 10000000 10000000 -# set min/pressure/max TCP buffer space, default 31744 32256 32768 -net.ipv4.tcp_mem = 10000000 10000000 10000000 - -### CORE settings (mostly for socket and UDP effect) -# set maximum receive socket buffer size, default 131071 -net.core.rmem_max = 524287 -# set maximum send socket buffer size, default 131071 -net.core.wmem_max = 524287 -# set default receive socket buffer size, default 65535 -net.core.rmem_default = 524287 -# set default send socket buffer size, default 65535 -net.core.wmem_default = 524287 -# set maximum amount of option memory buffers, default 10240 -net.core.optmem_max = 524287 -# set number of unprocessed input packets before kernel starts dropping them; default 300 -net.core.netdev_max_backlog = 300000 -- END sysctl_ixgb.conf - -Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface -your ixgb driver is using and/or replace '1a48' with appropriate 10GbE device's -ID installed on the system. - -NOTE: Unless these scripts are added to the boot process, these changes will - only last only until the next system reboot. - - -Resolving Slow UDP Traffic --------------------------- -If your server does not seem to be able to receive UDP traffic as fast as it -can receive TCP traffic, it could be because Linux, by default, does not set -the network stack buffers as large as they need to be to support high UDP -transfer rates. One way to alleviate this problem is to allow more memory to -be used by the IP stack to store incoming data. - -For instance, use the commands: - sysctl -w net.core.rmem_max=262143 -and - sysctl -w net.core.rmem_default=262143 -to increase the read buffer memory max and default to 262143 (256k - 1) from -defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables -will increase the amount of memory used by the network stack for receives, and -can be increased significantly more if necessary for your application. - - -Additional Configurations -========================= - - Configuring the Driver on Different Distributions - ------------------------------------------------- - Configuring a network driver to load properly when the system is started is - distribution dependent. Typically, the configuration process involves adding - an alias line to /etc/modprobe.conf as well as editing other system startup - scripts and/or configuration files. Many popular Linux distributions ship - with tools to make these changes for you. 
To learn the proper way to - configure a network device for your system, refer to your distribution - documentation. If during this process you are asked for the driver or module - name, the name for the Linux Base Driver for the Intel 10GbE Family of - Adapters is ixgb. - - Viewing Link Messages - --------------------- - Link messages will not be displayed to the console if the distribution is - restricting system messages. In order to see network driver link messages on - your console, set dmesg to eight by entering the following: - - dmesg -n 8 - - NOTE: This setting is not saved across reboots. - - - Jumbo Frames - ------------ - The driver supports Jumbo Frames for all adapters. Jumbo Frames support is - enabled by changing the MTU to a value larger than the default of 1500. - The maximum value for the MTU is 16114. Use the ip command to - increase the MTU size. For example: - - ip li set dev ethx mtu 9000 - - The maximum MTU setting for Jumbo Frames is 16114. This value coincides - with the maximum Jumbo Frames size of 16128. - - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The ethtool - version 1.6 or later is required for this functionality. - - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool/ - - NOTE: The ethtool version 1.6 only supports a limited set of ethtool options. - Support for a more complete ethtool feature set can be enabled by - upgrading to the latest version. - - - NAPI - ---- - - NAPI (Rx polling mode) is supported in the ixgb driver. NAPI is enabled - or disabled based on the configuration of the kernel. see CONFIG_IXGB_NAPI - - See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. - - -Known Issues/Troubleshooting -============================ - - NOTE: After installing the driver, if your Intel Network Connection is not - working, verify in the "In This Release" section of the readme that you have - installed the correct driver. - - Intel(R) PRO/10GbE CX4 Server Adapter Cable Interoperability Issue with - Fujitsu XENPAK Module in SmartBits Chassis - --------------------------------------------------------------------- - Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 - Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits - chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni. - The CRC errors may be received either by the Intel(R) PRO/10GbE CX4 - Server adapter or the SmartBits. If this situation occurs using a different - cable assembly may resolve the issue. - - CX4 Server Adapter Cable Interoperability Issues with HP Procurve 3400cl - Switch Port - ------------------------------------------------------------------------ - Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server - adapter is connected to an HP Procurve 3400cl switch port using short cables - (1 m or shorter). If this situation occurs, using a longer cable may resolve - the issue. - - Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that - Are 10 m or longer or where using a Leoni 15 m/24AWG cable assembly. The CRC - errors may be received either by the CX4 Server adapter or at the switch. If - this situation occurs, using a different cable assembly may resolve the issue. 
- - - Jumbo Frames System Requirement - ------------------------------- - Memory allocation failures have been observed on Linux systems with 64 MB - of RAM or less that are running Jumbo Frames. If you are using Jumbo - Frames, your system may require more than the advertised minimum - requirement of 64 MB of system memory. - - - Performance Degradation with Jumbo Frames - ----------------------------------------- - Degradation in throughput performance may be observed in some Jumbo frames - environments. If this is observed, increasing the application's socket buffer - size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. - See the specific application manual and /usr/src/linux*/Documentation/ - networking/ip-sysctl.txt for more details. - - - Allocating Rx Buffers when Using Jumbo Frames - --------------------------------------------- - Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if - the available memory is heavily fragmented. This issue may be seen with PCI-X - adapters or with packet split disabled. This can be reduced or eliminated - by changing the amount of available memory for receive buffer allocation, by - increasing /proc/sys/vm/min_free_kbytes. - - - Multiple Interfaces on Same Ethernet Broadcast Network - ------------------------------------------------------ - Due to the default ARP behavior on Linux, it is not possible to have - one system on two IP networks in the same Ethernet broadcast domain - (non-partitioned switch) behave as expected. All Ethernet interfaces - will respond to IP traffic for any IP address assigned to the system. - This results in unbalanced receive traffic. - - If you have multiple interfaces in a server, do either of the following: - - - Turn on ARP filtering by entering: - echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter - - - Install the interfaces in separate broadcast domains - either in - different switches or in a switch partitioned to VLANs. - - - UDP Stress Test Dropped Packet Issue - -------------------------------------- - Under small packets UDP stress test with 10GbE driver, the Linux system - may drop UDP packets due to the fullness of socket buffers. You may want - to change the driver's Flow Control variables to the minimum value for - controlling packet reception. - - - Tx Hangs Possible Under Stress - ------------------------------ - Under stress conditions, if TX hangs occur, turning off TSO - "ethtool -K eth0 tso off" may resolve the problem. - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/ixgbe.rst b/Documentation/networking/ixgbe.rst new file mode 100644 index 000000000000..725fc697fd8f --- /dev/null +++ b/Documentation/networking/ixgbe.rst @@ -0,0 +1,527 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters +============================================================================= + +Intel 10 Gigabit Linux driver. +Copyright(c) 1999-2018 Intel Corporation. 
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Additional Configurations
+- Known Issues
+- Support
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller 82598
+ * Intel(R) Ethernet Controller 82599
+ * Intel(R) Ethernet Controller X520
+ * Intel(R) Ethernet Controller X540
+ * Intel(R) Ethernet Controller X550
+ * Intel(R) Ethernet Controller X552
+ * Intel(R) Ethernet Controller X553
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+SFP+ Devices with Pluggable Optics
+----------------------------------
+
+82599-BASED ADAPTERS
+~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- If your 82599-based Intel(R) Network Adapter came with Intel optics or is an
+Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel optics
+and/or the direct attach cables listed below.
+- When 82599-based SFP+ devices are connected back to back, they should be set
+to the same Speed setting via ethtool. Results may vary if you mix speed
+settings.
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| SR Modules                                                                |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ SR (bailed)     | FTLX8571D3BCV-IT |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ SR (bailed)     | AFBR-703SDZ-IN2  |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ SR (bailed)     | AFBR-703SDDZ-IN1 |
++---------------+---------------------------------------+------------------+
+| LR Modules                                                                |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ LR (bailed)     | FTLX1471D3BCV-IT |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ LR (bailed)     | AFCT-701SDZ-IN2  |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ LR (bailed)     | AFCT-701SDDZ-IN1 |
++---------------+---------------------------------------+------------------+
+
+The following is a list of 3rd party SFP+ modules that have received some
+testing. Not all modules are applicable to all devices. 
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| Finisar       | SFP+ SR bailed, 10g single rate       | FTLX8571D3BCL    |
++---------------+---------------------------------------+------------------+
+| Avago         | SFP+ SR bailed, 10g single rate       | AFBR-700SDZ      |
++---------------+---------------------------------------+------------------+
+| Finisar       | SFP+ LR bailed, 10g single rate       | FTLX1471D3BCL    |
++---------------+---------------------------------------+------------------+
+| Finisar       | DUAL RATE 1G/10G SFP+ SR (No Bail)    | FTLX8571D3QCV-IT |
++---------------+---------------------------------------+------------------+
+| Avago         | DUAL RATE 1G/10G SFP+ SR (No Bail)    | AFBR-703SDZ-IN1  |
++---------------+---------------------------------------+------------------+
+| Finisar       | DUAL RATE 1G/10G SFP+ LR (No Bail)    | FTLX1471D3QCV-IT |
++---------------+---------------------------------------+------------------+
+| Avago         | DUAL RATE 1G/10G SFP+ LR (No Bail)    | AFCT-701SDZ-IN1  |
++---------------+---------------------------------------+------------------+
+| Finisar       | 1000BASE-T SFP                        | FCLF8522P2BTL    |
++---------------+---------------------------------------+------------------+
+| Avago         | 1000BASE-T                            | ABCU-5710RZ      |
++---------------+---------------------------------------+------------------+
+| HP            | 1000BASE-SX SFP                       | 453153-001       |
++---------------+---------------------------------------+------------------+
+
+82599-based adapters support all passive and active limiting direct attach
+cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
+
+Laser turns off for SFP+ when ifconfig ethX down
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"ifconfig ethX down" turns off the laser for 82599-based SFP+ fiber adapters.
+"ifconfig ethX up" turns on the laser.
+Alternatively, you can use "ip link set [down/up] dev ethX" to turn the
+laser off and on.
+
+
+82599-based QSFP+ Adapters
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- If your 82599-based Intel(R) Network Adapter came with Intel optics, it only
+supports Intel optics.
+- 82599-based QSFP+ adapters only support 4x10 Gbps connections. 1x40 Gbps
+connections are not supported. QSFP+ link partners must be configured for
+4x10 Gbps.
+- 82599-based QSFP+ adapters do not support automatic link speed detection.
+The link speed must be configured to either 10 Gbps or 1 Gbps to match the
+link partner's speed capabilities. Incorrect speed configurations will result
+in failure to link.
+- Intel(R) Ethernet Converged Network Adapter X520-Q1 only supports the optics
+and direct attach cables listed below.
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| Intel         | DUAL RATE 1G/10G QSFP+ SRL (bailed)   | E10GQSFPSR       |
++---------------+---------------------------------------+------------------+
+
+82599-based QSFP+ adapters support all passive and active limiting QSFP+
+direct attach cables that comply with SFF-8436 v4.1 specifications.
+
+82598-BASED ADAPTERS
+~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- Intel(R) Ethernet Network Adapters that support removable optical modules
+only support their original module type (for example, the Intel(R) 10 Gigabit
+SR Dual Port Express Module only supports SR optical modules). If you plug in
+a different type of module, the driver will not load. 
+- Hot Swapping/hot plugging optical modules is not supported.
+- Only single speed, 10 gigabit modules are supported.
+- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module
+types are not supported. Please see your system documentation for details.
+
+The following is a list of SFP+ modules and direct attach cables that have
+received some testing. Not all modules are applicable to all devices.
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| Finisar       | SFP+ SR bailed, 10g single rate       | FTLX8571D3BCL    |
++---------------+---------------------------------------+------------------+
+| Avago         | SFP+ SR bailed, 10g single rate       | AFBR-700SDZ      |
++---------------+---------------------------------------+------------------+
+| Finisar       | SFP+ LR bailed, 10g single rate       | FTLX1471D3BCL    |
++---------------+---------------------------------------+------------------+
+
+82598-based adapters support all passive direct attach cables that comply with
+SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach cables
+are not supported.
+
+Third party optic modules and cables referred to above are listed only for the
+purpose of highlighting third party specifications and potential
+compatibility, and are not recommendations or endorsements or sponsorship of
+any third party's product by Intel. Intel is not endorsing or promoting
+products made by any third party and the third party reference is provided
+only to share information regarding certain optic modules and cables with the
+above specifications. There may be other manufacturers or suppliers, producing
+or supplying optic modules and cables with similar or matching descriptions.
+Customers must use their own discretion and diligence to purchase optic
+modules and cables from any third party of their choice. Customers are solely
+responsible for assessing the suitability of the product and/or devices and
+for the selection of the vendor for purchasing any product. THE OPTIC MODULES
+AND CABLES REFERRED TO ABOVE ARE NOT WARRANTED OR SUPPORTED BY INTEL. INTEL
+ASSUMES NO LIABILITY WHATSOEVER, AND INTEL DISCLAIMS ANY EXPRESS OR IMPLIED
+WARRANTY, RELATING TO SALE AND/OR USE OF SUCH THIRD PARTY PRODUCTS OR
+SELECTION OF VENDOR BY CUSTOMERS.
+
+Command Line Parameters
+=======================
+
+max_vfs
+-------
+:Valid Range: 1-63
+
+This parameter adds support for SR-IOV. It causes the driver to spawn up to
+max_vfs worth of virtual functions.
+If the value is greater than 0, it will also force the VMDq parameter to be 1
+or more.
+
+NOTE: This parameter is only used on kernel 3.7.x and below. On kernel 3.8.x
+and above, use sysfs to enable VFs. Also, for Red Hat distributions, this
+parameter is only used on version 6.6 and older. For version 6.7 and newer, use
+sysfs. For example::
+
+  echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs  # enable VFs
+  echo 0 > /sys/class/net/$dev/device/sriov_numvfs                # disable VFs
+
+The parameters for the driver are referenced by position. Thus, if you have a
+dual port adapter, or more than one adapter in your system, and want N virtual
+functions per port, you must specify a number for each port with each parameter
+separated by a comma. For example::
+
+  modprobe ixgbe max_vfs=4
+
+This will spawn 4 VFs on the first port. 
+
+::
+
+  modprobe ixgbe max_vfs=2,4
+
+This will spawn 2 VFs on the first port and 4 VFs on the second port.
+
+NOTE: Caution must be used in loading the driver with these parameters.
+Depending on your system configuration, number of slots, etc., it is impossible
+to predict in all cases where the positions would be on the command line.
+
+NOTE: Neither the device nor the driver control how VFs are mapped into config
+space. Bus layout will vary by operating system. On operating systems that
+support it, you can check sysfs to find the mapping.
+
+NOTE: When either SR-IOV mode or VMDq mode is enabled, hardware VLAN filtering
+and VLAN tag stripping/insertion will remain enabled. Please remove the old
+VLAN filter before the new VLAN filter is added. For example::
+
+  ip link set eth0 vf 0 vlan 100  # set VLAN 100 for VF 0
+  ip link set eth0 vf 0 vlan 0    # delete VLAN 100
+  ip link set eth0 vf 0 vlan 200  # set a new VLAN 200 for VF 0
+
+With kernel 3.6, the driver supports the simultaneous usage of max_vfs and DCB
+features, subject to the constraints described below. Prior to kernel 3.6, the
+driver did not support the simultaneous operation of max_vfs greater than 0 and
+the DCB features (multiple traffic classes utilizing Priority Flow Control and
+Extended Transmission Selection).
+
+When DCB is enabled, network traffic is transmitted and received through
+multiple traffic classes (packet buffers in the NIC). The traffic is associated
+with a specific class based on priority, which has a value of 0 through 7 used
+in the VLAN tag. When SR-IOV is not enabled, each traffic class is associated
+with a set of receive/transmit descriptor queue pairs. The number of queue
+pairs for a given traffic class depends on the hardware configuration. When
+SR-IOV is enabled, the descriptor queue pairs are grouped into pools. The
+Physical Function (PF) and each Virtual Function (VF) are allocated a pool of
+receive/transmit descriptor queue pairs. When multiple traffic classes are
+configured (for example, DCB is enabled), each pool contains a queue pair from
+each traffic class. When a single traffic class is configured in the hardware,
+the pools contain multiple queue pairs from the single traffic class.
+
+The number of VFs that can be allocated depends on the number of traffic
+classes that can be enabled. The configurable number of traffic classes for
+each enabled VF is as follows:
+
+- 0 - 15 VFs = Up to 8 traffic classes, depending on device support
+- 16 - 31 VFs = Up to 4 traffic classes
+- 32 - 63 VFs = 1 traffic class
+
+When VFs are configured, the PF is allocated one pool as well. The PF supports
+the DCB features with the constraint that each traffic class will only use a
+single queue pair. When zero VFs are configured, the PF can support multiple
+queue pairs per traffic class.
+
+allow_unsupported_sfp
+---------------------
+:Valid Range: 0,1
+:Default Value: 0 (disabled)
+
+This parameter allows unsupported and untested SFP+ modules on 82599-based
+adapters, as long as the type of module is known to the driver.
+
+debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system
+logs.
+
+
+Additional Features and Configurations
+======================================
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for ixgbe. 
When transmit is enabled, +pause frames are generated when the receive packet buffer crosses a predefined +threshold. When receive is enabled, the transmit unit will halt for the time +delay specified when a pause frame is received. + +NOTE: You must have a flow control capable link partner. + +Flow Control is enabled by default. + +Use ethtool to change the flow control settings. To enable or disable Rx or +Tx Flow Control:: + + ethtool -A eth? rx <on|off> tx <on|off> + +Note: This command only enables or disables Flow Control if auto-negotiation is +disabled. If auto-negotiation is enabled, this command changes the parameters +used for auto-negotiation with the link partner. + +To enable or disable auto-negotiation:: + + ethtool -s eth? autoneg <on|off> + +Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending +on your device, you may not be able to change the auto-negotiation setting. + +NOTE: For 82598 backplane cards entering 1 gigabit mode, flow control default +behavior is changed to off. Flow control in 1 gigabit mode on these devices can +lead to transmit hangs. + +Intel(R) Ethernet Flow Director +------------------------------- +The Intel Ethernet Flow Director performs the following tasks: + +- Directs receive packets according to their flows to different queues. +- Enables tight control on routing a flow in the platform. +- Matches flows and CPU cores for flow affinity. +- Supports multiple parameters for flexible flow classification and load + balancing (in SFP mode only). + +NOTE: Intel Ethernet Flow Director masking works in the opposite manner from +subnet masking. In the following command:: + + #ethtool -N eth11 flow-type ip4 src-ip 172.4.1.2 m 255.0.0.0 dst-ip \ + 172.21.1.1 m 255.128.0.0 action 31 + +The src-ip value that is written to the filter will be 0.4.1.2, not 172.0.0.0 +as might be expected. Similarly, the dst-ip value written to the filter will be +0.21.1.1, not 172.0.0.0. + +To enable or disable the Intel Ethernet Flow Director:: + + # ethtool -K ethX ntuple <on|off> + +When disabling ntuple filters, all the user programmed filters are flushed from +the driver cache and hardware. All needed filters must be re-added when ntuple +is re-enabled. + +To add a filter that directs packet to queue 2, use -U or -N switch:: + + # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1] + +To see the list of filters currently present:: + + # ethtool <-u|-n> ethX + +Sideband Perfect Filters +------------------------ +Sideband Perfect Filters are used to direct traffic that matches specified +characteristics. They are enabled through ethtool's ntuple interface. To add a +new filter use the following command:: + + ethtool -U <device> flow-type <type> src-ip <ip> dst-ip <ip> src-port <port> \ + dst-port <port> action <queue> + +Where: + <device> - the ethernet device to program + <type> - can be ip4, tcp4, udp4, or sctp4 + <ip> - the IP address to match on + <port> - the port number to match on + <queue> - the queue to direct traffic towards (-1 discards the matched traffic) + +Use the following command to delete a filter:: + + ethtool -U <device> delete <N> + +Where <N> is the filter id displayed when printing all the active filters, and +may also have been specified using "loc <N>" when adding the filter. 
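+
+For instance, assuming a filter was previously added with "loc 1" on an
+interface named enp130s0 (both values are examples only), it can be removed
+with::
+
+  ethtool -U enp130s0 delete 1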
+
+The following example matches TCP traffic sent from 192.168.0.1, port 5300,
+directed to 192.168.0.5, port 80, and sends it to queue 7::
+
+  ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 \
+  src-port 5300 dst-port 80 action 7
+
+For each flow-type, the programmed filters must all have the same matching
+input set. For example, issuing the following two commands is acceptable::
+
+  ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+  ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10
+
+Issuing the next two commands, however, is not acceptable, since the first
+specifies src-ip and the second specifies dst-ip::
+
+  ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+  ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10
+
+The second command will fail with an error. You may program multiple filters
+with the same fields, using different values, but, on one device, you may not
+program two TCP4 filters with different matching fields.
+
+Matching on a sub-portion of a field is not supported by the ixgbe driver, thus
+partial mask fields are not supported.
+
+To create filters that direct traffic to a specific Virtual Function, use the
+"user-def" parameter. Specify the user-def as a 64-bit value, where the lower
+32 bits represent the queue number, while the next 8 bits represent which VF.
+Note that 0 is the PF, so the VF identifier is offset by 1. For example::
+
+  ... user-def 0x800000002 ...
+
+specifies to direct traffic to Virtual Function 7 (8 minus 1) into queue 2 of
+that VF.
+
+Note that these filters will not break internal routing rules, and will not
+route traffic that otherwise would not have been sent to the specified Virtual
+Function.
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+  ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+  ip link set mtu 9000 dev eth<x>
+  ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file::
+
+  /etc/sysconfig/network-scripts/ifcfg-eth<x>   (for RHEL)
+  /etc/sysconfig/network/<config_file>          (for SLES)
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9710. This value coincides
+with the maximum Jumbo Frames size of 9728 bytes.
+
+NOTE: This driver will attempt to use multiple page-sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+NOTE: For 82599-based network connections, if you are enabling jumbo frames in
+a virtual function (VF), jumbo frames must first be enabled in the physical
+function (PF). The VF MTU setting cannot be larger than the PF MTU.
+
+Generic Receive Offload, aka GRO
+--------------------------------
+The driver supports the in-kernel software implementation of GRO. GRO has
+shown that by coalescing Rx traffic into larger chunks of data, CPU
+utilization can be significantly reduced when under large Rx load. GRO is an
+evolution of the previously-used LRO interface. GRO is able to coalesce
+other protocols besides TCP. It's also safe to use with configurations that
+are problematic for LRO, namely bridging and iSCSI. 
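+
+GRO is a kernel feature rather than a driver parameter; as a rough sketch, it
+can be inspected and toggled at runtime with ethtool (the interface name ethX
+is a placeholder)::
+
+  ethtool -k ethX | grep generic-receive-offload
+  ethtool -K ethX gro on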
+
+Data Center Bridging (DCB)
+--------------------------
+NOTE:
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+DCB is a configuration Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+Adapter firmware implements LLDP and DCBX protocol agents as per 802.1AB and
+802.1Qaz respectively. The firmware-based DCBX agent runs in willing mode only
+and can accept settings from a DCBX capable peer. Software configuration of
+DCBX parameters via dcbtool/lldptool is not supported.
+
+The ixgbe driver implements the DCB netlink interface layer to allow user-space
+to communicate with the driver and query DCB configuration for the port.
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://www.kernel.org/pub/software/network/ethtool/
+
+FCoE
+----
+The ixgbe driver supports Fiber Channel over Ethernet (FCoE) and Data Center
+Bridging (DCB). This code has no default effect on the regular driver
+operation. Configuring DCB and FCoE is outside the scope of this README. Refer
+to http://www.open-fcoe.org/ for FCoE project information and contact
+ixgbe-eedc@lists.sourceforge.net for DCB information.
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+
+An interrupt is sent to the PF driver notifying it of the spoof attempt. When a
+spoofed packet is detected, the PF driver will send the following message to
+the system log (displayed by the "dmesg" command)::
+
+  ixgbe ethX: ixgbe_spoof_check: n spoofed packets detected
+
+where "X" is the PF interface number and "n" is the number of spoofed packets.
+
+NOTE: This feature can be disabled for a specific Virtual Function (VF)::
+
+  ip link set <pf dev> vf <vf id> spoofchk {off|on}
+
+
+Known Issues/Troubleshooting
+============================
+
+Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS
+-----------------------------------------------------------------------
+Linux KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM.
+This includes traditional PCIe devices, as well as SR-IOV-capable devices based
+on the Intel Ethernet Controller XL710.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net. 
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt deleted file mode 100644 index 687835415707..000000000000 --- a/Documentation/networking/ixgbe.txt +++ /dev/null @@ -1,349 +0,0 @@ -Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Family of -Adapters -============================================================================= - -Intel 10 Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Performance Tuning -- Known Issues -- Support - -Identifying Your Adapter -======================== - -The driver in this release is compatible with 82598, 82599 and X540-based -Intel Network Connections. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/sb/CS-012904.htm - -SFP+ Devices with Pluggable Optics ----------------------------------- - -82599-BASED ADAPTERS - -NOTES: If your 82599-based Intel(R) Network Adapter came with Intel optics, or -is an Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel -optics and/or the direct attach cables listed below. - -When 82599-based SFP+ devices are connected back to back, they should be set to -the same Speed setting via ethtool. Results may vary if you mix speed settings. -82598-based adapters support all passive direct attach cables that comply -with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach -cables are not supported. - -Supplier Type Part Numbers - -SR Modules -Intel DUAL RATE 1G/10G SFP+ SR (bailed) FTLX8571D3BCV-IT -Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDDZ-IN1 -Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDZ-IN2 -LR Modules -Intel DUAL RATE 1G/10G SFP+ LR (bailed) FTLX1471D3BCV-IT -Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDDZ-IN1 -Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDZ-IN2 - -The following is a list of 3rd party SFP+ modules and direct attach cables that -have received some testing. Not all modules are applicable to all devices. - -Supplier Type Part Numbers - -Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL -Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ -Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL - -Finisar DUAL RATE 1G/10G SFP+ SR (No Bail) FTLX8571D3QCV-IT -Avago DUAL RATE 1G/10G SFP+ SR (No Bail) AFBR-703SDZ-IN1 -Finisar DUAL RATE 1G/10G SFP+ LR (No Bail) FTLX1471D3QCV-IT -Avago DUAL RATE 1G/10G SFP+ LR (No Bail) AFCT-701SDZ-IN1 -Finistar 1000BASE-T SFP FCLF8522P2BTL -Avago 1000BASE-T SFP ABCU-5710RZ - -82599-based adapters support all passive and active limiting direct attach -cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. - -Laser turns off for SFP+ when device is down -------------------------------------------- -"ip link set down" turns off the laser for 82599-based SFP+ fiber adapters. -"ip link set up" turns on the laser. - - -82598-BASED ADAPTERS - -NOTES for 82598-Based Adapters: -- Intel(R) Network Adapters that support removable optical modules only support - their original module type (i.e., the Intel(R) 10 Gigabit SR Dual Port - Express Module only supports SR optical modules). If you plug in a different - type of module, the driver will not load. -- Hot Swapping/hot plugging optical modules is not supported. -- Only single speed, 10 gigabit modules are supported. -- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module - types are not supported. 
-  types are not supported. Please see your system documentation for details.
-
-The following is a list of 3rd party SFP+ modules and direct attach cables that
-have received some testing. Not all modules are applicable to all devices.
-
-Supplier    Type                                         Part Numbers
-
-Finisar     SFP+ SR bailed, 10g single rate              FTLX8571D3BCL
-Avago       SFP+ SR bailed, 10g single rate              AFBR-700SDZ
-Finisar     SFP+ LR bailed, 10g single rate              FTLX1471D3BCL
-
-82598-based adapters support all passive direct attach cables that comply
-with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach
-cables are not supported.
-
-
-Flow Control
-------------
-Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
-receiving and transmitting pause frames for ixgbe. When TX is enabled, PAUSE
-frames are generated when the receive packet buffer crosses a predefined
-threshold. When RX is enabled, the transmit unit will halt for the time delay
-specified when a PAUSE frame is received.
-
-Flow Control is enabled by default. If you want to disable a flow control
-capable link partner, use ethtool:
-
-     ethtool -A ethX autoneg off rx off tx off
-
-NOTE: For 82598 backplane cards entering 1 gig mode, flow control default
-behavior is changed to off. Flow control in 1 gig mode on these devices can
-lead to Tx hangs.
-
-Intel(R) Ethernet Flow Director
--------------------------------
-Supports advanced filters that direct receive packets by their flows to
-different queues. Enables tight control on routing a flow in the platform.
-Matches flows and CPU cores for flow affinity. Supports multiple parameters
-for flexible flow classification and load balancing.
-
-Flow Director is enabled only if the kernel supports multiple TX queues.
-
-An included script (set_irq_affinity.sh) automates setting the IRQ to CPU
-affinity.
-
-You can verify that the driver is using Flow Director by looking at the
-counters in ethtool: fdir_miss and fdir_match.
-
-Other ethtool Commands:
-To enable Flow Director
-	ethtool -K ethX ntuple on
-To add a filter
-	Use the -U switch, e.g., ethtool -U ethX flow-type tcp4 src-ip 10.0.128.23
-	action 1
-To see the list of filters currently present:
-	ethtool -u ethX
-
-Perfect Filter: Perfect filter is an interface to load the filter table that
-funnels all flows into queue_0 unless an alternative queue is specified using
-"action". In that case, any flow that matches the filter criteria will be
-directed to the appropriate queue.
-
-If the queue is defined as -1, the filter will drop matching packets.
-
-To account for filter matches and misses, there are two stats in ethtool:
-fdir_match and fdir_miss. In addition, rx_queue_N_packets shows the number of
-packets processed by the Nth queue.
-
-NOTE: Receive Packet Steering (RPS) and Receive Flow Steering (RFS) are not
-compatible with Flow Director. If Flow Director is enabled, they will be
-disabled.
-
-The following three parameters impact Flow Director.
-
-FdirMode
---------
-Valid Range: 0-2 (0=off, 1=ATR, 2=Perfect filter mode)
-Default Value: 1
-
-  Flow Director filtering modes.
-
-FdirPballoc
------------
-Valid Range: 0-2 (0=64k, 1=128k, 2=256k)
-Default Value: 0
-
-  Flow Director allocated packet buffer size.
-
-AtrSampleRate
--------------
-Valid Range: 1-100
-Default Value: 20
-
-  Software ATR Tx packet sample rate. For example, when set to 20, every 20th
-  packet is sampled to see whether it will create a new flow.
-
-Node
-----
-Valid Range: 0-n
-Default Value: -1 (off)
-
-  0 - n: where n is the number of NUMA nodes (i.e. 0 - 3) currently online in
-  your system
-  -1: turns this option off
-
-  The Node parameter allows you to pick which NUMA node you want the adapter
-  to allocate memory on.
-
-max_vfs
--------
-Valid Range: 1-63
-Default Value: 0 (SR-IOV disabled)
-
-  If the value is greater than 0 it will also force the VMDq parameter to be 1
-  or more.
-
-  This parameter adds support for SR-IOV. It causes the driver to spawn up to
-  max_vfs virtual functions.
-
-
-Additional Configurations
-=========================
-
-  Jumbo Frames
-  ------------
-  The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
-  enabled by changing the MTU to a value larger than the default of 1500.
-  The maximum MTU setting for Jumbo Frames is 9710; this coincides with the
-  maximum Jumbo Frames wire size of 9728. Use the ip command to increase the
-  MTU size. For example:
-
-    ip link set dev ethX mtu 9000
-
-  Generic Receive Offload, aka GRO
-  --------------------------------
-  The driver supports the in-kernel software implementation of GRO. By
-  coalescing Rx traffic into larger chunks of data, GRO can significantly
-  reduce CPU utilization under heavy Rx load. GRO is an evolution of the
-  previously-used LRO interface. GRO is able to coalesce other protocols
-  besides TCP. It is also safe to use with configurations that are
-  problematic for LRO, namely bridging and iSCSI.
-
-  Data Center Bridging, aka DCB
-  -----------------------------
-  DCB is a Quality of Service (QoS) implementation in hardware. It uses the
-  VLAN priority tag (802.1p) to filter traffic, which means traffic can be
-  filtered into 8 different priorities. It also enables priority flow
-  control, which can limit or eliminate the number of dropped packets during
-  network stress. Bandwidth can be allocated to each of these priorities,
-  which is enforced at the hardware level.
-
-  To enable DCB support in ixgbe, you must enable the DCB netlink layer to
-  allow the userspace tools (see below) to communicate with the driver.
-  This can be found in the kernel configuration here:
-
-        -> Networking support
-              -> Networking options
-                    -> Data Center Bridging support
-
-  Once this is selected, DCB support must be selected for ixgbe. This can
-  be found here:
-
-        -> Device Drivers
-              -> Network device support (NETDEVICES [=y])
-                    -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
-                          -> Intel(R) 10GbE PCI Express adapters support
-                                -> Data Center Bridging (DCB) Support
-
-  After these options are selected, you must rebuild your kernel and your
-  modules.
-
-  In order to use DCB, userspace tools must be downloaded and installed.
-  The dcbd tools can be found at:
-
-        http://e1000.sf.net
-
-  Ethtool
-  -------
-  The driver utilizes the ethtool interface for driver configuration and
-  diagnostics, as well as displaying statistical information. The latest
-  ethtool version is required for this functionality.
-
-  The latest release of ethtool can be found from
-  https://www.kernel.org/pub/software/network/ethtool/
-
-  FCoE
-  ----
-  This release of the ixgbe driver contains new code to enable users to use
-  Fibre Channel over Ethernet (FCoE) and Data Center Bridging (DCB)
-  functionality that is supported by the 82598-based hardware. This code has
-  no default effect on the regular driver operation, and configuring DCB and
-  FCoE is outside the scope of this driver README.
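As a quick sanity check for the Flow Director and ethtool material above, a
minimal sketch (the interface name ethX is a placeholder, as in the examples
above):

    ethtool -i ethX               # driver name, version and firmware in use
    ethtool -S ethX | grep fdir   # the fdir_match/fdir_miss counters noted earlier

Both are standard ethtool queries; they complement the DCB and FCoE pointers
that follow.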
-  Refer to http://www.open-fcoe.org/ for FCoE project information and contact
-  e1000-eedc@lists.sourceforge.net for DCB information.
-
-  MAC and VLAN anti-spoofing feature
-  ----------------------------------
-  When a malicious driver attempts to send a spoofed packet, it is dropped by
-  the hardware and not transmitted. An interrupt is sent to the PF driver
-  notifying it of the spoof attempt.
-
-  When a spoofed packet is detected, the PF driver will send the following
-  message to the system log (displayed by the "dmesg" command):
-
-    Spoof event(s) detected on VF (n)
-
-  where n is the VF that attempted the spoofing.
-
-
-Performance Tuning
-==================
-
-An excellent article on performance tuning can be found at:
-
-http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
-
-
-Known Issues
-============
-
-  Enabling SR-IOV in a 32-bit or 64-bit Microsoft* Windows* Server 2008/R2
-  Guest OS using Intel(R) 82576-based GbE or Intel(R) 82599-based 10GbE
-  controller under KVM
-  ------------------------------------------------------------------------
-  KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
-  includes traditional PCIe devices, as well as SR-IOV-capable devices using
-  Intel 82576-based and 82599-based controllers.
-
-  While direct assignment of a PCIe device or an SR-IOV Virtual Function (VF)
-  to a Linux-based VM running a 2.6.32 or later kernel works fine, there is a
-  known issue with a Microsoft Windows Server 2008 VM that results in a
-  "yellow bang" error. The problem is not within the Intel driver or the
-  SR-IOV logic of the VMM, but within KVM itself: KVM emulates an older CPU
-  model for the guests, and that older CPU model does not support MSI-X
-  interrupts, which is a requirement for Intel SR-IOV.
-
-  If you wish to use the Intel 82576 or 82599-based controllers in SR-IOV mode
-  with KVM and a Microsoft Windows Server 2008 guest, try the following
-  workaround. The workaround is to tell KVM to emulate a different model of
-  CPU when using qemu to create the KVM guest:
-
-    "-cpu qemu64,model=13"
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
-    http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-    http://e1000.sourceforge.net
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgbevf.rst b/Documentation/networking/ixgbevf.rst
new file mode 100644
index 000000000000..56cde6366c2f
--- /dev/null
+++ b/Documentation/networking/ixgbevf.rst
@@ -0,0 +1,66 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet
+=============================================================
+
+Intel 10 Gigabit Virtual Function Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Known Issues
+- Support
+
+This driver supports 82599, X540, X550, and X552-based virtual function devices
+that can only be activated on kernels that support SR-IOV.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
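As a minimal sketch of what "activated on kernels that support SR-IOV" means
in practice (not part of the original document; the PF interface name
enp1s0f0 and the VF count of 4 are assumptions), virtual functions are
typically created through the physical function's sysfs attribute, after
which this VF driver binds to the new devices:

    # create four VFs on the PF; ixgbevf attaches to each new VF device
    echo 4 > /sys/class/net/enp1s0f0/device/sriov_numvfs

Writing 0 to the same attribute removes the VFs again.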
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller 82598
+ * Intel(R) Ethernet Controller 82599
+ * Intel(R) Ethernet Controller X520
+ * Intel(R) Ethernet Controller X540
+ * Intel(R) Ethernet Controller X550
+ * Intel(R) Ethernet Controller X552
+ * Intel(R) Ethernet Controller X553
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+Known Issues/Troubleshooting
+============================
+
+SR-IOV requires the correct platform and OS support.
+
+The guest OS loading this driver must support MSI-X interrupts.
+
+This driver is only supported as a loadable module at this time. Intel is not
+supplying patches against the kernel source to allow for static linking of the
+drivers.
+
+VLANs: There is a limit of a total of 64 shared VLANs to 1 or more VFs.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ixgbevf.txt b/Documentation/networking/ixgbevf.txt
deleted file mode 100644
index 53d8d2a5a6a3..000000000000
--- a/Documentation/networking/ixgbevf.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
-
-Intel 10 Gigabit Virtual Function Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Known Issues/Troubleshooting
-- Support
-
-This file describes the ixgbevf Linux* Base Driver for Intel Network
-Connection.
-
-The ixgbevf driver supports 82599-based virtual function devices that can only
-be activated on kernels with CONFIG_PCI_IOV enabled.
-
-The ixgbevf driver supports virtual functions generated by the ixgbe driver
-with a max_vfs value of 1 or greater.
-
-The guest OS loading the ixgbevf driver must support MSI-X interrupts.
-
-VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
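A minimal sketch of the max_vfs flow described above (not from the original
document; the VF count of 2 is an arbitrary example, and on newer kernels the
sriov_numvfs sysfs interface shown earlier is generally preferred over this
module parameter):

    # ask the PF driver to create VFs at load time, then load the VF driver
    modprobe ixgbe max_vfs=2
    modprobe ixgbevf

In practice ixgbevf is usually loaded automatically once the VF PCI devices
appear; the explicit modprobe is only illustrative.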
- -Identifying Your Adapter -======================== - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -Known Issues/Troubleshooting -============================ - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0 deleted file mode 100644 index f9158e831e79..000000000000 --- a/LICENSES/other/CC-BY-SA-4.0 +++ /dev/null @@ -1,397 +0,0 @@ -Valid-License-Identifier: CC-BY-SA-4.0 -SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0 -Usage-Guide: - To use the Creative Commons Attribution Share Alike 4.0 International - license put the following SPDX tag/value pair into a comment according to - the placement guidelines in the licensing rules documentation: - SPDX-License-Identifier: CC-BY-SA-4.0 -License-Text: - -Creative Commons Attribution-ShareAlike 4.0 International - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of Creative -Commons public licenses does not create a lawyer-client or other -relationship. Creative Commons makes its licenses and related information -available on an "as-is" basis. Creative Commons gives no warranties -regarding its licenses, any material licensed under their terms and -conditions, or any related information. Creative Commons disclaims all -liability for damages resulting from their use to the fullest extent -possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share original -works of authorship and other material subject to copyright and certain -other rights specified in the public license below. The following -considerations are for informational purposes only, are not exhaustive, and -do not form part of our licenses. - -Considerations for licensors: Our public licenses are intended for use by -those authorized to give the public permission to use material in ways -otherwise restricted by copyright and certain other rights. Our licenses -are irrevocable. Licensors should read and understand the terms and -conditions of the license they choose before applying it. Licensors should -also secure all rights necessary before applying our licenses so that the -public can reuse the material as expected. Licensors should clearly mark -any material not subject to the license. This includes other CC-licensed -material, or material used under an exception or limitation to -copyright. More considerations for licensors : -wiki.creativecommons.org/Considerations_for_licensors - -Considerations for the public: By using one of our public licenses, a -licensor grants the public permission to use the licensed material under -specified terms and conditions. If the licensor's permission is not -necessary for any reason - for example, because of any applicable exception -or limitation to copyright - then that use is not regulated by the -license. 
Our licenses grant only permissions under copyright and certain -other rights that a licensor has authority to grant. Use of the licensed -material may still be restricted for other reasons, including because -others have copyright or other rights in the material. A licensor may make -special requests, such as asking that all changes be marked or described. - -Although not required by our licenses, you are encouraged to respect those -requests where reasonable. More considerations for the public : -wiki.creativecommons.org/Considerations_for_licensees - -Creative Commons Attribution-ShareAlike 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree to -be bound by the terms and conditions of this Creative Commons -Attribution-ShareAlike 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You such -rights in consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - -Section 1 - Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material and - in which the Licensed Material is translated, altered, arranged, - transformed, or otherwise modified in a manner requiring permission - under the Copyright and Similar Rights held by the Licensor. For - purposes of this Public License, where the Licensed Material is a - musical work, performance, or sound recording, Adapted Material is - always produced where the Licensed Material is synched in timed - relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright and - Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative Commons - as essentially the equivalent of this Public License. - - d. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright Treaty - adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or any - other exception or limitation to Copyright and Similar Rights that - applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name of - a Creative Commons Public License. The License Elements of this - Public License are Attribution and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, or - other material to which the Licensor applied this Public License. - - i. 
Licensed Rights means the rights granted to You subject to the terms - and conditions of this Public License, which are limited to all - Copyright and Similar Rights that apply to Your use of the Licensed - Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such as - reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the public - may access the material from a place and at a time individually - chosen by them. - - l. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially equivalent - rights anywhere in the world. m. You means the individual or entity - exercising the Licensed Rights under this Public License. Your has a - corresponding meaning. - -Section 2 - Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, the - Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - A. reproduce and Share the Licensed Material, in whole or in part; and - - B. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with its - terms and conditions. - - 3. Term. The term of this Public License is specified in Section 6(a). - - 4. Media and formats; technical modifications allowed. The Licensor - authorizes You to exercise the Licensed Rights in all media and - formats whether now known or hereafter created, and to make - technical modifications necessary to do so. The Licensor waives - and/or agrees not to assert any right or authority to forbid You - from making technical modifications necessary to exercise the - Licensed Rights, including technical modifications necessary to - circumvent Effective Technological Measures. For purposes of - this Public License, simply making modifications authorized by - this Section 2(a)(4) never produces Adapted Material. - - 5. Downstream recipients. - - A. Offer from the Licensor - Licensed Material. Every recipient - of the Licensed Material automatically receives an offer - from the Licensor to exercise the Licensed Rights under the - terms and conditions of this Public License. - - B. Additional offer from the Licensor - Adapted Material. Every - recipient of Adapted Material from You automatically - receives an offer from the Licensor to exercise the Licensed - Rights in the Adapted Material under the conditions of the - Adapter's License You apply. - - C. No downstream restrictions. You may not offer or impose any - additional or different terms or conditions on, or apply any - Effective Technological Measures to, the Licensed Material - if doing so restricts exercise of the Licensed Rights by any - recipient of the Licensed Material. - - 6. No endorsement. 
Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You are, - or that Your use of the Licensed Material is, connected with, or - sponsored, endorsed, or granted official status by, the Licensor - or others designated to receive attribution as provided in - Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not licensed - under this Public License, nor are publicity, privacy, and/or - other similar personality rights; however, to the extent - possible, the Licensor waives and/or agrees not to assert any - such rights held by the Licensor to the limited extent necessary - to allow You to exercise the Licensed Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this Public - License. - - 3. To the extent possible, the Licensor waives any right to collect - royalties from You for the exercise of the Licensed Rights, - whether directly or through a collecting society under any - voluntary or waivable statutory or compulsory licensing - scheme. In all other cases the Licensor expressly reserves any - right to collect such royalties. - -Section 3 - License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified form), - You must: - - A. retain the following if it is supplied by the Licensor with - the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by the - Licensor (including by pseudonym if designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of warranties; - - v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; - - B. indicate if You modified the Licensed Material and retain an - indication of any previous modifications; and - - C. indicate the Licensed Material is licensed under this Public - License, and include the text of, or the URI or hyperlink to, - this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. b. ShareAlike.In addition to the - conditions in Section 3(a), if You Share Adapted Material You - produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - -Section 4 - Sui Generis Database Rights. 
- -Where the Licensed Rights include Sui Generis Database Rights that apply to -Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right to - extract, reuse, reproduce, and Share all or a substantial portion of - the contents of the database; - - b. if You include all or a substantial portion of the database contents - in a database in which You have Sui Generis Database Rights, then - the database in which You have Sui Generis Database Rights (but not - its individual contents) is Adapted Material, including for purposes - of Section 3(b); and - - c. You must comply with the conditions in Section 3(a) if You Share all - or a substantial portion of the contents of the database. - - For the avoidance of doubt, this Section 4 supplements and does not - replace Your obligations under this Public License where the Licensed - Rights include other Copyright and Similar Rights. - -Section 5 - Disclaimer of Warranties and Limitation of Liability. - - a. Unless otherwise separately undertaken by the Licensor, to the - extent possible, the Licensor offers the Licensed Material as-is and - as-available, and makes no representations or warranties of any kind - concerning the Licensed Material, whether express, implied, - statutory, or other. This includes, without limitation, warranties - of title, merchantability, fitness for a particular purpose, - non-infringement, absence of latent or other defects, accuracy, or - the presence or absence of errors, whether or not known or - discoverable. Where disclaimers of warranties are not allowed in - full or in part, this disclaimer may not apply to You. - - b. To the extent possible, in no event will the Licensor be liable to - You on any legal theory (including, without limitation, negligence) - or otherwise for any direct, special, indirect, incidental, - consequential, punitive, exemplary, or other losses, costs, - expenses, or damages arising out of this Public License or use of - the Licensed Material, even if the Licensor has been advised of the - possibility of such losses, costs, expenses, or damages. Where a - limitation of liability is not allowed in full or in part, this - limitation may not apply to You. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent possible, - most closely approximates an absolute disclaimer and waiver of all - liability. - -Section 6 - Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided it - is cured within 30 days of Your discovery of the violation; or - - 2. upon express reinstatement by the Licensor. - - c. For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations of - this Public License. - - d. For the avoidance of doubt, the Licensor may also offer the Licensed - Material under separate terms or conditions or stop distributing the - Licensed Material at any time; however, doing so will not terminate - this Public License. - - e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. - -Section 7 - Other Terms and Conditions. 
- - a. The Licensor shall not be bound by any additional or different terms - or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - -Section 8 - Interpretation. - - a. For the avoidance of doubt, this Public License does not, and shall - not be interpreted to, reduce, limit, restrict, or impose conditions - on any use of the Licensed Material that could lawfully be made - without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted as - a limitation upon, or waiver of, any privileges and immunities that - apply to the Licensor or You, including from the legal processes of - any jurisdiction or authority. - -Creative Commons is not a party to its public licenses. Notwithstanding, -Creative Commons may elect to apply one of its public licenses to material -it publishes and in those instances will be considered the "Licensor." The -text of the Creative Commons public licenses is dedicated to the public -domain under the CC0 Public Domain Dedication. Except for the limited -purpose of indicating that material is shared under a Creative Commons -public license or as otherwise permitted by the Creative Commons policies -published at creativecommons.org/policies, Creative Commons does not -authorize the use of the trademark "Creative Commons" or any other -trademark or logo of Creative Commons without its prior written consent -including, without limitation, in connection with any unauthorized -modifications to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For the -avoidance of doubt, this paragraph does not form part of the public -licenses. - -Creative Commons may be contacted at creativecommons.org. 
diff --git a/MAINTAINERS b/MAINTAINERS index 7f1399ac028e..e0eb8ec08789 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7351,15 +7351,16 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git S: Supported F: Documentation/networking/e100.rst F: Documentation/networking/e1000.rst -F: Documentation/networking/e1000e.txt -F: Documentation/networking/igb.txt -F: Documentation/networking/igbvf.txt -F: Documentation/networking/ixgb.txt -F: Documentation/networking/ixgbe.txt -F: Documentation/networking/ixgbevf.txt -F: Documentation/networking/i40e.txt -F: Documentation/networking/iavf.txt -F: Documentation/networking/ice.txt +F: Documentation/networking/e1000e.rst +F: Documentation/networking/fm10k.rst +F: Documentation/networking/igb.rst +F: Documentation/networking/igbvf.rst +F: Documentation/networking/ixgb.rst +F: Documentation/networking/ixgbe.rst +F: Documentation/networking/ixgbevf.rst +F: Documentation/networking/i40e.rst +F: Documentation/networking/iavf.rst +F: Documentation/networking/ice.rst F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h @@ -10160,7 +10161,6 @@ L: netdev@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git S: Maintained -F: net/core/flow.c F: net/xfrm/ F: net/key/ F: net/ipv4/xfrm* @@ -13100,7 +13100,7 @@ SELINUX SECURITY MODULE M: Paul Moore <paul@paul-moore.com> M: Stephen Smalley <sds@tycho.nsa.gov> M: Eric Paris <eparis@parisplace.org> -L: selinux@tycho.nsa.gov (moderated for non-subscribers) +L: selinux@vger.kernel.org W: https://selinuxproject.org W: https://github.com/SELinuxProject T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git @@ -15745,7 +15745,7 @@ F: include/linux/regulator/ VRF M: David Ahern <dsa@cumulusnetworks.com> -M: Shrijeet Mukherjee <shm@cumulusnetworks.com> +M: Shrijeet Mukherjee <shrijeet@gmail.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/vrf.c @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Merciless Moray # *DOCUMENTATION* diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 450c7a4fbc8a..cb094e55dc5f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, - /* ICC_ASGI1R */ - { CRm64(12), Op1( 1), is64, access_gic_sgi}, - /* ICC_SGI0R */ - { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. 
*/ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* ICC_SRE */ { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..b3354ff94e79 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). 
- */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f329b466e68f..2d14f17838d2 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info, r.gr[30] = get_parisc_stackpointer(); regs = &r; } - unwind_frame_init(info, task, &r); + unwind_frame_init(info, task, regs); } else { unwind_frame_init_from_blocked_task(info, task); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2fdc865ca374..2a2486526d1f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -114,7 +114,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -132,7 +132,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 09acf0ddec10..45b4bf1875e6 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -427,8 +427,9 @@ #define __NR_preadv2 358 #define __NR_pwritev2 359 #define __NR_statx 360 +#define __NR_io_pgetevents 361 -#define NR_syscalls 361 +#define NR_syscalls 362 /* Bitmask values returned from kern_features system call. 
*/ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index cc42225c20f3..4e8f56c3793c 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -115,8 +115,8 @@ static int auxio_probe(struct platform_device *dev) auxio_devtype = AUXIO_TYPE_SBUS; size = 1; } else { - printk("auxio: Unknown parent bus type [%pOFn]\n", - dp->parent); + printk("auxio: Unknown parent bus type [%s]\n", + dp->parent->name); return -ENODEV; } auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio"); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d3149baaa33c..67b3e6b3ce5d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/cpudata.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/sched/clock.h> #include <asm/nmi.h> #include <asm/pcr.h> #include <asm/cacheflush.h> @@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, @@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c index d941875dd718..92627abce311 100644 --- a/arch/sparc/kernel/power.c +++ b/arch/sparc/kernel/power.c @@ -41,8 +41,8 @@ static int power_probe(struct platform_device *op) power_reg = of_ioremap(res, 0, 0x4, "power"); - printk(KERN_INFO "%pOFn: Control reg 
at %llx\n", - op->dev.of_node, res->start); + printk(KERN_INFO "%s: Control reg at %llx\n", + op->dev.of_node->name, res->start); if (has_button_interrupt(irq, op->dev.of_node)) { if (request_irq(irq, diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 17c87d29ff20..b51cbb9e87dc 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -68,8 +68,8 @@ static void __init sparc32_path_component(struct device_node *dp, char *tmp_buf) return; regs = rprop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -84,8 +84,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) return; regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -104,13 +104,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; devfn = (regs->phys_hi >> 8) & 0xff; if (devfn & 0x07) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, devfn >> 3, devfn & 0x07); } else { - sprintf(tmp_buf, "%pOFn@%x", - dp, + sprintf(tmp_buf, "%s@%x", + dp->name, devfn >> 3); } } @@ -127,8 +127,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -167,8 +167,8 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) return; device = prop->value; - sprintf(tmp_buf, "%pOFn:%d:%d@%x,%x", - dp, *vendor, *device, + sprintf(tmp_buf, "%s:%d:%d@%x,%x", + dp->name, *vendor, *device, *intr, reg0); } @@ -201,7 +201,7 @@ char * __init build_path_component(struct device_node *dp) tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp); + strcpy(tmp_buf, dp->name); n = prom_early_alloc(strlen(tmp_buf) + 1); strcpy(n, tmp_buf); diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index 6220411ce8fc..baeaeed64993 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -82,8 +82,8 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) regs = rprop->value; if (!of_node_is_root(dp->parent)) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); return; @@ -97,17 +97,17 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) const char *prefix = (type == 0) ? 
"m" : "i"; if (low_bits) - sprintf(tmp_buf, "%pOFn@%s%x,%x", - dp, prefix, + sprintf(tmp_buf, "%s@%s%x,%x", + dp->name, prefix, high_bits, low_bits); else - sprintf(tmp_buf, "%pOFn@%s%x", - dp, + sprintf(tmp_buf, "%s@%s%x", + dp->name, prefix, high_bits); } else if (type == 12) { - sprintf(tmp_buf, "%pOFn@%x", - dp, high_bits); + sprintf(tmp_buf, "%s@%x", + dp->name, high_bits); } } @@ -122,8 +122,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; if (!of_node_is_root(dp->parent)) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); return; @@ -138,8 +138,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) if (tlb_type >= cheetah) mask = 0x7fffff; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, *(u32 *)prop->value, (unsigned int) (regs->phys_addr & mask)); } @@ -156,8 +156,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) return; regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -176,13 +176,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; devfn = (regs->phys_hi >> 8) & 0xff; if (devfn & 0x07) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, devfn >> 3, devfn & 0x07); } else { - sprintf(tmp_buf, "%pOFn@%x", - dp, + sprintf(tmp_buf, "%s@%x", + dp->name, devfn >> 3); } } @@ -203,8 +203,8 @@ static void __init upa_path_component(struct device_node *dp, char *tmp_buf) if (!prop) return; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, *(u32 *) prop->value, (unsigned int) (regs->phys_addr & 0xffffffffUL)); } @@ -221,7 +221,7 @@ static void __init vdev_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x", dp, *regs); + sprintf(tmp_buf, "%s@%x", dp->name, *regs); } /* "name@addrhi,addrlo" */ @@ -236,8 +236,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); } @@ -257,8 +257,8 @@ static void __init i2c_path_component(struct device_node *dp, char *tmp_buf) /* This actually isn't right... should look at the #address-cells * property of the i2c bus node etc. etc. 
*/ - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, regs[0], regs[1]); + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs[0], regs[1]); } /* "name@reg0[,reg1]" */ @@ -274,11 +274,11 @@ static void __init usb_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; if (prop->length == sizeof(u32) || regs[1] == 1) { - sprintf(tmp_buf, "%pOFn@%x", - dp, regs[0]); + sprintf(tmp_buf, "%s@%x", + dp->name, regs[0]); } else { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, regs[0], regs[1]); + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs[0], regs[1]); } } @@ -295,11 +295,11 @@ static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf regs = prop->value; if (regs[2] || regs[3]) { - sprintf(tmp_buf, "%pOFn@%08x%08x,%04x%08x", - dp, regs[0], regs[1], regs[2], regs[3]); + sprintf(tmp_buf, "%s@%08x%08x,%04x%08x", + dp->name, regs[0], regs[1], regs[2], regs[3]); } else { - sprintf(tmp_buf, "%pOFn@%08x%08x", - dp, regs[0], regs[1]); + sprintf(tmp_buf, "%s@%08x%08x", + dp->name, regs[0], regs[1]); } } @@ -361,7 +361,7 @@ char * __init build_path_component(struct device_node *dp) tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp); + strcpy(tmp_buf, dp->name); n = prom_early_alloc(strlen(tmp_buf) + 1); strcpy(n, tmp_buf); diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index f6528884a2c8..4073e2b87dd0 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -84,8 +84,9 @@ __handle_signal: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 + andn %l1, %l4, %l1 ba,pt %xcc, __handle_preemption_continue - andn %l1, %l4, %l1 + srl %l4, 20, %l4 /* When returning from a NMI (%pil==15) interrupt we want to * avoid running softirqs, doing IRQ tracing, preempting, etc. 
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 12bee14b552c..621a363098ec 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -90,4 +90,4 @@ sys_call_table: /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen /*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .long sys_statx +/*360*/ .long sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 387ef993880a..bb68c805b891 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -91,7 +91,7 @@ sys_call_table32: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, compat_sys_io_pgetevents #endif /* CONFIG_COMPAT */ @@ -173,4 +173,4 @@ sys_call_table: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, sys_io_pgetevents diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index 3feb3d960ca5..75dca9aab737 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -33,9 +33,19 @@ #define TICK_PRIV_BIT (1ULL << 63) #endif +#ifdef CONFIG_SPARC64 #define SYSCALL_STRING \ "ta 0x6d;" \ - "sub %%g0, %%o0, %%o0;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif #define SYSCALL_CLOBBERS \ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index f51595f861b8..5eaff3c1aa0c 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -262,7 +262,9 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); + if (err) + return err; vdso_enabled = val; - return err; + return 0; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 28764dacf018..466f66c8a7f8 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2767c625a52c..fbbf1ba57ec6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -389,6 +389,13 @@ * that register for the time this macro runs */ + /* + * The high bits of the CS dword (__csh) are used for + * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case + * hardware didn't do this for us. + */ + andl $(0x0000ffff), PT_CS(%esp) + /* Are we on the entry stack? Bail out if not! 
*/ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -407,12 +414,6 @@ /* Load top of task-stack into %edi */ movl TSS_entry2task_stack(%edi), %edi - /* - * Clear unused upper bits of the dword containing the word-sized CS - * slot in pt_regs in case hardware didn't clear it for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Special case - entry from kernel mode via entry stack */ #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 957dfb693ecc..f95dcb209fdf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry) xorl %ebx, %ebx 1: + /* + * Always stash CR3 in %r14. This value will be restored, + * verbatim, at exit. Needed if paranoid_entry interrupted + * another entry that already switched to the user CR3 value + * but has not yet returned to userspace. + * + * This is also why CS (stashed in the "iret frame" by the + * hardware at entry) can not be used: this may be a return + * to kernel code, but with a user CR3 value. + */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret @@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel @@ -1626,6 +1638,7 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 testl %ebx, %ebx /* swapgs needed? 
*/ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e9202a0de8f0..1a19d11cfbbd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b64acb08a62b..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b6..61a949d84dfa 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. 
*/ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 661583662430..71c0b01d93b1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override, int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; -#endif /* * If SME is active then swiotlb will be set to 1 so that bounce diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be01328eb755..fddaefc51fb6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include <asm/time.h> #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b52bd2b6cdb4..6d5dc5dabfd7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -58,7 +58,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -104,7 +104,7 @@ void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d96092b35936..61ccfb13899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) static inline bool svm_sev_enabled(void) { - return max_sev_asid; + return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; } static inline bool sev_guest(struct kvm *kvm) { +#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; +#else + return false; +#endif } static inline int sev_get_asid(struct kvm *kvm) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 612fd17be635..e665aa7167cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm) goto out; } + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the + * base of EPT PML4 table, strip off EPT configuration information. 
+ */ ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); diff --git a/block/blk-lib.c b/block/blk-lib.c index d1b9dd03da25..bbd44666f2b5 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -29,9 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, { struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; - unsigned int granularity; unsigned int op; - int alignment; sector_t bs_mask; if (!q) @@ -54,38 +52,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if ((sector | nr_sects) & bs_mask) return -EINVAL; - /* Zero-sector (unknown) and one-sector granularities are the same. */ - granularity = max(q->limits.discard_granularity >> 9, 1U); - alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - while (nr_sects) { - unsigned int req_sects; - sector_t end_sect, tmp; + unsigned int req_sects = nr_sects; + sector_t end_sect; - /* - * Issue in chunks of the user defined max discard setting, - * ensuring that bi_size doesn't overflow - */ - req_sects = min_t(sector_t, nr_sects, - q->limits.max_discard_sectors); if (!req_sects) goto fail; if (req_sects > UINT_MAX >> 9) req_sects = UINT_MAX >> 9; - /* - * If splitting a request, and the next starting sector would be - * misaligned, stop the discard at the previous aligned sector. - */ end_sect = sector + req_sects; - tmp = end_sect; - if (req_sects < nr_sects && - sector_div(tmp, granularity) != alignment) { - end_sect = end_sect - alignment; - sector_div(end_sect, granularity); - end_sect = end_sect * granularity + alignment; - req_sects = end_sect - sector; - } bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8e20a0677dcf..8ac93fcbaa2e 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -310,6 +310,7 @@ static void scale_up(struct rq_wb *rwb) rq_depth_scale_up(&rwb->rq_depth); calc_wb_limits(rwb); rwb->unknown_cnt = 0; + rwb_wake_all(rwb); rwb_trace_step(rwb, "scale up"); } @@ -318,7 +319,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle) rq_depth_scale_down(&rwb->rq_depth, hard_throttle); calc_wb_limits(rwb); rwb->unknown_cnt = 0; - rwb_wake_all(rwb); rwb_trace_step(rwb, "scale down"); } diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 6470e3c4c990..f8c703426c90 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -241,7 +241,8 @@ static void __iomem *eni_alloc_mem(struct eni_dev *eni_dev, unsigned long *size) len = eni_dev->free_len; if (*size < MID_MIN_BUF_SIZE) *size = MID_MIN_BUF_SIZE; if (*size > MID_MAX_BUF_SIZE) return NULL; - for (order = 0; (1 << order) < *size; order++); + for (order = 0; (1 << order) < *size; order++) + ; DPRINTK("trying: %ld->%d\n",*size,order); best_order = 65; /* we don't have more than 2^64 of anything ... 
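The eni.c loop above only gains a line break, but the idiom is worth spelling out: it computes the smallest power-of-two order that covers *size, and moving the empty body onto its own line makes it visibly intentional. A standalone illustration with a hypothetical size:

#include <stdio.h>

int main(void)
{
	unsigned long size = 3000; /* hypothetical buffer size */
	int order;

	/* smallest order with (1 << order) >= size; the empty body
	 * sits on its own line, as in the fixed driver loop */
	for (order = 0; (1UL << order) < size; order++)
		;
	printf("order=%d -> %lu bytes\n", order, 1UL << order);
	return 0;
}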
*/ index = 0; /* silence GCC */ diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index e89146ddede6..d5c76b50d357 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -126,7 +126,7 @@ static unsigned long dummy[2] = {0,0}; #define zin_n(r) inl(zatm_dev->base+r*4) #define zin(r) inl(zatm_dev->base+uPD98401_##r*4) #define zout(v,r) outl(v,zatm_dev->base+uPD98401_##r*4) -#define zwait while (zin(CMR) & uPD98401_BUSY) +#define zwait() do {} while (zin(CMR) & uPD98401_BUSY) /* RX0, RX1, TX0, TX1 */ static const int mbx_entries[NR_MBX] = { 1024,1024,1024,1024 }; @@ -140,7 +140,7 @@ static const int mbx_esize[NR_MBX] = { 16,16,4,4 }; /* entry size in bytes */ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) { - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -149,10 +149,10 @@ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) static u32 zpeekl(struct zatm_dev *zatm_dev,u32 addr) { - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | uPD98401_IA_RW | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER); } @@ -241,7 +241,7 @@ static void refill_pool(struct atm_dev *dev,int pool) } if (first) { spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(virt_to_bus(first),CER); zout(uPD98401_ADD_BAT | (pool << uPD98401_POOL_SHIFT) | count, CMR); @@ -508,9 +508,9 @@ static int open_rx_first(struct atm_vcc *vcc) } if (zatm_vcc->pool < 0) return -EMSGSIZE; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -571,21 +571,21 @@ static void close_rx(struct atm_vcc *vcc) pos = vcc->vci >> 1; shift = (1-(vcc->vci & 1)) << 4; zpokel(zatm_dev,zpeekl(zatm_dev,pos) & ~(0xffff << shift),pos); - zwait; + zwait(); zout(uPD98401_NOP,CMR); - zwait; + zwait(); zout(uPD98401_NOP,CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); } spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); udelay(10); /* why oh why ... ? 
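The zwait() rework above is the standard cure for statement-like macros: the old #define zwait while (...) parsed only because every call site wrote zwait; and a bare zwait would silently adopt the following statement as its loop body. A compilable sketch, with a volatile flag standing in for the uPD98401 busy bit:

#include <stdio.h>

static volatile int busy; /* stand-in for zin(CMR) & uPD98401_BUSY */

/* explicit empty body: usable only as a statement, and a missing
 * semicolon at the call site now fails to compile */
#define zwait() do { } while (busy)

int main(void)
{
	zwait(); /* returns immediately once busy == 0 */
	printf("controller idle\n");
	return 0;
}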
*/ zout(uPD98401_CLOSE_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close RX channel " "%d\n",vcc->dev->number,zatm_vcc->rx_chan); @@ -699,7 +699,7 @@ printk("NONONONOO!!!!\n"); skb_queue_tail(&zatm_vcc->tx_queue,skb); DPRINTK("QRP=0x%08lx\n",zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+ uPD98401_TXVC_QRP)); - zwait; + zwait(); zout(uPD98401_TX_READY | (zatm_vcc->tx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -891,12 +891,12 @@ static void close_tx(struct atm_vcc *vcc) } spin_lock_irqsave(&zatm_dev->lock, flags); #if 0 - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); #endif - zwait; + zwait(); zout(uPD98401_CLOSE_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close TX channel " "%d\n",vcc->dev->number,chan); @@ -926,9 +926,9 @@ static int open_tx_first(struct atm_vcc *vcc) zatm_vcc->tx_chan = 0; if (vcc->qos.txtp.traffic_class == ATM_NONE) return 0; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -1557,7 +1557,7 @@ static void zatm_phy_put(struct atm_dev *dev,unsigned char value, struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -1569,10 +1569,10 @@ static unsigned char zatm_phy_get(struct atm_dev *dev,unsigned long addr) struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | uPD98401_IA_RW | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER) & 0xff; } diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5ca56bfae63c..f68e9baffad7 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -36,6 +36,10 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define VDC_TX_RING_SIZE 512 #define VDC_DEFAULT_BLK_SIZE 512 +#define MAX_XFER_BLKS (128 * 1024) +#define MAX_XFER_SIZE (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE) +#define MAX_RING_COOKIES ((MAX_XFER_BLKS / PAGE_SIZE) + 2) + #define WAITING_FOR_LINK_UP 0x01 #define WAITING_FOR_TX_SPACE 0x02 #define WAITING_FOR_GEN_CMD 0x04 @@ -450,7 +454,7 @@ static int __send_request(struct request *req) { struct vdc_port *port = req->rq_disk->private_data; struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - struct scatterlist sg[port->ring_cookies]; + struct scatterlist sg[MAX_RING_COOKIES]; struct vdc_req_entry *rqe; struct vio_disk_desc *desc; unsigned int map_perm; @@ -458,6 +462,9 @@ static int __send_request(struct request *req) u64 len; u8 op; + if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES)) + return -EINVAL; + map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; if (rq_data_dir(req) == READ) { @@ -984,9 +991,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto err_out_free_port; port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE; - port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size); - port->ring_cookies = ((port->max_xfer_size * - port->vdisk_block_size) / PAGE_SIZE) + 2; + 
port->max_xfer_size = MAX_XFER_SIZE; + port->ring_cookies = MAX_RING_COOKIES; err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port); if (err) goto err_out_free_port; diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 20142bc77554..282d1af1d3ba 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -293,13 +293,17 @@ static int btsdio_probe(struct sdio_func *func, tuple = tuple->next; } - /* BCM43341 devices soldered onto the PCB (non-removable) use an - * uart connection for bluetooth, ignore the BT SDIO interface. + /* Broadcom devices soldered onto the PCB (non-removable) use a + * UART connection for Bluetooth, so ignore the BT SDIO interface. */ if (func->vendor == SDIO_VENDOR_ID_BROADCOM && - func->device == SDIO_DEVICE_ID_BROADCOM_43341 && - !mmc_card_is_removable(func->card->host)) - return -ENODEV; + !mmc_card_is_removable(func->card->host)) { + switch (func->device) { + case SDIO_DEVICE_ID_BROADCOM_43341: + case SDIO_DEVICE_ID_BROADCOM_43430: + return -ENODEV; + } + } data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL); if (!data) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 61cde1a7ec1b..7439a7eb50ac 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -264,6 +264,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ + { USB_DEVICE(0x0cf3, 0x535b), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME }, diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9f1392fc7105..f036c8f98ea3 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -40,6 +40,7 @@ #include <linux/platform_device.h> #include <linux/regulator/consumer.h> #include <linux/serdev.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -63,6 +64,9 @@ /* susclk rate */ #define SUSCLK_RATE_32KHZ 32768 +/* Controller debug log header */ +#define QCA_DEBUG_HANDLE 0x2EDC + /* HCI_IBS transmit side sleep protocol states */ enum tx_ibs_states { HCI_IBS_TX_ASLEEP, @@ -849,6 +853,19 @@ static int qca_ibs_wake_ack(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb) +{ + /* We receive debug logs from the chip as ACL packets. + * Instead of sending them down the ACL path to be + * decoded, we push them to the upper layers + * as diagnostic packets.
+ */ + if (get_unaligned_le16(skb->data) == QCA_DEBUG_HANDLE) + return hci_recv_diag(hdev, skb); + + return hci_recv_frame(hdev, skb); +} + #define QCA_IBS_SLEEP_IND_EVENT \ .type = HCI_IBS_SLEEP_IND, \ .hlen = 0, \ @@ -871,7 +888,7 @@ static int qca_ibs_wake_ack(struct hci_dev *hdev, struct sk_buff *skb) .maxlen = HCI_MAX_IBS_SIZE static const struct h4_recv_pkt qca_recv_pkts[] = { - { H4_RECV_ACL, .recv = hci_recv_frame }, + { H4_RECV_ACL, .recv = qca_recv_acl_data }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind }, diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c index ffa5dac221e4..129ebd2588fd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c +++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c @@ -1434,8 +1434,16 @@ static void __init sun4i_ccu_init(struct device_node *node, return; } - /* Force the PLL-Audio-1x divider to 1 */ val = readl(reg + SUN4I_PLL_AUDIO_REG); + + /* + * Force VCO and PLL bias current to lowest setting. Higher + * settings interfere with sigma-delta modulation and result + * in audible noise and distortions when using SPDIF or I2S. + */ + val &= ~GENMASK(25, 16); + + /* Force the PLL-Audio-1x divider to 1 */ val &= ~GENMASK(29, 26); writel(val | (1 << 26), reg + SUN4I_PLL_AUDIO_REG); diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 04f277cade7c..62249d4ed373 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -237,9 +237,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) - pr_err("ULD register fail: No chcr crypto support in cxgb4\n"); - + cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info); return 0; } diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 018fcdb353d2..281cf9cbb44c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->crtcs[i].state = NULL; state->crtcs[i].old_state = NULL; state->crtcs[i].new_state = NULL; + + if (state->crtcs[i].commit) { + drm_crtc_commit_put(state->crtcs[i].commit); + state->crtcs[i].commit = NULL; + } } for (i = 0; i < config->num_total_plane; i++) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 80be74df7ba6..1bb4c318bdd4 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks); void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev, struct drm_atomic_state *old_state) { - struct drm_crtc_state *new_crtc_state; struct drm_crtc *crtc; int i; - for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) { - struct drm_crtc_commit *commit = new_crtc_state->commit; + for (i = 0; i < dev->mode_config.num_crtc; i++) { + struct drm_crtc_commit *commit = old_state->crtcs[i].commit; int ret; - if (!commit) + crtc = old_state->crtcs[i].ptr; + + if (!crtc || !commit) continue; ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ); @@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, drm_crtc_commit_get(commit); commit->abort_completion = true; + + state->crtcs[i].commit = commit; + drm_crtc_commit_get(commit); } for_each_oldnew_connector_in_state(state, 
conn, old_conn_state, new_conn_state, i) { diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index bae43938c8f6..9cbe8f5c9aca 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -567,9 +567,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_crtc *crtc_req = data; struct drm_crtc *crtc; struct drm_plane *plane; - struct drm_connector **connector_set = NULL, *connector; - struct drm_framebuffer *fb = NULL; - struct drm_display_mode *mode = NULL; + struct drm_connector **connector_set, *connector; + struct drm_framebuffer *fb; + struct drm_display_mode *mode; struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; @@ -598,6 +598,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, mutex_lock(&crtc->dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: + connector_set = NULL; + fb = NULL; + mode = NULL; + ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx); if (ret) goto out; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3c9fc99648b7..ff0bfc65a8c1 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -113,6 +113,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, @@ -4279,7 +4282,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK; - hdmi->y420_dc_modes |= dc_mask; + hdmi->y420_dc_modes = dc_mask; } static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 515a7aec57ac..9628dd617826 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1580,6 +1580,25 @@ unlock: } EXPORT_SYMBOL(drm_fb_helper_ioctl); +static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, + const struct fb_var_screeninfo *var_2) +{ + return var_1->bits_per_pixel == var_2->bits_per_pixel && + var_1->grayscale == var_2->grayscale && + var_1->red.offset == var_2->red.offset && + var_1->red.length == var_2->red.length && + var_1->red.msb_right == var_2->red.msb_right && + var_1->green.offset == var_2->green.offset && + var_1->green.length == var_2->green.length && + var_1->green.msb_right == var_2->green.msb_right && + var_1->blue.offset == var_2->blue.offset && + var_1->blue.length == var_2->blue.length && + var_1->blue.msb_right == var_2->blue.msb_right && + var_1->transp.offset == var_2->transp.offset && + var_1->transp.length == var_2->transp.length && + var_1->transp.msb_right == var_2->transp.msb_right; +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1590,7 +1609,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - int depth; if (var->pixclock != 0 || in_dbg_master()) return -EINVAL; @@ -1610,72 +1628,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 
16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: + /* + * drm fbdev emulation doesn't support changing the pixel format at all, + * so reject all pixel format changing requests. + */ + if (!drm_fb_pixel_format_equal(var, &info->var)) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } + return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index e36004fbe453..2a15f2f9271e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -81,9 +81,19 @@ static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate, int i; for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) { - unsigned long ideal = rate * i; + u64 ideal = (u64)rate * i; unsigned long rounded; + /* + * ideal has overflowed the max value that can be stored in an + * unsigned long, and every clk operation we might do on a + * truncated u64 value will give us incorrect results. + * Let's just stop there since bigger dividers will result in + * the same overflow issue. + */ + if (ideal > ULONG_MAX) + goto out; + rounded = clk_hw_round_rate(clk_hw_get_parent(hw), ideal); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9ee9a15e7134..9200e349f29e 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2270,7 +2270,7 @@ EXPORT_SYMBOL(i2c_put_adapter); * * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO. * Or a valid pointer to be used with DMA. After use, release it by - * calling i2c_release_dma_safe_msg_buf(). + * calling i2c_put_dma_safe_msg_buf(). * * This function must only be called from process context! 
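The renamed helper pairs with i2c_get_dma_safe_msg_buf(); a hedged kernel-style sketch of the documented usage follows, where do_pio_xfer() and do_dma_xfer() are hypothetical driver callbacks rather than kernel API:

/* sketch only: obtain a DMA-safe buffer for msg (bouncing if needed),
 * fall back to PIO when none is available, and release it with the
 * renamed helper, copying data back only if the transfer succeeded */
static int xfer_one_msg(struct i2c_adapter *adap, struct i2c_msg *msg)
{
	u8 *buf = i2c_get_dma_safe_msg_buf(msg, 8); /* 8-byte threshold */
	int ret;

	if (!buf)
		return do_pio_xfer(adap, msg);

	ret = do_dma_xfer(adap, buf, msg->len);
	i2c_put_dma_safe_msg_buf(buf, msg, ret == 0);
	return ret;
}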
*/ diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index faa9e6116b2f..73332b9a25b5 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/nospec.h> + #include <linux/uaccess.h> #include <rdma/ib.h> @@ -1120,6 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 21863ddde63e..01d68ed46c1b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include <linux/module.h> #include <linux/nsproxy.h> +#include <linux/nospec.h> + #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> @@ -1676,6 +1678,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 088205d7f1a1..cca1820802b8 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -393,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { - mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf); + mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); } static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, @@ -728,16 +728,11 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, int nent, int cqe_size) { - struct mlx5_frag_buf_ctrl *c = &buf->fbc; - struct mlx5_frag_buf *frag_buf = &c->frag_buf; - u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0}; + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); + u8 log_wq_sz = ilog2(cqe_size); int err; - MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size)); - MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 
1 : 0); - - mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff); - err = mlx5_frag_buf_alloc_node(dev->mdev, nent * cqe_size, frag_buf, @@ -745,6 +740,8 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, if (err) return err; + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + buf->cqe_size = cqe_size; buf->nent = nent; @@ -934,7 +931,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * - cq->buf.fbc.frag_buf.npages; + cq->buf.frag_buf.npages; *cqb = kvzalloc(*inlen, GFP_KERNEL); if (!*cqb) { err = -ENOMEM; @@ -942,11 +939,11 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas); + mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - cq->buf.fbc.frag_buf.page_shift - + cq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); *index = dev->mdev->priv.uar->index; @@ -1365,11 +1362,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); if (!err) { - struct mlx5_frag_buf_ctrl *c; + struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - c = &cq->resize_buf->fbc; - npas = c->frag_buf.npages; - page_shift = c->frag_buf.page_shift; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } } @@ -1390,8 +1386,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, pas, 0); else - mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf, - pas); + mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); MLX5_SET(modify_cq_in, in, modify_field_select_resize_field_select.resize_field_select.resize_field_select, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f2f11e652dcd..66dc337e49a7 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -284,7 +284,7 @@ static bool devx_is_obj_create_cmd(const void *in) case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: @@ -627,9 +627,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); break; - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5d9b7f62a0ba..af32899bb72a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2793,7 +2793,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, return -EINVAL; action->flow_tag = ib_spec->flow_tag.tag_id; - action->has_flow_tag = true; + action->flags |= FLOW_ACT_HAS_TAG; break; case IB_FLOW_SPEC_ACTION_DROP: if 
(FIELDS_NOT_SUPPORTED(ib_spec->drop, @@ -2886,7 +2886,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; return is_crypto && is_ipsec && - (!egress || (!is_drop && !flow_act->has_flow_tag)) ? + (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? VALID_SPEC_VALID : VALID_SPEC_INVALID; } @@ -3320,15 +3320,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + struct mlx5_ib_mcounters *mcounters; + err = flow_counters_set_data(flow_act.counters, ucmd); if (err) goto free; + mcounters = to_mcounters(flow_act.counters); handler->ibcounters = flow_act.counters; dest_arr[dest_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter = - to_mcounters(flow_act.counters)->hw_cntrs_hndl; + dest_arr[dest_num].counter_id = + mlx5_fc_id(mcounters->hw_cntrs_hndl); dest_num++; } @@ -3346,7 +3349,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } - if (flow_act.has_flow_tag && + if ((flow_act.flags & FLOW_ACT_HAS_TAG) && (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 320d4dfe8c2f..289c18db2611 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -435,6 +435,7 @@ struct mlx5_ib_qp { struct mlx5_ib_cq_buf { struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; struct ib_umem *umem; int cqe_size; int nent; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6cba2a02d11b..daf1eb84cd31 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1279,7 +1279,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (dev->rep) MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); @@ -1582,7 +1582,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, create_tir: if (dev->rep) MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index f5ae24865355..b0f9d19b3410 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1346,6 +1346,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ee28ec9e0aba..ffa37adb7681 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -963,7 +963,8 @@ static inline void slave_disable_netpoll(struct slave *slave) return; slave->np = NULL; - __netpoll_free_async(np); + + __netpoll_free(np); } static void bond_poll_controller(struct net_device *bond_dev) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3017ecf82ca5..2eb68769562c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ 
-1190,16 +1190,14 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int bcm_sf2_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = dev_get_drvdata(dev); return dsa_switch_suspend(priv->dev->ds); } static int bcm_sf2_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = dev_get_drvdata(dev); return dsa_switch_resume(priv->dev->ds); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 78ce820b5257..e05d4eddc935 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2907,7 +2907,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3528,7 +3528,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 92945841c8e8..cd7db60a508b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -228,8 +228,11 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000; break; case 2500: - ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | - MV88E6390_PORT_MAC_CTL_ALTSPEED; + if (alt_bit) + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | + MV88E6390_PORT_MAC_CTL_ALTSPEED; + else + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000; break; case 10000: /* all bits set, fall through... */ @@ -291,6 +294,24 @@ int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) return mv88e6xxx_port_set_speed(chip, port, speed, false, false); } +/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6341) */ +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 5 ? 1000 : 2500; + + if (speed > 2500) + return -EOPNOTSUPP; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed == 2500 && port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, !port, true); +} + /* Support 10, 100, 200, 1000 Mbps (e.g. 
88E6352 family) */ int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) { diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index f32f56af8e35..36904c9bf955 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -269,6 +269,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index cdcde7f8e0b2..7e97e620bd44 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -955,8 +955,7 @@ qca8k_set_pm(struct qca8k_priv *priv, int enable) static int qca8k_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct qca8k_priv *priv = platform_get_drvdata(pdev); + struct qca8k_priv *priv = dev_get_drvdata(dev); qca8k_set_pm(priv, 0); @@ -965,8 +964,7 @@ static int qca8k_suspend(struct device *dev) static int qca8k_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct qca8k_priv *priv = platform_get_drvdata(pdev); + struct qca8k_priv *priv = dev_get_drvdata(dev); qca8k_set_pm(priv, 1); diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig index 99b30353541a..9e87d7b8360f 100644 --- a/drivers/net/ethernet/amazon/Kconfig +++ b/drivers/net/ethernet/amazon/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_AMAZON config ENA_ETHERNET tristate "Elastic Network Adapter (ENA) support" - depends on (PCI_MSI && X86) + depends on PCI_MSI && !CPU_BIG_ENDIAN ---help--- This driver supports Elastic Network Adapter (ENA)" diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 284a0a612131..18956e7604a3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3022,20 +3022,10 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, int io_sq_num, io_queue_num; /* In case of LLQ use the llq number in the get feature cmd */ - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num; - - if (io_sq_num == 0) { - dev_err(&pdev->dev, - "Trying to use LLQ but llq_num is 0. 
Fall back into regular queues\n"); - - ena_dev->tx_mem_queue_type = - ENA_ADMIN_PLACEMENT_POLICY_HOST; - io_sq_num = get_feat_ctx->max_queues.max_sq_num; - } - } else { + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + io_sq_num = get_feat_ctx->llq.max_llq_num; + else io_sq_num = get_feat_ctx->max_queues.max_sq_num; - } io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); io_queue_num = min_t(int, io_queue_num, io_sq_num); @@ -3238,7 +3228,7 @@ static int ena_calc_queue_size(struct pci_dev *pdev, if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_legacy_llq_depth); + get_feat_ctx->llq.max_llq_depth); queue_size = rounddown_pow_of_two(queue_size); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index c0568465e10b..096ca5730887 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -279,7 +279,7 @@ static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac) return err; } -int aq_fw2x_update_stats(struct aq_hw_s *self) +static int aq_fw2x_update_stats(struct aq_hw_s *self) { int err = 0; u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 0fe57e36912b..498b373c992d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1471,7 +1471,7 @@ struct bnxt { struct rx_port_stats *hw_rx_port_stats; struct tx_port_stats *hw_tx_port_stats; struct rx_port_stats_ext *hw_rx_port_stats_ext; - struct rx_port_stats_ext *hw_tx_port_stats_ext; + struct tx_port_stats_ext *hw_tx_port_stats_ext; dma_addr_t hw_rx_port_stats_map; dma_addr_t hw_tx_port_stats_map; dma_addr_t hw_rx_port_stats_ext_map; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index b756fc79424e..35564a8a48f9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -320,9 +320,12 @@ int bcmgenet_mii_probe(struct net_device *dev) phydev->advertising = phydev->supported; /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fall back to polling for + * those versions of GENET.
*/ - if (priv->internal_phy) + if (priv->internal_phy && !GENET_IS_V5(priv)) dev->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 0acaef3ef548..8f5bf9166c11 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4156,8 +4156,7 @@ static int macb_remove(struct platform_device *pdev) static int __maybe_unused macb_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *netdev = platform_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); netif_carrier_off(netdev); @@ -4179,8 +4178,7 @@ static int __maybe_unused macb_suspend(struct device *dev) static int __maybe_unused macb_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *netdev = platform_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); if (bp->wol & MACB_WOL_ENABLED) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 4bc211093c98..267322693ed5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -702,15 +702,14 @@ static void uld_attach(struct adapter *adap, unsigned int uld) * about any presently available devices that support its type. Returns * %-EBUSY if a ULD of the same type is already registered. */ -int cxgb4_register_uld(enum cxgb4_uld type, - const struct cxgb4_uld_info *p) +void cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) { int ret = 0; - unsigned int adap_idx = 0; struct adapter *adap; if (type >= CXGB4_ULD_MAX) - return -EINVAL; + return; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { @@ -733,52 +732,29 @@ int cxgb4_register_uld(enum cxgb4_uld type, } if (adap->flags & FULL_INIT_DONE) enable_rx_uld(adap, type); - if (adap->uld[type].add) { - ret = -EBUSY; + if (adap->uld[type].add) goto free_irq; - } ret = setup_sge_txq_uld(adap, type, p); if (ret) goto free_irq; adap->uld[type] = *p; uld_attach(adap, type); - adap_idx++; - } - mutex_unlock(&uld_mutex); - return 0; - + continue; free_irq: - if (adap->flags & FULL_INIT_DONE) - quiesce_rx_uld(adap, type); - if (adap->flags & USING_MSIX) - free_msix_queue_irqs_uld(adap, type); -free_rxq: - free_sge_queues_uld(adap, type); -free_queues: - free_queues_uld(adap, type); -out: - - list_for_each_entry(adap, &adapter_list, list_node) { - if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || - (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) - continue; - if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) - continue; - if (!adap_idx) - break; - adap->uld[type].handle = NULL; - adap->uld[type].add = NULL; - release_sge_txq_uld(adap, type); if (adap->flags & FULL_INIT_DONE) quiesce_rx_uld(adap, type); if (adap->flags & USING_MSIX) free_msix_queue_irqs_uld(adap, type); +free_rxq: free_sge_queues_uld(adap, type); +free_queues: free_queues_uld(adap, type); - adap_idx--; +out: + dev_warn(adap->pdev_dev, + "ULD registration failed for uld type %d\n", type); } mutex_unlock(&uld_mutex); - return ret; + return; } EXPORT_SYMBOL(cxgb4_register_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index de9ad311dacd..5fa9a2d5fc4b 100644 --- 
a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -384,7 +384,7 @@ struct cxgb4_uld_info { int (*tx_handler)(struct sk_buff *skb, struct net_device *dev); }; -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); +void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); int cxgb4_immdata_send(struct net_device *dev, unsigned int idx, diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 50222b7b81f3..0a82fcf16d35 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1722,8 +1722,7 @@ out: static int dm9000_drv_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct board_info *db; if (ndev) { @@ -1745,8 +1744,7 @@ dm9000_drv_suspend(struct device *dev) static int dm9000_drv_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct board_info *db = netdev_priv(ndev); if (ndev) { diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 4778b663653e..bf80855dd0dd 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -452,6 +452,10 @@ struct bufdesc_ex { * initialisation. */ #define FEC_QUIRK_MIB_CLEAR (1 << 15) +/* Only the i.MX25/i.MX27/i.MX28 controllers support the FRBR and FRSR + * registers; those FIFO receive registers are reserved on other platforms.
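Capability bits such as the FEC_QUIRK_HAS_FRREG flag defined just below follow a common driver pattern: per-SoC features are ORed into driver_data once and tested before any optional registers are touched. A self-contained model (flag names mimic the patch; the values are only illustrative):

#include <stdio.h>

#define QUIRK_MIB_CLEAR (1 << 15)
#define QUIRK_HAS_FRREG (1 << 16)

int main(void)
{
	unsigned long quirks = QUIRK_MIB_CLEAR; /* e.g. an SoC without FRBR/FRSR */

	if (quirks & QUIRK_HAS_FRREG)
		printf("dump FRBR/FRSR\n");
	else
		printf("skip reserved FIFO registers\n"); /* avoids faulting reads */
	return 0;
}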
+ */ +#define FEC_QUIRK_HAS_FRREG (1 << 16) struct bufdesc_prop { int qid; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a17cc973d9a3..6db69ba30dcd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -91,14 +91,16 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | @@ -2162,7 +2164,13 @@ static void fec_enet_get_regs(struct net_device *ndev, memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { - off = fec_enet_register_offset[i] / 4; + off = fec_enet_register_offset[i]; + + if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && + !(fep->quirks & FEC_QUIRK_HAS_FRREG)) + continue; + + off >>= 2; buf[off] = readl(&theregs[off]); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ce93fcce2732..76ce2f21178b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -977,35 +977,28 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb, } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - enum hns_desc_type type) + int size, int frag_end, enum hns_desc_type type) { struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; + struct device *dev = ring_to_dev(ring); u32 ol_type_vlan_len_msec = 0; u16 bdtp_fe_sc_vld_ra_ri = 0; + struct skb_frag_struct *frag; + unsigned int frag_buf_num; u32 type_cs_vlan_tso = 0; struct sk_buff *skb; u16 inner_vtag = 0; u16 out_vtag = 0; + unsigned int k; + int sizeoflast; u32 paylen = 0; + dma_addr_t dma; u16 mss = 0; u8 ol4_proto; u8 il4_proto; int ret; - /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ - desc_cb->priv = priv; - desc_cb->length = size; - desc_cb->dma = dma; - desc_cb->type = type; - - /* now, fill the descriptor */ - desc->addr = cpu_to_le64(dma); - desc->tx.send_size = cpu_to_le16((u16)size); - hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end); - desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri); - if (type == DESC_TYPE_SKB) { skb = (struct sk_buff *)priv; paylen = skb->len; @@ -1046,38 +1039,47 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->tx.mss = cpu_to_le16(mss); desc->tx.vlan_tag = cpu_to_le16(inner_vtag); desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag); - } - /* move ring pointer to next.*/ - ring_ptr_move_fw(ring, next_to_use); + dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); + } else { + frag = (struct skb_frag_struct *)priv; + dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); + } - return 0; -} + if (dma_mapping_error(ring->dev, dma)) { + ring->stats.sw_err_cnt++; + return -ENOMEM; + } -static int hns3_fill_desc_tso(struct hns3_enet_ring *ring, void *priv, - int size, 
dma_addr_t dma, int frag_end, - enum hns_desc_type type) -{ - unsigned int frag_buf_num; - unsigned int k; - int sizeoflast; - int ret; + desc_cb->length = size; frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE; sizeoflast = size % HNS3_MAX_BD_SIZE; sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE; - /* When the frag size is bigger than hardware, split this frag */ + /* When frag size is bigger than hardware limit, split this frag */ for (k = 0; k < frag_buf_num; k++) { - ret = hns3_fill_desc(ring, priv, - (k == frag_buf_num - 1) ? - sizeoflast : HNS3_MAX_BD_SIZE, - dma + HNS3_MAX_BD_SIZE * k, - frag_end && (k == frag_buf_num - 1) ? 1 : 0, - (type == DESC_TYPE_SKB && !k) ? - DESC_TYPE_SKB : DESC_TYPE_PAGE); - if (ret) - return ret; + /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ + desc_cb->priv = priv; + desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; + desc_cb->type = (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE; + + /* now, fill the descriptor */ + desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); + desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? + (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); + hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, + frag_end && (k == frag_buf_num - 1) ? + 1 : 0); + desc->tx.bdtp_fe_sc_vld_ra_ri = + cpu_to_le16(bdtp_fe_sc_vld_ra_ri); + + /* move ring pointer to next.*/ + ring_ptr_move_fw(ring, next_to_use); + + desc_cb = &ring->desc_cb[ring->next_to_use]; + desc = &ring->desc[ring->next_to_use]; } return 0; @@ -1133,7 +1135,7 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum, return 0; } -static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig) +static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) { struct device *dev = ring_to_dev(ring); unsigned int i; @@ -1149,12 +1151,14 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig) ring->desc_cb[ring->next_to_use].dma, ring->desc_cb[ring->next_to_use].length, DMA_TO_DEVICE); - else + else if (ring->desc_cb[ring->next_to_use].length) dma_unmap_page(dev, ring->desc_cb[ring->next_to_use].dma, ring->desc_cb[ring->next_to_use].length, DMA_TO_DEVICE); + ring->desc_cb[ring->next_to_use].length = 0; + /* rollback one */ ring_ptr_move_bw(ring, next_to_use); } @@ -1166,12 +1170,10 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) struct hns3_nic_ring_data *ring_data = &tx_ring_data(priv, skb->queue_mapping); struct hns3_enet_ring *ring = ring_data->ring; - struct device *dev = priv->dev; struct netdev_queue *dev_queue; struct skb_frag_struct *frag; int next_to_use_head; int next_to_use_frag; - dma_addr_t dma; int buf_num; int seg_num; int size; @@ -1206,35 +1208,23 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) next_to_use_head = ring->next_to_use; - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) { - netdev_err(netdev, "TX head DMA map failed\n"); - ring->stats.sw_err_cnt++; - goto out_err_tx_ok; - } - - ret = priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, - DESC_TYPE_SKB); + ret = priv->ops.fill_desc(ring, skb, size, seg_num == 1 ? 
1 : 0, + DESC_TYPE_SKB); if (ret) - goto head_dma_map_err; + goto head_fill_err; next_to_use_frag = ring->next_to_use; /* Fill the fragments */ for (i = 1; i < seg_num; i++) { frag = &skb_shinfo(skb)->frags[i - 1]; size = skb_frag_size(frag); - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) { - netdev_err(netdev, "TX frag(%d) DMA map failed\n", i); - ring->stats.sw_err_cnt++; - goto frag_dma_map_err; - } - ret = priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, - seg_num - 1 == i ? 1 : 0, - DESC_TYPE_PAGE); + + ret = priv->ops.fill_desc(ring, frag, size, + seg_num - 1 == i ? 1 : 0, + DESC_TYPE_PAGE); if (ret) - goto frag_dma_map_err; + goto frag_fill_err; } /* Complete translate all packets */ @@ -1247,11 +1237,11 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; -frag_dma_map_err: - hns_nic_dma_unmap(ring, next_to_use_frag); +frag_fill_err: + hns3_clear_desc(ring, next_to_use_frag); -head_dma_map_err: - hns_nic_dma_unmap(ring, next_to_use_head); +head_fill_err: + hns3_clear_desc(ring, next_to_use_head); out_err_tx_ok: dev_kfree_skb_any(skb); @@ -1313,13 +1303,10 @@ static int hns3_nic_set_features(struct net_device *netdev, int ret; if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { - if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { - priv->ops.fill_desc = hns3_fill_desc_tso; + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso; - } else { - priv->ops.fill_desc = hns3_fill_desc; + else priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx; - } } if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) && @@ -1890,7 +1877,7 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring, if (cb->type == DESC_TYPE_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else + else if (cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); } @@ -3247,14 +3234,12 @@ static void hns3_nic_set_priv_ops(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); + priv->ops.fill_desc = hns3_fill_desc; if ((netdev->features & NETIF_F_TSO) || - (netdev->features & NETIF_F_TSO6)) { - priv->ops.fill_desc = hns3_fill_desc_tso; + (netdev->features & NETIF_F_TSO6)) priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso; - } else { - priv->ops.fill_desc = hns3_fill_desc; + else priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx; - } } static int hns3_client_init(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index f25b281ff2aa..71cfca132d0b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -419,8 +419,7 @@ struct hns3_nic_ring_data { struct hns3_nic_ops { int (*fill_desc)(struct hns3_enet_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - enum hns_desc_type type); + int size, int frag_end, enum hns_desc_type type); int (*maybe_stop_tx)(struct sk_buff **out_skb, int *bnum, struct hns3_enet_ring *ring); void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 0f5563f3b779..097b5502603f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -58,6 +58,8 @@ enum hinic_port_cmd { HINIC_PORT_CMD_GET_GLOBAL_QPN = 102, + HINIC_PORT_CMD_SET_TSO = 112, + 
HINIC_PORT_CMD_GET_CAP = 170, }; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c index cb239627770f..967c993d5303 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c @@ -70,8 +70,6 @@ #define SQ_MASKED_IDX(sq, idx) ((idx) & (sq)->wq->mask) #define RQ_MASKED_IDX(rq, idx) ((idx) & (rq)->wq->mask) -#define TX_MAX_MSS_DEFAULT 0x3E00 - enum sq_wqe_type { SQ_NORMAL_WQE = 0, }; @@ -494,33 +492,16 @@ static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx, HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | HINIC_SQ_CTRL_SET(ctrl_size, LEN); - ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT, - QUEUE_INFO_MSS); + ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT, + QUEUE_INFO_MSS) | + HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC); } static void sq_prepare_task(struct hinic_sq_task *task) { - task->pkt_info0 = - HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) | - HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) | - HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN, - INNER_L3TYPE) | - HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE, - VLAN_OFFLOAD) | - HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG); - - task->pkt_info1 = - HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) | - HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) | - HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN); - - task->pkt_info2 = - HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) | - HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN) | - HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN, - TUNNEL_L4TYPE) | - HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN, - OUTER_L3TYPE); + task->pkt_info0 = 0; + task->pkt_info1 = 0; + task->pkt_info2 = 0; task->ufo_v6_identify = 0; @@ -529,6 +510,86 @@ static void sq_prepare_task(struct hinic_sq_task *task) task->zero_pad = 0; } +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN); +} + +void hinic_task_set_outter_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len) +{ + task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) | + HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN); +} + +void hinic_task_set_inner_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE); + task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN); +} + +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task, + enum hinic_l4_offload_type l4_type, + u32 tunnel_len) +{ + task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) | + HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN); +} + +void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, u32 offset) +{ + u32 tcp_udp_cs = 0, sctp = 0; + u32 mss = HINIC_MSS_DEFAULT; + + if (l4_offload == TCP_OFFLOAD_ENABLE || + l4_offload == UDP_OFFLOAD_ENABLE) + tcp_udp_cs = 1; + else if (l4_offload == SCTP_OFFLOAD_ENABLE) + sctp = 1; + + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD); + task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN); + + *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) | + HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) | + HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP); + + *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS); + *queue_info |= HINIC_SQ_CTRL_SET(mss, 
QUEUE_INFO_MSS); +} + +void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, u32 offset, u32 ip_ident, u32 mss) +{ + u32 tso = 0, ufo = 0; + + if (l4_offload == TCP_OFFLOAD_ENABLE) + tso = 1; + else if (l4_offload == UDP_OFFLOAD_ENABLE) + ufo = 1; + + task->ufo_v6_identify = ip_ident; + + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD); + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG); + task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN); + + *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) | + HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) | + HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) | + HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS); + + /* set MSS value */ + *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS); + *queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS); +} + /** * hinic_sq_prepare_wqe - prepare wqe before insert to the queue * @sq: send queue @@ -613,6 +674,16 @@ struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq, } /** + * hinic_sq_return_wqe - return the wqe to the sq + * @sq: send queue + * @wqe_size: the size of the wqe + **/ +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size) +{ + hinic_return_wqe(sq->wq, wqe_size); +} + +/** * hinic_sq_write_wqe - write the wqe to the sq * @sq: send queue * @prod_idx: pi of the wqe diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h index 6c84f83ec283..a0dc63a4bfc7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -149,6 +149,31 @@ int hinic_get_sq_free_wqebbs(struct hinic_sq *sq); int hinic_get_rq_free_wqebbs(struct hinic_rq *rq); +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len); + +void hinic_task_set_outter_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len); + +void hinic_task_set_inner_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len); + +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task, + enum hinic_l4_offload_type l4_type, + u32 tunnel_len); + +void hinic_set_cs_inner_l4(struct hinic_sq_task *task, + u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, u32 offset); + +void hinic_set_tso_inner_l4(struct hinic_sq_task *task, + u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, + u32 offset, u32 ip_ident, u32 mss); + void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx, struct hinic_sq_wqe *wqe, struct hinic_sge *sges, int nr_sges); @@ -159,6 +184,8 @@ void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size, struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq, unsigned int wqe_size, u16 *prod_idx); +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size); + void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx, struct hinic_sq_wqe *wqe, struct sk_buff *skb, unsigned int wqe_size); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c index 3e3181c089bd..f92f1bf3901a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c @@ -775,6 +775,20 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size, } /** + * hinic_return_wqe - return the wqe when transmit failed + * @wq: wq to return wqe + * @wqe_size: wqe size + **/ 
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size) +{ + int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size; + + atomic_sub(num_wqebbs, &wq->prod_idx); + + atomic_add(num_wqebbs, &wq->delta); +} + +/** * hinic_put_wqe - return the wqe place to use for a new wqe * @wq: wq to return wqe * @wqe_size: wqe size diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h index 9c030a0f035e..9b66545ba563 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h @@ -104,6 +104,8 @@ void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq); struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size, u16 *prod_idx); +void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size); + void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size); struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h index bc73485483c5..9754d6ed5f4a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h @@ -62,19 +62,33 @@ (((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \ & HINIC_CMDQ_WQE_HEADER_##member##_MASK) -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0 -#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16 -#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22 -#define HINIC_SQ_CTRL_LEN_SHIFT 29 - -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF -#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F -#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1 -#define HINIC_SQ_CTRL_LEN_MASK 0x3 - -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13 - -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0 +#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16 +#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22 +#define HINIC_SQ_CTRL_LEN_SHIFT 29 + +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF +#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F +#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1 +#define HINIC_SQ_CTRL_LEN_MASK 0x3 + +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT 2 +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT 10 +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT 11 +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT 12 +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13 +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT 27 +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT 28 +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT 29 + +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK 0xFF +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK 0x7 #define HINIC_SQ_CTRL_SET(val, member) \ (((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \ @@ -84,6 +98,10 @@ (((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \ & HINIC_SQ_CTRL_##member##_MASK) +#define HINIC_SQ_CTRL_CLEAR(val, member) \ + ((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \ + << HINIC_SQ_CTRL_##member##_SHIFT))) + #define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT 0 #define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT 8 #define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT 10 @@ -108,28 +126,28 @@ /* 8 bits reserved */ #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT 8 -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT 16 
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT 24 +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT 16 +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT 24 /* 8 bits reserved */ #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK 0xFF -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK 0xFF -#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK 0xFF #define HINIC_SQ_TASK_INFO1_SET(val, member) \ (((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) << \ HINIC_SQ_TASK_INFO1_##member##_SHIFT) -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0 -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT 12 -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19 +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT 0 +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT 8 +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16 /* 1 bit reserved */ -#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 22 +#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 24 /* 8 bits reserved */ -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK 0xFFF -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK 0x7F -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x3 +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x7 /* 1 bit reserved */ #define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK 0x3 /* 8 bits reserved */ @@ -187,12 +205,15 @@ sizeof(struct hinic_sq_task) + \ (nr_sges) * sizeof(struct hinic_sq_bufdesc)) -#define HINIC_SCMD_DATA_LEN 16 +#define HINIC_SCMD_DATA_LEN 16 + +#define HINIC_MAX_SQ_BUFDESCS 17 -#define HINIC_MAX_SQ_BUFDESCS 17 +#define HINIC_SQ_WQE_MAX_SIZE 320 +#define HINIC_RQ_WQE_SIZE 32 -#define HINIC_SQ_WQE_MAX_SIZE 320 -#define HINIC_RQ_WQE_SIZE 32 +#define HINIC_MSS_DEFAULT 0x3E00 +#define HINIC_MSS_MIN 0x50 enum hinic_l4offload_type { HINIC_L4_OFF_DISABLE = 0, @@ -211,6 +232,26 @@ enum hinic_pkt_parsed { HINIC_PKT_PARSED = 1, }; +enum hinic_l3_offload_type { + L3TYPE_UNKNOWN = 0, + IPV6_PKT = 1, + IPV4_PKT_NO_CHKSUM_OFFLOAD = 2, + IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3, +}; + +enum hinic_l4_offload_type { + OFFLOAD_DISABLE = 0, + TCP_OFFLOAD_ENABLE = 1, + SCTP_OFFLOAD_ENABLE = 2, + UDP_OFFLOAD_ENABLE = 3, +}; + +enum hinic_l4_tunnel_type { + NOT_TUNNEL, + TUNNEL_UDP_NO_CSUM, + TUNNEL_UDP_CSUM, +}; + enum hinic_outer_l3type { HINIC_OUTER_L3TYPE_UNKNOWN = 0, HINIC_OUTER_L3TYPE_IPV6 = 1, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 4a8f82938ed5..fdf2bdb6b0d0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -805,7 +805,8 @@ static const struct net_device_ops hinic_netdev_ops = { static void netdev_features_init(struct net_device *netdev) { - netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA; + netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; netdev->vlan_features = netdev->hw_features; @@ -863,6 +864,20 @@ static void link_status_event_handler(void *handle, void *buf_in, u16 in_size, *out_size = sizeof(*ret_link_status); } +static int set_features(struct hinic_dev *nic_dev, + netdev_features_t pre_features, + netdev_features_t features, bool force_change) +{ + netdev_features_t changed = force_change ? ~0 : pre_features ^ features; + int err = 0; + + if (changed & NETIF_F_TSO) + err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ? 
+ HINIC_TSO_ENABLE : HINIC_TSO_DISABLE); + + return err; +} + /** * nic_dev_init - Initialize the NIC device * @pdev: the NIC pci device @@ -963,7 +978,12 @@ static int nic_dev_init(struct pci_dev *pdev) hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS, nic_dev, link_status_event_handler); + err = set_features(nic_dev, 0, nic_dev->netdev->features, true); + if (err) + goto err_set_features; + SET_NETDEV_DEV(netdev, &pdev->dev); + err = register_netdev(netdev); if (err) { dev_err(&pdev->dev, "Failed to register netdev\n"); @@ -973,6 +993,7 @@ static int nic_dev_init(struct pci_dev *pdev) return 0; err_reg_netdev: +err_set_features: hinic_hwdev_cb_unregister(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS); cancel_work_sync(&rx_mode_work->work); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 4d4e3f05fb5f..7575a7d3bd9f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -377,3 +377,35 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev, return 0; } + +/** + * hinic_port_set_tso - set port tso configuration + * @nic_dev: nic device + * @state: the tso state to set + * + * Return 0 - Success, negative - Failure + **/ +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_tso_config tso_cfg = {0}; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + tso_cfg.tso_en = state; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO, + &tso_cfg, sizeof(tso_cfg), + &tso_cfg, &out_size); + if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) { + dev_err(&pdev->dev, + "Failed to set port tso, ret = %d\n", + tso_cfg.status); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h index 9404365195dd..f6e3220fe28f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h @@ -72,6 +72,11 @@ enum hinic_speed { HINIC_SPEED_UNKNOWN = 0xFF, }; +enum hinic_tso_state { + HINIC_TSO_DISABLE = 0, + HINIC_TSO_ENABLE = 1, +}; + struct hinic_port_mac_cmd { u8 status; u8 version; @@ -167,6 +172,17 @@ struct hinic_port_cap { u8 rsvd2[3]; }; +struct hinic_tso_config { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1; + u8 tso_en; + u8 resv2[3]; +}; + int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr, u16 vlan_id); @@ -195,4 +211,6 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev, int hinic_port_get_cap(struct hinic_dev *nic_dev, struct hinic_port_cap *port_cap); +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state); + #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index c5fca0356c9c..11e73e67358d 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -26,6 +26,13 @@ #include <linux/skbuff.h> #include <linux/smp.h> #include <asm/byteorder.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/sctp.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/checksum.h> +#include <net/ip6_checksum.h> #include "hinic_common.h" #include "hinic_hw_if.h" @@ -45,9 +52,31 @@ #define CI_UPDATE_NO_PENDING 0 #define CI_UPDATE_NO_COALESC 
0 -#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr)) +#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr)) -#define MIN_SKB_LEN 64 +#define MIN_SKB_LEN 17 + +#define MAX_PAYLOAD_OFFSET 221 +#define TRANSPORT_OFFSET(l4_hdr, skb) ((u32)((l4_hdr) - (skb)->data)) + +union hinic_l3 { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; +}; + +union hinic_l4 { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; +}; + +enum hinic_offload_type { + TX_OFFLOAD_TSO = BIT(0), + TX_OFFLOAD_CSUM = BIT(1), + TX_OFFLOAD_VLAN = BIT(2), + TX_OFFLOAD_INVALID = BIT(3), +}; /** * hinic_txq_clean_stats - Clean the statistics of specific queue @@ -175,18 +204,263 @@ static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb, DMA_TO_DEVICE); } +static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip, + union hinic_l4 *l4, + enum hinic_offload_type offload_type, + enum hinic_l3_offload_type *l3_type, + u8 *l4_proto) +{ + u8 *exthdr; + + if (ip->v4->version == 4) { + *l3_type = (offload_type == TX_OFFLOAD_CSUM) ? + IPV4_PKT_NO_CHKSUM_OFFLOAD : + IPV4_PKT_WITH_CHKSUM_OFFLOAD; + *l4_proto = ip->v4->protocol; + } else if (ip->v4->version == 6) { + *l3_type = IPV6_PKT; + exthdr = ip->hdr + sizeof(*ip->v6); + *l4_proto = ip->v6->nexthdr; + if (exthdr != l4->hdr) { + int start = exthdr - skb->data; + __be16 frag_off; + + ipv6_skip_exthdr(skb, start, l4_proto, &frag_off); + } + } else { + *l3_type = L3TYPE_UNKNOWN; + *l4_proto = 0; + } +} + +static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4, + enum hinic_offload_type offload_type, u8 l4_proto, + enum hinic_l4_offload_type *l4_offload, + u32 *l4_len, u32 *offset) +{ + *l4_offload = OFFLOAD_DISABLE; + *offset = 0; + *l4_len = 0; + + switch (l4_proto) { + case IPPROTO_TCP: + *l4_offload = TCP_OFFLOAD_ENABLE; + /* doff in unit of 4B */ + *l4_len = l4->tcp->doff * 4; + *offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb); + break; + + case IPPROTO_UDP: + *l4_offload = UDP_OFFLOAD_ENABLE; + *l4_len = sizeof(struct udphdr); + *offset = TRANSPORT_OFFSET(l4->hdr, skb); + break; + + case IPPROTO_SCTP: + /* only csum offload support sctp */ + if (offload_type != TX_OFFLOAD_CSUM) + break; + + *l4_offload = SCTP_OFFLOAD_ENABLE; + *l4_len = sizeof(struct sctphdr); + *offset = TRANSPORT_OFFSET(l4->hdr, skb); + break; + + default: + break; + } +} + +static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto) +{ + return (ip->v4->version == 4) ? 
+ csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) : + csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0); +} + +static int offload_tso(struct hinic_sq_task *task, u32 *queue_info, + struct sk_buff *skb) +{ + u32 offset, l4_len, ip_identify, network_hdr_len; + enum hinic_l3_offload_type l3_offload; + enum hinic_l4_offload_type l4_offload; + union hinic_l3 ip; + union hinic_l4 l4; + u8 l4_proto; + + if (!skb_is_gso(skb)) + return 0; + + if (skb_cow_head(skb, 0) < 0) + return -EPROTONOSUPPORT; + + if (skb->encapsulation) { + u32 gso_type = skb_shinfo(skb)->gso_type; + u32 tunnel_type = 0; + u32 l4_tunnel_len; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_inner_network_header_len(skb); + + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD; + } else if (ip.v4->version == 6) { + l3_offload = IPV6_PKT; + } else { + l3_offload = 0; + } + + hinic_task_set_outter_l3(task, l3_offload, + skb_network_header_len(skb)); + + if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP); + tunnel_type = TUNNEL_UDP_CSUM; + } else if (gso_type & SKB_GSO_UDP_TUNNEL) { + tunnel_type = TUNNEL_UDP_NO_CSUM; + } + + l4_tunnel_len = skb_inner_network_offset(skb) - + skb_transport_offset(skb); + hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len); + + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + } else { + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_network_header_len(skb); + } + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) + ip.v4->tot_len = 0; + else + ip.v6->payload_len = 0; + + get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload, + &l4_proto); + + hinic_task_set_inner_l3(task, l3_offload, network_hdr_len); + + ip_identify = 0; + if (l4_proto == IPPROTO_TCP) + l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP); + + get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload, + &l4_len, &offset); + + hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset, + ip_identify, skb_shinfo(skb)->gso_size); + + return 1; +} + +static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, + struct sk_buff *skb) +{ + enum hinic_l4_offload_type l4_offload; + u32 offset, l4_len, network_hdr_len; + enum hinic_l3_offload_type l3_type; + union hinic_l3 ip; + union hinic_l4 l4; + u8 l4_proto; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->encapsulation) { + u32 l4_tunnel_len; + + ip.hdr = skb_network_header(skb); + + if (ip.v4->version == 4) + l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD; + else if (ip.v4->version == 6) + l3_type = IPV6_PKT; + else + l3_type = L3TYPE_UNKNOWN; + + hinic_task_set_outter_l3(task, l3_type, + skb_network_header_len(skb)); + + l4_tunnel_len = skb_inner_network_offset(skb) - + skb_transport_offset(skb); + + hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM, + l4_tunnel_len); + + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + network_hdr_len = skb_inner_network_header_len(skb); + } else { + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_network_header_len(skb); + } + + get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type, + &l4_proto); + + hinic_task_set_inner_l3(task, l3_type, network_hdr_len); + + get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload, + &l4_len, &offset); + + 
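/* l4 offload type/len go into the task section; payload offset and csum flags go into queue_info */ +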
hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset); + + return 1; +} + +static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, + u32 *queue_info) +{ + enum hinic_offload_type offload = 0; + int enabled; + + enabled = offload_tso(task, queue_info, skb); + if (enabled > 0) { + offload |= TX_OFFLOAD_TSO; + } else if (enabled == 0) { + enabled = offload_csum(task, queue_info, skb); + if (enabled) + offload |= TX_OFFLOAD_CSUM; + } else { + return -EPROTONOSUPPORT; + } + + if (offload) + hinic_task_set_l2hdr(task, skb_network_offset(skb)); + + /* payload offset should not be more than 221 */ + if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) > + MAX_PAYLOAD_OFFSET) { + return -EPROTONOSUPPORT; + } + + /* mss should not be less than 80 */ + if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) { + *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS); + *queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS); + } + + return 0; +} + netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct hinic_dev *nic_dev = netdev_priv(netdev); + u16 prod_idx, q_id = skb->queue_mapping; struct netdev_queue *netdev_txq; int nr_sges, err = NETDEV_TX_OK; struct hinic_sq_wqe *sq_wqe; unsigned int wqe_size; struct hinic_txq *txq; struct hinic_qp *qp; - u16 prod_idx; - txq = &nic_dev->txqs[skb->queue_mapping]; + txq = &nic_dev->txqs[q_id]; qp = container_of(txq->sq, struct hinic_qp, sq); if (skb->len < MIN_SKB_LEN) { @@ -236,15 +510,23 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) process_sq_wqe: hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges); + err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info); + if (err) + goto offload_error; + hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size); flush_skbs: - netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping); + netdev_txq = netdev_get_tx_queue(netdev, q_id); if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq))) hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0); return err; +offload_error: + hinic_sq_return_wqe(txq->sq, wqe_size); + tx_unmap_skb(nic_dev, skb, txq->sges); + skb_error: dev_kfree_skb_any(skb); @@ -252,7 +534,8 @@ update_error_stats: u64_stats_update_begin(&txq->txq_stats.syncp); txq->txq_stats.tx_dropped++; u64_stats_update_end(&txq->txq_stats.syncp); - return err; + + return NETDEV_TX_OK; } /** diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index b542aba6f0e8..fd3373d82a9e 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -68,6 +68,9 @@ config E1000E <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/e1000e.rst>. + To compile this driver as a module, choose M here. The module will be called e1000e. @@ -94,7 +97,7 @@ config IGB <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/e1000.rst>. + <file:Documentation/networking/igb.rst>. To compile this driver as a module, choose M here. The module will be called igb. @@ -130,7 +133,7 @@ config IGBVF <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/e1000.rst>. + <file:Documentation/networking/igbvf.rst>. To compile this driver as a module, choose M here. The module will be called igbvf.
@@ -147,7 +150,7 @@ config IXGB <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/ixgb.txt>. + <file:Documentation/networking/ixgb.rst>. To compile this driver as a module, choose M here. The module will be called ixgb. @@ -164,6 +167,9 @@ config IXGBE <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/ixgbe.rst>. + To compile this driver as a module, choose M here. The module will be called ixgbe. @@ -205,7 +211,7 @@ config IXGBEVF <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/ixgbevf.txt>. + <file:Documentation/networking/ixgbevf.rst>. To compile this driver as a module, choose M here. The module will be called ixgbevf. MSI-X interrupt support is required @@ -222,6 +228,9 @@ config I40E <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/i40e.rst>. + To compile this driver as a module, choose M here. The module will be called i40e. @@ -254,6 +263,9 @@ config I40EVF This driver was formerly named i40evf. + More specific information on configuring the driver is in + <file:Documentation/networking/iavf.rst>. + To compile this driver as a module, choose M here. The module will be called iavf. MSI-X interrupt support is required for this driver to work correctly. @@ -269,6 +281,9 @@ config ICE <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/ice.rst>. + To compile this driver as a module, choose M here. The module will be called ice. @@ -284,7 +299,26 @@ config FM10K <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/fm10k.rst>. + To compile this driver as a module, choose M here. The module will be called fm10k. MSI-X interrupt support is required +config IGC + tristate "Intel(R) Ethernet Controller I225-LM/I225-V support" + default n + depends on PCI + ---help--- + This driver supports Intel(R) Ethernet Controller I225-LM/I225-V + family of adapters. + + For more information on how to identify your adapter, go + to the Adapter & Driver ID Guide that can be located at: + + <http://support.intel.com> + + To compile this driver as a module, choose M here. The module + will be called igc. 
+ endif # NET_VENDOR_INTEL diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index b91153df6ee8..3075290063f6 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_E100) += e100.o obj-$(CONFIG_E1000) += e1000/ obj-$(CONFIG_E1000E) += e1000e/ obj-$(CONFIG_IGB) += igb/ +obj-$(CONFIG_IGC) += igc/ obj-$(CONFIG_IGBVF) += igbvf/ obj-$(CONFIG_IXGBE) += ixgbe/ obj-$(CONFIG_IXGBEVF) += ixgbevf/ diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile new file mode 100644 index 000000000000..4387f6ba8e67 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Intel Corporation + +# +# Intel(R) I225-LM/I225-V 2.5G Ethernet Controller +# + +obj-$(CONFIG_IGC) += igc.o + +igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h new file mode 100644 index 000000000000..cdf18a5d9e08 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -0,0 +1,443 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_H_ +#define _IGC_H_ + +#include <linux/kobject.h> + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/vmalloc.h> + +#include <linux/ethtool.h> + +#include <linux/sctp.h> + +#define IGC_ERR(args...) pr_err("igc: " args) + +#define PFX "igc: " + +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> + +#include "igc_hw.h" + +/* main */ +extern char igc_driver_name[]; +extern char igc_driver_version[]; + +/* Interrupt defines */ +#define IGC_START_ITR 648 /* ~6000 ints/sec */ +#define IGC_FLAG_HAS_MSI BIT(0) +#define IGC_FLAG_QUEUE_PAIRS BIT(4) +#define IGC_FLAG_NEED_LINK_UPDATE BIT(9) +#define IGC_FLAG_MEDIA_RESET BIT(10) +#define IGC_FLAG_MAS_ENABLE BIT(12) +#define IGC_FLAG_HAS_MSIX BIT(13) +#define IGC_FLAG_VLAN_PROMISC BIT(15) + +#define IGC_START_ITR 648 /* ~6000 ints/sec */ +#define IGC_4K_ITR 980 +#define IGC_20K_ITR 196 +#define IGC_70K_ITR 56 + +#define IGC_DEFAULT_ITR 3 /* dynamic */ +#define IGC_MAX_ITR_USECS 10000 +#define IGC_MIN_ITR_USECS 10 +#define NON_Q_VECTORS 1 +#define MAX_MSIX_ENTRIES 10 + +/* TX/RX descriptor defines */ +#define IGC_DEFAULT_TXD 256 +#define IGC_DEFAULT_TX_WORK 128 +#define IGC_MIN_TXD 80 +#define IGC_MAX_TXD 4096 + +#define IGC_DEFAULT_RXD 256 +#define IGC_MIN_RXD 80 +#define IGC_MAX_RXD 4096 + +/* Transmit and receive queues */ +#define IGC_MAX_RX_QUEUES 4 +#define IGC_MAX_TX_QUEUES 4 + +#define MAX_Q_VECTORS 8 +#define MAX_STD_JUMBO_FRAME_SIZE 9216 + +/* Supported Rx Buffer Sizes */ +#define IGC_RXBUFFER_256 256 +#define IGC_RXBUFFER_2048 2048 +#define IGC_RXBUFFER_3072 3072 + +#define IGC_RX_HDR_LEN IGC_RXBUFFER_256 + +/* RX and TX descriptor control thresholds. + * PTHRESH - MAC will consider prefetch if it has fewer than this number of + * descriptors available in its onboard memory. + * Setting this to 0 disables RX descriptor prefetch. + * HTHRESH - MAC will only prefetch if there are at least this many descriptors + * available in host memory. + * If PTHRESH is 0, this should also be 0. + * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back + * descriptors until either it has this many to write back, or the + * ITR timer expires. 
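+ *
+ * For example, with the Rx values below the MAC batches descriptor
+ * writebacks four at a time (IGC_RX_WTHRESH = 4) unless the ITR
+ * timer fires first.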
+ */ +#define IGC_RX_PTHRESH 8 +#define IGC_RX_HTHRESH 8 +#define IGC_TX_PTHRESH 8 +#define IGC_TX_HTHRESH 1 +#define IGC_RX_WTHRESH 4 +#define IGC_TX_WTHRESH 16 + +#define IGC_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +#define IGC_TS_HDR_LEN 16 + +#define IGC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) + +#if (PAGE_SIZE < 8192) +#define IGC_MAX_FRAME_BUILD_SKB \ + (SKB_WITH_OVERHEAD(IGC_RXBUFFER_2048) - IGC_SKB_PAD - IGC_TS_HDR_LEN) +#else +#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN) +#endif + +/* How many Rx Buffers do we bundle into one write to the hardware? */ +#define IGC_RX_BUFFER_WRITE 16 /* Must be power of 2 */ + +/* igc_test_staterr - tests bits within Rx descriptor status and error fields */ +static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc, + const u32 stat_err_bits) +{ + return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); +} + +enum igc_state_t { + __IGC_TESTING, + __IGC_RESETTING, + __IGC_DOWN, + __IGC_PTP_TX_IN_PROGRESS, +}; + +enum igc_tx_flags { + /* cmd_type flags */ + IGC_TX_FLAGS_VLAN = 0x01, + IGC_TX_FLAGS_TSO = 0x02, + IGC_TX_FLAGS_TSTAMP = 0x04, + + /* olinfo flags */ + IGC_TX_FLAGS_IPV4 = 0x10, + IGC_TX_FLAGS_CSUM = 0x20, +}; + +enum igc_boards { + board_base, +}; + +/* The largest size we can write to the descriptor is 65535. In order to + * maintain a power of two alignment we have to limit ourselves to 32K. + */ +#define IGC_MAX_TXD_PWR 15 +#define IGC_MAX_DATA_PER_TXD BIT(IGC_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD) +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer + */ +struct igc_tx_buffer { + union igc_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; + unsigned int bytecount; + u16 gso_segs; + __be16 protocol; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct igc_rx_buffer { + dma_addr_t dma; + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 page_offset; +#else + __u16 page_offset; +#endif + __u16 pagecnt_bias; +}; + +struct igc_tx_queue_stats { + u64 packets; + u64 bytes; + u64 restart_queue; + u64 restart_queue2; +}; + +struct igc_rx_queue_stats { + u64 packets; + u64 bytes; + u64 drops; + u64 csum_err; + u64 alloc_failed; +}; + +struct igc_rx_packet_stats { + u64 ipv4_packets; /* IPv4 headers processed */ + u64 ipv4e_packets; /* IPv4E headers with extensions processed */ + u64 ipv6_packets; /* IPv6 headers processed */ + u64 ipv6e_packets; /* IPv6E headers with extensions processed */ + u64 tcp_packets; /* TCP headers processed */ + u64 udp_packets; /* UDP headers processed */ + u64 sctp_packets; /* SCTP headers processed */ + u64 nfs_packets; /* NFS headers processed */ + u64 other_packets; +}; + +struct igc_ring_container { + struct igc_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +struct igc_ring { + struct igc_q_vector *q_vector; /* backlink to q_vector */ + struct net_device *netdev; /* back pointer to net_device */ + struct device *dev; /* device for dma mapping */ + union { /* array of buffer info structs
*/ + struct igc_tx_buffer *tx_buffer_info; + struct igc_rx_buffer *rx_buffer_info; + }; + void *desc; /* descriptor ring memory */ + unsigned long flags; /* ring specific flags */ + void __iomem *tail; /* pointer to ring tail register */ + dma_addr_t dma; /* phys address of the ring */ + unsigned int size; /* length of desc. ring in bytes */ + + u16 count; /* number of desc. in the ring */ + u8 queue_index; /* logical index of the ring*/ + u8 reg_idx; /* physical index of the ring */ + + /* everything past this point are written often */ + u16 next_to_clean; + u16 next_to_use; + u16 next_to_alloc; + + union { + /* TX */ + struct { + struct igc_tx_queue_stats tx_stats; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync tx_syncp2; + }; + /* RX */ + struct { + struct igc_rx_queue_stats rx_stats; + struct igc_rx_packet_stats pkt_stats; + struct u64_stats_sync rx_syncp; + struct sk_buff *skb; + }; + }; +} ____cacheline_internodealigned_in_smp; + +struct igc_q_vector { + struct igc_adapter *adapter; /* backlink */ + void __iomem *itr_register; + u32 eims_value; /* EIMS mask value */ + + u16 itr_val; + u8 set_itr; + + struct igc_ring_container rx, tx; + + struct napi_struct napi; + + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[IFNAMSIZ + 9]; + struct net_device poll_dev; + + /* for dynamic allocation of rings associated with this q_vector */ + struct igc_ring ring[0] ____cacheline_internodealigned_in_smp; +}; + +struct igc_mac_addr { + u8 addr[ETH_ALEN]; + u8 queue; + u8 state; /* bitmask */ +}; + +#define IGC_MAC_STATE_DEFAULT 0x1 +#define IGC_MAC_STATE_MODIFIED 0x2 +#define IGC_MAC_STATE_IN_USE 0x4 + +/* Board specific private data structure */ +struct igc_adapter { + struct net_device *netdev; + + unsigned long state; + unsigned int flags; + unsigned int num_q_vectors; + + struct msix_entry *msix_entries; + + /* TX */ + u16 tx_work_limit; + u32 tx_timeout_count; + int num_tx_queues; + struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES]; + + /* RX */ + int num_rx_queues; + struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES]; + + struct timer_list watchdog_timer; + struct timer_list dma_err_timer; + struct timer_list phy_info_timer; + + u16 link_speed; + u16 link_duplex; + + u8 port_num; + + u8 __iomem *io_addr; + /* Interrupt Throttle Rate */ + u32 rx_itr_setting; + u32 tx_itr_setting; + + struct work_struct reset_task; + struct work_struct watchdog_task; + struct work_struct dma_err_task; + bool fc_autoneg; + + u8 tx_timeout_factor; + + int msg_enable; + u32 max_frame_size; + u32 min_frame_size; + + /* OS defined structs */ + struct pci_dev *pdev; + /* lock for statistics */ + spinlock_t stats64_lock; + struct rtnl_link_stats64 stats64; + + /* structs defined in igc_hw.h */ + struct igc_hw hw; + struct igc_hw_stats stats; + + struct igc_q_vector *q_vector[MAX_Q_VECTORS]; + u32 eims_enable_mask; + u32 eims_other; + + u16 tx_ring_count; + u16 rx_ring_count; + + u32 *shadow_vfta; + + u32 rss_queues; + + /* lock for RX network flow classification filter */ + spinlock_t nfc_lock; + + struct igc_mac_addr *mac_table; + + unsigned long link_check_timeout; + struct igc_info ei; +}; + +/* igc_desc_unused - calculate if we have unused descriptors */ +static inline u16 igc_desc_unused(const struct igc_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 
0 : ring->count) + ntc - ntu - 1; +} + +static inline s32 igc_get_phy_info(struct igc_hw *hw) +{ + if (hw->phy.ops.get_phy_info) + return hw->phy.ops.get_phy_info(hw); + + return 0; +} + +static inline s32 igc_reset_phy(struct igc_hw *hw) +{ + if (hw->phy.ops.reset) + return hw->phy.ops.reset(hw); + + return 0; +} + +static inline struct netdev_queue *txring_txq(const struct igc_ring *tx_ring) +{ + return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); +} + +enum igc_ring_flags_t { + IGC_RING_FLAG_RX_3K_BUFFER, + IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, + IGC_RING_FLAG_RX_SCTP_CSUM, + IGC_RING_FLAG_RX_LB_VLAN_BSWAP, + IGC_RING_FLAG_TX_CTX_IDX, + IGC_RING_FLAG_TX_DETECT_HANG +}; + +#define ring_uses_large_buffer(ring) \ + test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) + +#define ring_uses_build_skb(ring) \ + test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) + +static inline unsigned int igc_rx_bufsz(struct igc_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return IGC_RXBUFFER_3072; + + if (ring_uses_build_skb(ring)) + return IGC_MAX_FRAME_BUILD_SKB + IGC_TS_HDR_LEN; +#endif + return IGC_RXBUFFER_2048; +} + +static inline unsigned int igc_rx_pg_order(struct igc_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return 1; +#endif + return 0; +} + +static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) +{ + if (hw->phy.ops.read_reg) + return hw->phy.ops.read_reg(hw, offset, data); + + return 0; +} + +#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) + +#define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS) + +#define IGC_RX_DESC(R, i) \ + (&(((union igc_adv_rx_desc *)((R)->desc))[i])) +#define IGC_TX_DESC(R, i) \ + (&(((union igc_adv_tx_desc *)((R)->desc))[i])) +#define IGC_TX_CTXTDESC(R, i) \ + (&(((struct igc_adv_tx_context_desc *)((R)->desc))[i])) + +#endif /* _IGC_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c new file mode 100644 index 000000000000..832da609d9a7 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include <linux/delay.h> + +#include "igc_hw.h" +#include "igc_i225.h" +#include "igc_mac.h" +#include "igc_base.h" +#include "igc.h" + +/** + * igc_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + */ +static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw) +{ + u32 gcr = rd32(IGC_GCR); + u16 pcie_devctl2; + s32 ret_val = 0; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & IGC_GCR_CMPL_TMOUT_MASK) + goto out; + + /* if capabilities version is type 1 we can write the + * timeout of 10ms to 200ms through the GCR register + */ + if (!(gcr & IGC_GCR_CAP_VER2)) { + gcr |= IGC_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); + if (ret_val) + goto out; + + pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; + + ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND; + + wr32(IGC_GCR, gcr); + + return ret_val; +} + +/** + * igc_check_for_link_base - Check for link + * @hw: pointer to the HW structure + * + 
* If SGMII is enabled, then use the PCS register to determine link; otherwise + * use the generic interface for determining link. + */ +static s32 igc_check_for_link_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + + ret_val = igc_check_for_copper_link(hw); + + return ret_val; +} + +/** + * igc_reset_hw_base - Reset hardware + * @hw: pointer to the HW structure + * + * This resets the hardware into a known state. This is a + * function pointer entry point called by the api module. + */ +static s32 igc_reset_hw_base(struct igc_hw *hw) +{ + s32 ret_val; + u32 ctrl; + + /* Prevent the PCI-E bus from sticking if there is no TLP connection + * on the last TLP read/write transaction when MAC is reset. + */ + ret_val = igc_disable_pcie_master(hw); + if (ret_val) + hw_dbg("PCI-E Master disable polling has failed.\n"); + + /* set the completion timeout for interface */ + ret_val = igc_set_pcie_completion_timeout(hw); + if (ret_val) + hw_dbg("PCI-E Set completion timeout has failed.\n"); + + hw_dbg("Masking off all interrupts\n"); + wr32(IGC_IMC, 0xffffffff); + + wr32(IGC_RCTL, 0); + wr32(IGC_TCTL, IGC_TCTL_PSP); + wrfl(); + + usleep_range(10000, 20000); + + ctrl = rd32(IGC_CTRL); + + hw_dbg("Issuing a global reset to MAC\n"); + wr32(IGC_CTRL, ctrl | IGC_CTRL_RST); + + ret_val = igc_get_auto_rd_done(hw); + if (ret_val) { + /* When auto config read does not complete, do not + * return with an error. This can happen in situations + * where there is no eeprom and prevents getting link. + */ + hw_dbg("Auto Read Done did not complete\n"); + } + + /* Clear any pending interrupt events. */ + wr32(IGC_IMC, 0xffffffff); + rd32(IGC_ICR); + + return ret_val; +} + +/** + * igc_get_phy_id_base - Retrieve PHY addr and id + * @hw: pointer to the HW structure + * + * Retrieves the PHY address and ID for both PHYs, whether or not they use + * the SGMII interface. + */ +static s32 igc_get_phy_id_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + + ret_val = igc_get_phy_id(hw); + + return ret_val; +} + +/** + * igc_init_nvm_params_base - Init NVM func ptrs. + * @hw: pointer to the HW structure + */ +static s32 igc_init_nvm_params_base(struct igc_hw *hw) +{ + struct igc_nvm_info *nvm = &hw->nvm; + u32 eecd = rd32(IGC_EECD); + u16 size; + + size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >> + IGC_EECD_SIZE_EX_SHIFT); + + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. + */ + size += NVM_WORD_SIZE_BASE_SHIFT; + + /* Just in case size is out of range, cap it to the largest + * EEPROM size supported + */ + if (size > 15) + size = 15; + + nvm->word_size = BIT(size); + nvm->opcode_bits = 8; + nvm->delay_usec = 1; + + nvm->page_size = eecd & IGC_EECD_ADDR_BITS ? 32 : 8; + nvm->address_bits = eecd & IGC_EECD_ADDR_BITS ? + 16 : 8; + + if (nvm->word_size == BIT(15)) + nvm->page_size = 128; + + return 0; +} + +/** + * igc_setup_copper_link_base - Configure copper link settings + * @hw: pointer to the HW structure + * + * Configures the link for auto-neg or forced speed and duplex. Then we check + * for link; once link is established, collision distance and flow control + * are configured. + */ +static s32 igc_setup_copper_link_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + u32 ctrl; + + ctrl = rd32(IGC_CTRL); + ctrl |= IGC_CTRL_SLU; + ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX); + wr32(IGC_CTRL, ctrl); + + ret_val = igc_setup_copper_link(hw); + + return ret_val; +} + +/** + * igc_init_mac_params_base - Init MAC func ptrs.
+ * @hw: pointer to the HW structure + */ +static s32 igc_init_mac_params_base(struct igc_hw *hw) +{ + struct igc_dev_spec_base *dev_spec = &hw->dev_spec._base; + struct igc_mac_info *mac = &hw->mac; + + /* Set mta register count */ + mac->mta_reg_count = 128; + mac->rar_entry_count = IGC_RAR_ENTRIES; + + /* reset */ + mac->ops.reset_hw = igc_reset_hw_base; + + mac->ops.acquire_swfw_sync = igc_acquire_swfw_sync_i225; + mac->ops.release_swfw_sync = igc_release_swfw_sync_i225; + + /* Allow a single clear of the SW semaphore on I225 */ + if (mac->type == igc_i225) + dev_spec->clear_semaphore_once = true; + + /* physical interface link setup */ + mac->ops.setup_physical_interface = igc_setup_copper_link_base; + + return 0; +} + +/** + * igc_init_phy_params_base - Init PHY func ptrs. + * @hw: pointer to the HW structure + */ +static s32 igc_init_phy_params_base(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u32 ctrl_ext; + + if (hw->phy.media_type != igc_media_type_copper) { + phy->type = igc_phy_none; + goto out; + } + + phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT_2500; + phy->reset_delay_us = 100; + + ctrl_ext = rd32(IGC_CTRL_EXT); + + /* set lan id */ + hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >> + IGC_STATUS_FUNC_SHIFT; + + /* Make sure the PHY is in a good state. Several people have reported + * firmware leaving the PHY's page select register set to something + * other than the default of zero, which causes the PHY ID read to + * access something other than the intended register. + */ + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + hw_dbg("Error resetting the PHY.\n"); + goto out; + } + + ret_val = igc_get_phy_id_base(hw); + if (ret_val) + return ret_val; + + igc_check_for_link_base(hw); + + /* Verify phy id and set remaining function pointers */ + switch (phy->id) { + case I225_I_PHY_ID: + phy->type = igc_phy_i225; + break; + default: + ret_val = -IGC_ERR_PHY; + goto out; + } + +out: + return ret_val; +} + +static s32 igc_get_invariants_base(struct igc_hw *hw) +{ + struct igc_mac_info *mac = &hw->mac; + u32 link_mode = 0; + u32 ctrl_ext = 0; + s32 ret_val = 0; + + switch (hw->device_id) { + case IGC_DEV_ID_I225_LM: + case IGC_DEV_ID_I225_V: + mac->type = igc_i225; + break; + default: + return -IGC_ERR_MAC_INIT; + } + + hw->phy.media_type = igc_media_type_copper; + + ctrl_ext = rd32(IGC_CTRL_EXT); + link_mode = ctrl_ext & IGC_CTRL_EXT_LINK_MODE_MASK; + + /* mac initialization and operations */ + ret_val = igc_init_mac_params_base(hw); + if (ret_val) + goto out; + + /* NVM initialization */ + ret_val = igc_init_nvm_params_base(hw); + switch (hw->mac.type) { + case igc_i225: + ret_val = igc_init_nvm_params_i225(hw); + break; + default: + break; + } + + /* setup PHY parameters */ + ret_val = igc_init_phy_params_base(hw); + if (ret_val) + goto out; + +out: + return ret_val; +} + +/** + * igc_acquire_phy_base - Acquire rights to access PHY + * @hw: pointer to the HW structure + * + * Acquire access rights to the correct PHY. This is a + * function pointer entry point called by the api module. + */ +static s32 igc_acquire_phy_base(struct igc_hw *hw) +{ + u16 mask = IGC_SWFW_PHY0_SM; + + return hw->mac.ops.acquire_swfw_sync(hw, mask); +} + +/** + * igc_release_phy_base - Release rights to access PHY + * @hw: pointer to the HW structure + * + * A wrapper to release access rights to the correct PHY. This is a + * function pointer entry point called by the api module. 
+ */ +static void igc_release_phy_base(struct igc_hw *hw) +{ + u16 mask = IGC_SWFW_PHY0_SM; + + hw->mac.ops.release_swfw_sync(hw, mask); +} + +/** + * igc_get_link_up_info_base - Get link speed/duplex info + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * This is a wrapper function, if using the serial gigabit media independent + * interface, use PCS to retrieve the link speed and duplex information. + * Otherwise, use the generic function to get the link speed and duplex info. + */ +static s32 igc_get_link_up_info_base(struct igc_hw *hw, u16 *speed, + u16 *duplex) +{ + s32 ret_val; + + ret_val = igc_get_speed_and_duplex_copper(hw, speed, duplex); + + return ret_val; +} + +/** + * igc_init_hw_base - Initialize hardware + * @hw: pointer to the HW structure + * + * This inits the hardware readying it for operation. + */ +static s32 igc_init_hw_base(struct igc_hw *hw) +{ + struct igc_mac_info *mac = &hw->mac; + u16 i, rar_count = mac->rar_entry_count; + s32 ret_val = 0; + + /* Setup the receive address */ + igc_init_rx_addrs(hw, rar_count); + + /* Zero out the Multicast HASH table */ + hw_dbg("Zeroing the MTA\n"); + for (i = 0; i < mac->mta_reg_count; i++) + array_wr32(IGC_MTA, i, 0); + + /* Zero out the Unicast HASH table */ + hw_dbg("Zeroing the UTA\n"); + for (i = 0; i < mac->uta_reg_count; i++) + array_wr32(IGC_UTA, i, 0); + + /* Setup link and flow control */ + ret_val = igc_setup_link(hw); + + /* Clear all of the statistics registers (clear on read). It is + * important that we do this after we have tried to establish link + * because the symbol error count will increment wildly if there + * is no link. + */ + igc_clear_hw_cntrs_base(hw); + + return ret_val; +} + +/** + * igc_read_mac_addr_base - Read device MAC address + * @hw: pointer to the HW structure + */ +static s32 igc_read_mac_addr_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + + ret_val = igc_read_mac_addr(hw); + + return ret_val; +} + +/** + * igc_power_down_phy_copper_base - Remove link during PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, remove the link. + */ +void igc_power_down_phy_copper_base(struct igc_hw *hw) +{ + /* If the management interface is not enabled, then power down */ + if (!(igc_enable_mng_pass_thru(hw) || igc_check_reset_block(hw))) + igc_power_down_phy_copper(hw); +} + +/** + * igc_rx_fifo_flush_base - Clean rx fifo after Rx enable + * @hw: pointer to the HW structure + * + * After Rx enable, if manageability is enabled then there is likely some + * bad data at the start of the fifo and possibly in the DMA fifo. This + * function clears the fifos and flushes any packets that came in as rx was + * being enabled. 
+ */ +void igc_rx_fifo_flush_base(struct igc_hw *hw) +{ + u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; + int i, ms_wait; + + /* disable IPv6 options as per hardware errata */ + rfctl = rd32(IGC_RFCTL); + rfctl |= IGC_RFCTL_IPV6_EX_DIS; + wr32(IGC_RFCTL, rfctl); + + if (!(rd32(IGC_MANC) & IGC_MANC_RCV_TCO_EN)) + return; + + /* Disable all Rx queues */ + for (i = 0; i < 4; i++) { + rxdctl[i] = rd32(IGC_RXDCTL(i)); + wr32(IGC_RXDCTL(i), + rxdctl[i] & ~IGC_RXDCTL_QUEUE_ENABLE); + } + /* Poll all queues to verify they have shut down */ + for (ms_wait = 0; ms_wait < 10; ms_wait++) { + usleep_range(1000, 2000); + rx_enabled = 0; + for (i = 0; i < 4; i++) + rx_enabled |= rd32(IGC_RXDCTL(i)); + if (!(rx_enabled & IGC_RXDCTL_QUEUE_ENABLE)) + break; + } + + if (ms_wait == 10) + pr_debug("Queue disable timed out after 10ms\n"); + + /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all + * incoming packets are rejected. Set enable and wait 2ms so that + * any packet that was coming in as RCTL.EN was set is flushed + */ + wr32(IGC_RFCTL, rfctl & ~IGC_RFCTL_LEF); + + rlpml = rd32(IGC_RLPML); + wr32(IGC_RLPML, 0); + + rctl = rd32(IGC_RCTL); + temp_rctl = rctl & ~(IGC_RCTL_EN | IGC_RCTL_SBP); + temp_rctl |= IGC_RCTL_LPE; + + wr32(IGC_RCTL, temp_rctl); + wr32(IGC_RCTL, temp_rctl | IGC_RCTL_EN); + wrfl(); + usleep_range(2000, 3000); + + /* Enable Rx queues that were previously enabled and restore our + * previous state + */ + for (i = 0; i < 4; i++) + wr32(IGC_RXDCTL(i), rxdctl[i]); + wr32(IGC_RCTL, rctl); + wrfl(); + + wr32(IGC_RLPML, rlpml); + wr32(IGC_RFCTL, rfctl); + + /* Flush receive errors generated by workaround */ + rd32(IGC_ROC); + rd32(IGC_RNBC); + rd32(IGC_MPC); +} + +static struct igc_mac_operations igc_mac_ops_base = { + .init_hw = igc_init_hw_base, + .check_for_link = igc_check_for_link_base, + .rar_set = igc_rar_set, + .read_mac_addr = igc_read_mac_addr_base, + .get_speed_and_duplex = igc_get_link_up_info_base, +}; + +static const struct igc_phy_operations igc_phy_ops_base = { + .acquire = igc_acquire_phy_base, + .release = igc_release_phy_base, + .reset = igc_phy_hw_reset, + .read_reg = igc_read_phy_reg_gpy, + .write_reg = igc_write_phy_reg_gpy, +}; + +const struct igc_info igc_base_info = { + .get_invariants = igc_get_invariants_base, + .mac_ops = &igc_mac_ops_base, + .phy_ops = &igc_phy_ops_base, +}; diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h new file mode 100644 index 000000000000..35588fa7b8c5 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_BASE_H +#define _IGC_BASE_H + +/* forward declaration */ +void igc_rx_fifo_flush_base(struct igc_hw *hw); +void igc_power_down_phy_copper_base(struct igc_hw *hw); + +/* Transmit Descriptor - Advanced */ +union igc_adv_tx_desc { + struct { + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; + } read; + struct { + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; + } wb; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ +#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ +#define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ +#define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ +#define IGC_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ 
+#define IGC_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ +#define IGC_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ +#define IGC_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define IGC_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +#define IGC_RAR_ENTRIES 16 + +struct igc_adv_data_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + u32 data; + struct { + u32 datalen:16; /* Data buffer length */ + u32 rsvd:4; + u32 dtyp:4; /* Descriptor type */ + u32 dcmd:8; /* Descriptor command */ + } config; + } lower; + union { + u32 data; + struct { + u32 status:4; /* Descriptor status */ + u32 idx:4; + u32 popts:6; /* Packet Options */ + u32 paylen:18; /* Payload length */ + } options; + } upper; +}; + +/* Receive Descriptor - Advanced */ +union igc_adv_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + } read; + struct { + struct { + union { + __le32 data; + struct { + __le16 pkt_info; /*RSS type, Pkt type*/ + /* Split Header, header buffer len */ + __le16 hdr_info; + } hs_rss; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +/* Adv Transmit Descriptor Config Masks */ +#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +/* Additional Transmit Descriptor Control definitions */ +#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ + +/* Additional Receive Descriptor Control definitions */ +#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ + +/* SRRCTL bit definitions */ +#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ +#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ +#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 + +#endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h new file mode 100644 index 000000000000..8740754ea1fd --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -0,0 +1,389 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_DEFINES_H_ +#define _IGC_DEFINES_H_ + +#define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ + +/* PCI Bus Info */ +#define PCIE_DEVICE_CONTROL2 0x28 +#define PCIE_DEVICE_CONTROL2_16ms 0x0005 + +/* Physical Func Reset Done Indication */ +#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 + +/* Loop limit on how long we wait for auto-negotiation to complete */ +#define COPPER_LINK_UP_LIMIT 10 +#define PHY_AUTO_NEG_LIMIT 45 +#define PHY_FORCE_LIMIT 20 + +/* Number of 100 microseconds we wait for PCI Express master disable */ +#define MASTER_DISABLE_TIMEOUT 800 +/*Blocks new Master requests */ +#define IGC_CTRL_GIO_MASTER_DISABLE 0x00000004 +/* Status of Master requests. */ +#define IGC_STATUS_GIO_MASTER_ENABLE 0x00080000 + +/* PCI Express Control */ +#define IGC_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define IGC_GCR_CMPL_TMOUT_10ms 0x00001000 +#define IGC_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define IGC_GCR_CAP_VER2 0x00040000 + +/* Receive Address + * Number of high/low register pairs in the RAR. 
The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. + * Technically, we have 16 spots. However, we reserve one of these spots + * (RAR[15]) for our directed address used by controllers with + * manageability enabled, allowing us room for 15 multicast addresses. + */ +#define IGC_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define IGC_RAH_POOL_1 0x00040000 +#define IGC_RAL_MAC_ADDR_LEN 4 +#define IGC_RAH_MAC_ADDR_LEN 2 + +/* Error Codes */ +#define IGC_SUCCESS 0 +#define IGC_ERR_NVM 1 +#define IGC_ERR_PHY 2 +#define IGC_ERR_CONFIG 3 +#define IGC_ERR_PARAM 4 +#define IGC_ERR_MAC_INIT 5 +#define IGC_ERR_RESET 9 +#define IGC_ERR_MASTER_REQUESTS_PENDING 10 +#define IGC_ERR_BLK_PHY_RESET 12 +#define IGC_ERR_SWFW_SYNC 13 + +/* Device Control */ +#define IGC_CTRL_RST 0x04000000 /* Global reset */ + +#define IGC_CTRL_PHY_RST 0x80000000 /* PHY Reset */ +#define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define IGC_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define IGC_CTRL_FRCDPX 0x00001000 /* Force Duplex */ + +#define IGC_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ +#define IGC_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ + +#define IGC_CONNSW_AUTOSENSE_EN 0x1 + +/* PBA constants */ +#define IGC_PBA_34K 0x0022 + +/* SW Semaphore Register */ +#define IGC_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ +#define IGC_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ + +/* SWFW_SYNC Definitions */ +#define IGC_SWFW_EEP_SM 0x1 +#define IGC_SWFW_PHY0_SM 0x2 + +/* Autoneg Advertisement Register */ +#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ +#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ +#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ +#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ +#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ +#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ + +/* Link Partner Ability Register (Base Page) */ +#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ +#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ + +/* 1000BASE-T Control Register */ +#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ +#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ +#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ + +/* 1000BASE-T Status Register */ +#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ +#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ + +/* PHY GPY 211 registers */ +#define STANDARD_AN_REG_MASK 0x0007 /* MMD */ +#define ANEG_MULTIGBT_AN_CTRL 0x0020 /* MULTI GBT AN Control Register */ +#define MMD_DEVADDR_SHIFT 16 /* Shift MMD to higher bits */ +#define CR_2500T_FD_CAPS 0x0080 /* Advertise 2500T FD capability */ + +/* NVM Control */ +/* Number of milliseconds for NVM auto read done after MAC reset. 
*/ +#define AUTO_READ_DONE_TIMEOUT 10 +#define IGC_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ +#define IGC_EECD_REQ 0x00000040 /* NVM Access Request */ +#define IGC_EECD_GNT 0x00000080 /* NVM Access Grant */ +/* NVM Addressing bits based on type 0=small, 1=large */ +#define IGC_EECD_ADDR_BITS 0x00000400 +#define IGC_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ +#define IGC_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ +#define IGC_EECD_SIZE_EX_SHIFT 11 +#define IGC_EECD_FLUPD_I225 0x00800000 /* Update FLASH */ +#define IGC_EECD_FLUDONE_I225 0x04000000 /* Update FLASH done*/ +#define IGC_EECD_FLASH_DETECTED_I225 0x00080000 /* FLASH detected */ +#define IGC_FLUDONE_ATTEMPTS 20000 +#define IGC_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ + +/* Offset to data in NVM read/write registers */ +#define IGC_NVM_RW_REG_DATA 16 +#define IGC_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ +#define IGC_NVM_RW_REG_START 1 /* Start operation */ +#define IGC_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define IGC_NVM_POLL_READ 0 /* Flag for polling for read complete */ + +/* NVM Word Offsets */ +#define NVM_CHECKSUM_REG 0x003F + +/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ +#define NVM_SUM 0xBABA + +#define NVM_PBA_OFFSET_0 8 +#define NVM_PBA_OFFSET_1 9 +#define NVM_RESERVED_WORD 0xFFFF +#define NVM_PBA_PTR_GUARD 0xFAFA +#define NVM_WORD_SIZE_BASE_SHIFT 6 + +/* Collision related configuration parameters */ +#define IGC_COLLISION_THRESHOLD 15 +#define IGC_CT_SHIFT 4 +#define IGC_COLLISION_DISTANCE 63 +#define IGC_COLD_SHIFT 12 + +/* Device Status */ +#define IGC_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ +#define IGC_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define IGC_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define IGC_STATUS_FUNC_SHIFT 2 +#define IGC_STATUS_FUNC_1 0x00000004 /* Function 1 */ +#define IGC_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define IGC_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define IGC_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define IGC_STATUS_SPEED_2500 0x00400000 /* Speed 2.5Gb/s */ + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + +/* 1Gbps and 2.5Gbps half duplex is not supported, nor spec-compliant. */ +#define ADVERTISE_10_HALF 0x0001 +#define ADVERTISE_10_FULL 0x0002 +#define ADVERTISE_100_HALF 0x0004 +#define ADVERTISE_100_FULL 0x0008 +#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ +#define ADVERTISE_1000_FULL 0x0020 +#define ADVERTISE_2500_HALF 0x0040 /* Not used, just FYI */ +#define ADVERTISE_2500_FULL 0x0080 + +#define IGC_ALL_SPEED_DUPLEX_2500 ( \ + ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ + ADVERTISE_100_FULL | ADVERTISE_1000_FULL | ADVERTISE_2500_FULL) + +#define AUTONEG_ADVERTISE_SPEED_DEFAULT_2500 IGC_ALL_SPEED_DUPLEX_2500 + +/* Interrupt Cause Read */ +#define IGC_ICR_TXDW BIT(0) /* Transmit desc written back */ +#define IGC_ICR_TXQE BIT(1) /* Transmit Queue empty */ +#define IGC_ICR_LSC BIT(2) /* Link Status Change */ +#define IGC_ICR_RXSEQ BIT(3) /* Rx sequence error */ +#define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. 
threshold (0) */
+#define IGC_ICR_RXO		BIT(6)	/* Rx overrun */
+#define IGC_ICR_RXT0		BIT(7)	/* Rx timer intr (ring 0) */
+#define IGC_ICR_DRSTA		BIT(30)	/* Device Reset Asserted */
+
+/* If this bit asserted, the driver should claim the interrupt */
+#define IGC_ICR_INT_ASSERTED	BIT(31)
+
+#define IGC_ICS_RXT0		IGC_ICR_RXT0 /* Rx timer intr */
+
+#define IMS_ENABLE_MASK ( \
+	IGC_IMS_RXT0 |   \
+	IGC_IMS_TXDW |   \
+	IGC_IMS_RXDMT0 | \
+	IGC_IMS_RXSEQ |  \
+	IGC_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define IGC_IMS_TXDW		IGC_ICR_TXDW	/* Tx desc written back */
+#define IGC_IMS_RXSEQ		IGC_ICR_RXSEQ	/* Rx sequence error */
+#define IGC_IMS_LSC		IGC_ICR_LSC	/* Link Status Change */
+#define IGC_IMS_DOUTSYNC	IGC_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define IGC_IMS_DRSTA		IGC_ICR_DRSTA	/* Device Reset Asserted */
+#define IGC_IMS_RXT0		IGC_ICR_RXT0	/* Rx timer intr */
+#define IGC_IMS_RXDMT0		IGC_ICR_RXDMT0	/* Rx desc min. threshold */
+
+#define IGC_QVECTOR_MASK	0x7FFC	/* Q-vector mask */
+#define IGC_ITR_VAL_MASK	0x04	/* ITR value mask */
+
+/* Interrupt Cause Set */
+#define IGC_ICS_LSC		IGC_ICR_LSC	/* Link Status Change */
+#define IGC_ICS_RXDMT0		IGC_ICR_RXDMT0	/* rx desc min. threshold */
+#define IGC_ICS_DRSTA		IGC_ICR_DRSTA	/* Device Reset Asserted */
+
+#define IGC_ICR_DOUTSYNC	0x10000000 /* NIC DMA out of sync */
+#define IGC_EITR_CNT_IGNR	0x80000000 /* Don't reset counters on write */
+#define IGC_IVAR_VALID		0x80
+#define IGC_GPIE_NSICR		0x00000001
+#define IGC_GPIE_MSIX_MODE	0x00000010
+#define IGC_GPIE_EIAME		0x40000000
+#define IGC_GPIE_PBA		0x80000000
+
+/* Transmit Descriptor bit definitions */
+#define IGC_TXD_DTYP_D		0x00100000 /* Data Descriptor */
+#define IGC_TXD_DTYP_C		0x00000000 /* Context Descriptor */
+#define IGC_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define IGC_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define IGC_TXD_CMD_EOP		0x01000000 /* End of Packet */
+#define IGC_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_TXD_CMD_IC		0x04000000 /* Insert Checksum */
+#define IGC_TXD_CMD_RS		0x08000000 /* Report Status */
+#define IGC_TXD_CMD_RPS		0x10000000 /* Report Packet Sent */
+#define IGC_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define IGC_TXD_CMD_VLE		0x40000000 /* Add VLAN tag */
+#define IGC_TXD_CMD_IDE		0x80000000 /* Enable Tidv register */
+#define IGC_TXD_STAT_DD		0x00000001 /* Descriptor Done */
+#define IGC_TXD_STAT_EC		0x00000002 /* Excess Collisions */
+#define IGC_TXD_STAT_LC		0x00000004 /* Late Collisions */
+#define IGC_TXD_STAT_TU		0x00000008 /* Transmit underrun */
+#define IGC_TXD_CMD_TCP		0x01000000 /* TCP packet */
+#define IGC_TXD_CMD_IP		0x02000000 /* IP packet */
+#define IGC_TXD_CMD_TSE		0x04000000 /* TCP Seg enable */
+#define IGC_TXD_STAT_TC		0x00000004 /* Tx Underrun */
+#define IGC_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define IGC_TCTL_EN		0x00000002 /* enable Tx */
+#define IGC_TCTL_PSP		0x00000008 /* pad short packets */
+#define IGC_TCTL_CT		0x00000ff0 /* collision threshold */
+#define IGC_TCTL_COLD		0x003ff000 /* collision distance */
+#define IGC_TCTL_RTLC		0x01000000 /* Re-transmit on late collision */
+#define IGC_TCTL_MULR		0x10000000 /* Multiple request support */
+
+#define IGC_CT_SHIFT		4
+#define IGC_COLLISION_THRESHOLD	15
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW	0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH	0x00000100
+#define FLOW_CONTROL_TYPE		0x8808
+/* Enable XON frame transmission */
+#define IGC_FCRTL_XONE		0x80000000
+
+/* 
Management Control */ +#define IGC_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ +#define IGC_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ + +/* Receive Control */ +#define IGC_RCTL_RST 0x00000001 /* Software reset */ +#define IGC_RCTL_EN 0x00000002 /* enable */ +#define IGC_RCTL_SBP 0x00000004 /* store bad packet */ +#define IGC_RCTL_UPE 0x00000008 /* unicast promisc enable */ +#define IGC_RCTL_MPE 0x00000010 /* multicast promisc enable */ +#define IGC_RCTL_LPE 0x00000020 /* long packet enable */ +#define IGC_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define IGC_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ + +#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ +#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ + +/* Receive Descriptor bit definitions */ +#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ + +#define IGC_RXDEXT_STATERR_CE 0x01000000 +#define IGC_RXDEXT_STATERR_SE 0x02000000 +#define IGC_RXDEXT_STATERR_SEQ 0x04000000 +#define IGC_RXDEXT_STATERR_CXE 0x10000000 +#define IGC_RXDEXT_STATERR_TCPE 0x20000000 +#define IGC_RXDEXT_STATERR_IPE 0x40000000 +#define IGC_RXDEXT_STATERR_RXE 0x80000000 + +/* Same mask, but for extended and packet split descriptors */ +#define IGC_RXDEXT_ERR_FRAME_ERR_MASK ( \ + IGC_RXDEXT_STATERR_CE | \ + IGC_RXDEXT_STATERR_SE | \ + IGC_RXDEXT_STATERR_SEQ | \ + IGC_RXDEXT_STATERR_CXE | \ + IGC_RXDEXT_STATERR_RXE) + +/* Header split receive */ +#define IGC_RFCTL_IPV6_EX_DIS 0x00010000 +#define IGC_RFCTL_LEF 0x00040000 + +#define IGC_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ + +#define IGC_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define IGC_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define IGC_RCTL_DPF 0x00400000 /* discard pause frames */ +#define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ + +#define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ +#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ + +/* GPY211 - I225 defines */ +#define GPY_MMD_MASK 0xFFFF0000 +#define GPY_MMD_SHIFT 16 +#define GPY_REG_MASK 0x0000FFFF + +#define IGC_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ + +/* MAC definitions */ +#define IGC_FACTPS_MNGCG 0x20000000 +#define IGC_FWSM_MODE_MASK 0xE +#define IGC_FWSM_MODE_SHIFT 1 + +/* Management Control */ +#define IGC_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ +#define IGC_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ + +/* PHY */ +#define PHY_REVISION_MASK 0xFFFFFFF0 +#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ +#define IGC_GEN_POLL_TIMEOUT 1920 + +/* PHY Control Register */ +#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ +#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ +#define MII_CR_POWER_DOWN 0x0800 /* Power down */ +#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ +#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ +#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ +#define MII_CR_SPEED_1000 0x0040 +#define MII_CR_SPEED_100 0x2000 +#define MII_CR_SPEED_10 0x0000 + +/* PHY Status Register */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ + +/* PHY 1000 MII Register/Bit Definitions */ +/* PHY Registers defined by IEEE */ +#define PHY_CONTROL 0x00 /* Control Register */ +#define PHY_STATUS 0x01 /* Status Register */ +#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ +#define PHY_ID2 
0x03 /* Phy Id Reg (word 2) */ +#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ +#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ +#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ +#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ + +/* Bit definitions for valid PHY IDs. I = Integrated E = External */ +#define I225_I_PHY_ID 0x67C9DC00 + +/* MDI Control */ +#define IGC_MDIC_DATA_MASK 0x0000FFFF +#define IGC_MDIC_REG_MASK 0x001F0000 +#define IGC_MDIC_REG_SHIFT 16 +#define IGC_MDIC_PHY_MASK 0x03E00000 +#define IGC_MDIC_PHY_SHIFT 21 +#define IGC_MDIC_OP_WRITE 0x04000000 +#define IGC_MDIC_OP_READ 0x08000000 +#define IGC_MDIC_READY 0x10000000 +#define IGC_MDIC_INT_EN 0x20000000 +#define IGC_MDIC_ERROR 0x40000000 +#define IGC_MDIC_DEST 0x80000000 + +#define IGC_N0_QUEUE -1 + +#endif /* _IGC_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h new file mode 100644 index 000000000000..c50414f48f0d --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_HW_H_ +#define _IGC_HW_H_ + +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/netdevice.h> + +#include "igc_regs.h" +#include "igc_defines.h" +#include "igc_mac.h" +#include "igc_phy.h" +#include "igc_nvm.h" +#include "igc_i225.h" +#include "igc_base.h" + +#define IGC_DEV_ID_I225_LM 0x15F2 +#define IGC_DEV_ID_I225_V 0x15F3 + +#define IGC_FUNC_0 0 + +/* Function pointers for the MAC. */ +struct igc_mac_operations { + s32 (*check_for_link)(struct igc_hw *hw); + s32 (*reset_hw)(struct igc_hw *hw); + s32 (*init_hw)(struct igc_hw *hw); + s32 (*setup_physical_interface)(struct igc_hw *hw); + void (*rar_set)(struct igc_hw *hw, u8 *address, u32 index); + s32 (*read_mac_addr)(struct igc_hw *hw); + s32 (*get_speed_and_duplex)(struct igc_hw *hw, u16 *speed, + u16 *duplex); + s32 (*acquire_swfw_sync)(struct igc_hw *hw, u16 mask); + void (*release_swfw_sync)(struct igc_hw *hw, u16 mask); +}; + +enum igc_mac_type { + igc_undefined = 0, + igc_i225, + igc_num_macs /* List is 1-based, so subtract 1 for true count. 
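
The igc_mac_operations and igc_info definitions above implement a vtable pattern: the probe path picks an igc_info for the device ID and binds its function pointers into the hw structure, so common code can call hw->mac.ops.* without branching on the MAC type. The following is a minimal, self-contained model of that dispatch; the struct names and the two stub functions are illustrative, not taken from the driver.

#include <stdio.h>

struct hw;

struct mac_ops {
	int (*init_hw)(struct hw *hw);
	int (*check_for_link)(struct hw *hw);
};

struct hw {
	const struct mac_ops *ops;	/* bound once at probe time */
};

static int base_init_hw(struct hw *hw)      { (void)hw; puts("init"); return 0; }
static int base_check_link(struct hw *hw)   { (void)hw; puts("link"); return 0; }

static const struct mac_ops base_ops = {
	.init_hw        = base_init_hw,
	.check_for_link = base_check_link,
};

int main(void)
{
	struct hw hw = { .ops = &base_ops };	/* from the per-board info table */

	hw.ops->init_hw(&hw);		/* generic code, family-specific body */
	hw.ops->check_for_link(&hw);
	return 0;
}
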
*/ +}; + +enum igc_phy_type { + igc_phy_unknown = 0, + igc_phy_none, + igc_phy_i225, +}; + +enum igc_media_type { + igc_media_type_unknown = 0, + igc_media_type_copper = 1, + igc_num_media_types +}; + +enum igc_nvm_type { + igc_nvm_unknown = 0, + igc_nvm_flash_hw, + igc_nvm_invm, +}; + +struct igc_info { + s32 (*get_invariants)(struct igc_hw *hw); + struct igc_mac_operations *mac_ops; + const struct igc_phy_operations *phy_ops; + struct igc_nvm_operations *nvm_ops; +}; + +extern const struct igc_info igc_base_info; + +struct igc_mac_info { + struct igc_mac_operations ops; + + u8 addr[ETH_ALEN]; + u8 perm_addr[ETH_ALEN]; + + enum igc_mac_type type; + + u32 collision_delta; + u32 ledctl_default; + u32 ledctl_mode1; + u32 ledctl_mode2; + u32 mc_filter_type; + u32 tx_packet_delta; + u32 txcw; + + u16 mta_reg_count; + u16 uta_reg_count; + + u16 rar_entry_count; + + u8 forced_speed_duplex; + + bool adaptive_ifs; + bool has_fwsm; + bool asf_firmware_present; + bool arc_subsystem_valid; + + bool autoneg; + bool autoneg_failed; + bool get_link_status; +}; + +struct igc_nvm_operations { + s32 (*acquire)(struct igc_hw *hw); + s32 (*read)(struct igc_hw *hw, u16 offset, u16 i, u16 *data); + void (*release)(struct igc_hw *hw); + s32 (*write)(struct igc_hw *hw, u16 offset, u16 i, u16 *data); + s32 (*update)(struct igc_hw *hw); + s32 (*validate)(struct igc_hw *hw); + s32 (*valid_led_default)(struct igc_hw *hw, u16 *data); +}; + +struct igc_phy_operations { + s32 (*acquire)(struct igc_hw *hw); + s32 (*check_polarity)(struct igc_hw *hw); + s32 (*check_reset_block)(struct igc_hw *hw); + s32 (*force_speed_duplex)(struct igc_hw *hw); + s32 (*get_cfg_done)(struct igc_hw *hw); + s32 (*get_cable_length)(struct igc_hw *hw); + s32 (*get_phy_info)(struct igc_hw *hw); + s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data); + void (*release)(struct igc_hw *hw); + s32 (*reset)(struct igc_hw *hw); + s32 (*write_reg)(struct igc_hw *hw, u32 address, u16 data); +}; + +struct igc_nvm_info { + struct igc_nvm_operations ops; + enum igc_nvm_type type; + + u32 flash_bank_size; + u32 flash_base_addr; + + u16 word_size; + u16 delay_usec; + u16 address_bits; + u16 opcode_bits; + u16 page_size; +}; + +struct igc_phy_info { + struct igc_phy_operations ops; + + enum igc_phy_type type; + + u32 addr; + u32 id; + u32 reset_delay_us; /* in usec */ + u32 revision; + + enum igc_media_type media_type; + + u16 autoneg_advertised; + u16 autoneg_mask; + u16 cable_length; + u16 max_cable_length; + u16 min_cable_length; + u16 pair_length[4]; + + u8 mdix; + + bool disable_polarity_correction; + bool is_mdix; + bool polarity_correction; + bool reset_disable; + bool speed_downgraded; + bool autoneg_wait_to_complete; +}; + +struct igc_bus_info { + u16 func; + u16 pci_cmd_word; +}; + +enum igc_fc_mode { + igc_fc_none = 0, + igc_fc_rx_pause, + igc_fc_tx_pause, + igc_fc_full, + igc_fc_default = 0xFF +}; + +struct igc_fc_info { + u32 high_water; /* Flow control high-water mark */ + u32 low_water; /* Flow control low-water mark */ + u16 pause_time; /* Flow control pause timer */ + bool send_xon; /* Flow control send XON */ + bool strict_ieee; /* Strict IEEE mode */ + enum igc_fc_mode current_mode; /* Type of flow control */ + enum igc_fc_mode requested_mode; +}; + +struct igc_dev_spec_base { + bool global_device_reset; + bool eee_disable; + bool clear_semaphore_once; + bool module_plugged; + u8 media_port; + bool mas_capable; +}; + +struct igc_hw { + void *back; + + u8 __iomem *hw_addr; + unsigned long io_base; + + struct igc_mac_info mac; + struct 
igc_fc_info fc;
+	struct igc_nvm_info nvm;
+	struct igc_phy_info phy;
+
+	struct igc_bus_info bus;
+
+	union {
+		struct igc_dev_spec_base	_base;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8 revision_id;
+};
+
+/* Statistics counters collected by the MAC */
+struct igc_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+	u64 cbtmpc;
+	u64 htdpmc;
+	u64 cbrdpc;
+	u64 cbrmpc;
+	u64 rpthc;
+	u64 hgptc;
+	u64 htcbdpc;
+	u64 hgorc;
+	u64 hgotc;
+	u64 lenerrs;
+	u64 scvpc;
+	u64 hrmpc;
+	u64 doosync;
+	u64 o2bgptc;
+	u64 o2bspc;
+	u64 b2ospc;
+	u64 b2ogprc;
+};
+
+struct net_device *igc_get_hw_dev(struct igc_hw *hw);
+#define hw_dbg(format, arg...) \
+	netdev_dbg(igc_get_hw_dev(hw), format, ##arg)
+
+s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+
+#endif /* _IGC_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
new file mode 100644
index 000000000000..c25f555aaf82
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+
+/**
+ * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return success if the access grant bit is set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+static s32 igc_acquire_nvm_i225(struct igc_hw *hw)
+{
+	return igc_acquire_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_release_nvm_i225 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
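
The igc_hw_stats fields above are u64 even though the MAC counters are 32-bit registers, because most counters clear on read and the driver accumulates successive reads into software totals. Below is a small userspace model of that accumulate-on-read pattern; the fake_crcerrs variable stands in for an MMIO register that the driver would read with rd32().

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_crcerrs = 7;	/* stand-in for the CRCERRS register */

static uint32_t read_clear(uint32_t *reg)
{
	uint32_t v = *reg;

	*reg = 0;	/* hardware clears the counter on read */
	return v;
}

int main(void)
{
	uint64_t crcerrs = 0;	/* wide software accumulator */

	crcerrs += read_clear(&fake_crcerrs);
	fake_crcerrs = 3;	/* more errors arrive between polls */
	crcerrs += read_clear(&fake_crcerrs);
	printf("total CRC errors: %llu\n", (unsigned long long)crcerrs);
	return 0;
}
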
+ */ +static void igc_release_nvm_i225(struct igc_hw *hw) +{ + igc_release_swfw_sync_i225(hw, IGC_SWFW_EEP_SM); +} + +/** + * igc_get_hw_semaphore_i225 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + */ +static s32 igc_get_hw_semaphore_i225(struct igc_hw *hw) +{ + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + u32 swsm; + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = rd32(IGC_SWSM); + if (!(swsm & IGC_SWSM_SMBI)) + break; + + usleep_range(500, 600); + i++; + } + + if (i == timeout) { + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._base.clear_semaphore_once) { + hw->dev_spec._base.clear_semaphore_once = false; + igc_put_hw_semaphore(hw); + for (i = 0; i < timeout; i++) { + swsm = rd32(IGC_SWSM); + if (!(swsm & IGC_SWSM_SMBI)) + break; + + usleep_range(500, 600); + } + } + + /* If we do not have the semaphore here, we have to give up. */ + if (i == timeout) { + hw_dbg("Driver can't access device - SMBI bit is set.\n"); + return -IGC_ERR_NVM; + } + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = rd32(IGC_SWSM); + wr32(IGC_SWSM, swsm | IGC_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (rd32(IGC_SWSM) & IGC_SWSM_SWESMBI) + break; + + usleep_range(500, 600); + } + + if (i == timeout) { + /* Release semaphores */ + igc_put_hw_semaphore(hw); + hw_dbg("Driver can't access the NVM\n"); + return -IGC_ERR_NVM; + } + + return 0; +} + +/** + * igc_acquire_swfw_sync_i225 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + */ +s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask) +{ + s32 i = 0, timeout = 200; + u32 fwmask = mask << 16; + u32 swmask = mask; + s32 ret_val = 0; + u32 swfw_sync; + + while (i < timeout) { + if (igc_get_hw_semaphore_i225(hw)) { + ret_val = -IGC_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = rd32(IGC_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* Firmware currently using resource (fwmask) */ + igc_put_hw_semaphore(hw); + mdelay(5); + i++; + } + + if (i == timeout) { + hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -IGC_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + wr32(IGC_SW_FW_SYNC, swfw_sync); + + igc_put_hw_semaphore(hw); +out: + return ret_val; +} + +/** + * igc_release_swfw_sync_i225 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + */ +void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask) +{ + u32 swfw_sync; + + while (igc_get_hw_semaphore_i225(hw)) + ; /* Empty */ + + swfw_sync = rd32(IGC_SW_FW_SYNC); + swfw_sync &= ~mask; + wr32(IGC_SW_FW_SYNC, swfw_sync); + + igc_put_hw_semaphore(hw); +} + +/** + * igc_read_nvm_srrd_i225 - Reads Shadow Ram using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the Shadow Ram to read + * @words: number of words to read + * @data: word read from the Shadow Ram + * + * Reads a 16 bit word from the Shadow Ram using the EERD register. + * Uses necessary synchronization semaphores. 
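
The SWSM handshake above has two stages: wait for the software semaphore bit (SMBI) to clear, then set the firmware semaphore bit (SWESMBI) and read it back to confirm the write latched before firmware could claim it. The sketch below is a loose model of both stages with a plain variable in place of the register; the sleeps between polls and the clear-semaphore-once recovery path are left out.

#include <stdint.h>
#include <stdio.h>

#define SMBI	0x1u	/* software semaphore bit */
#define SWESMBI	0x2u	/* software/firmware semaphore bit */

static uint32_t swsm;	/* stand-in for the SWSM register */

static int get_hw_semaphore(int timeout)
{
	int i;

	/* Stage 1: wait for the SW semaphore to be free. */
	for (i = 0; i < timeout && (swsm & SMBI); i++)
		;	/* real code sleeps between polls */
	if (i == timeout)
		return -1;

	/* Stage 2: claim the FW semaphore and confirm the write latched. */
	for (i = 0; i < timeout; i++) {
		swsm |= SWESMBI;
		if (swsm & SWESMBI)	/* read back: did we win the race? */
			return 0;
	}
	swsm &= ~SMBI;	/* give the semaphore back on failure */
	return -1;
}

int main(void)
{
	printf("acquire: %s\n", get_hw_semaphore(100) ? "failed" : "ok");
	return 0;
}
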
+ */
+static s32 igc_read_nvm_srrd_i225(struct igc_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+			IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+		status = hw->nvm.ops.acquire(hw);
+		if (status)
+			break;
+
+		status = igc_read_nvm_eerd(hw, offset, count, data + i);
+		hw->nvm.ops.release(hw);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * igc_write_nvm_srwr - Write to Shadow Ram using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow Ram to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ * Writes data to Shadow Ram at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function, the
+ * Shadow Ram will most likely contain an invalid checksum.
+ */
+static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 attempts = 100000;
+	u32 i, k, eewr = 0;
+	s32 ret_val = -IGC_ERR_NVM;
+
+	/* A check for invalid values: offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+	    words == 0) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) |
+			(data[i] << IGC_NVM_RW_REG_DATA) |
+			IGC_NVM_RW_REG_START;
+
+		wr32(IGC_SRWR, eewr);
+
+		for (k = 0; k < attempts; k++) {
+			if (IGC_NVM_RW_REG_DONE &
+			    rd32(IGC_SRWR)) {
+				ret_val = 0;
+				break;
+			}
+			udelay(5);
+		}
+
+		if (ret_val) {
+			hw_dbg("Shadow RAM write EEWR timed out\n");
+			break;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_write_nvm_srwr_i225 - Write to Shadow RAM using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow RAM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ * Writes data to Shadow RAM at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function, the
+ * data will not be committed to FLASH and also Shadow RAM will most likely
+ * contain an invalid checksum.
+ *
+ * If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ * partially written.
+ */
+static s32 igc_write_nvm_srwr_i225(struct igc_hw *hw, u16 offset, u16 words,
+				   u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to write in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+ IGC_EERD_EEWR_MAX_COUNT : (words - i); + + status = hw->nvm.ops.acquire(hw); + if (status) + break; + + status = igc_write_nvm_srwr(hw, offset, count, data + i); + hw->nvm.ops.release(hw); + if (status) + break; + } + + return status; +} + +/** + * igc_validate_nvm_checksum_i225 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + */ +static s32 igc_validate_nvm_checksum_i225(struct igc_hw *hw) +{ + s32 (*read_op_ptr)(struct igc_hw *hw, u16 offset, u16 count, + u16 *data); + s32 status = 0; + + status = hw->nvm.ops.acquire(hw); + if (status) + goto out; + + /* Replace the read function with semaphore grabbing with + * the one that skips this for a while. + * We have semaphore taken already here. + */ + read_op_ptr = hw->nvm.ops.read; + hw->nvm.ops.read = igc_read_nvm_eerd; + + status = igc_validate_nvm_checksum(hw); + + /* Revert original read operation. */ + hw->nvm.ops.read = read_op_ptr; + + hw->nvm.ops.release(hw); + +out: + return status; +} + +/** + * igc_pool_flash_update_done_i225 - Pool FLUDONE status + * @hw: pointer to the HW structure + */ +static s32 igc_pool_flash_update_done_i225(struct igc_hw *hw) +{ + s32 ret_val = -IGC_ERR_NVM; + u32 i, reg; + + for (i = 0; i < IGC_FLUDONE_ATTEMPTS; i++) { + reg = rd32(IGC_EECD); + if (reg & IGC_EECD_FLUDONE_I225) { + ret_val = 0; + break; + } + udelay(5); + } + + return ret_val; +} + +/** + * igc_update_flash_i225 - Commit EEPROM to the flash + * @hw: pointer to the HW structure + */ +static s32 igc_update_flash_i225(struct igc_hw *hw) +{ + s32 ret_val = 0; + u32 flup; + + ret_val = igc_pool_flash_update_done_i225(hw); + if (ret_val == -IGC_ERR_NVM) { + hw_dbg("Flash update time out\n"); + goto out; + } + + flup = rd32(IGC_EECD) | IGC_EECD_FLUPD_I225; + wr32(IGC_EECD, flup); + + ret_val = igc_pool_flash_update_done_i225(hw); + if (ret_val) + hw_dbg("Flash update time out\n"); + else + hw_dbg("Flash update complete\n"); + +out: + return ret_val; +} + +/** + * igc_update_nvm_checksum_i225 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. Next commit EEPROM data onto the Flash. + */ +static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw) +{ + u16 checksum = 0; + s32 ret_val = 0; + u16 i, nvm_data; + + /* Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + ret_val = igc_read_nvm_eerd(hw, 0, 1, &nvm_data); + if (ret_val) { + hw_dbg("EEPROM read failed\n"); + goto out; + } + + ret_val = hw->nvm.ops.acquire(hw); + if (ret_val) + goto out; + + /* Do not use hw->nvm.ops.write, hw->nvm.ops.read + * because we do not want to take the synchronization + * semaphores twice here. 
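
Both Shadow RAM helpers above use the same burst pattern: split the transfer into chunks of at most IGC_EERD_EEWR_MAX_COUNT (512) words and take the semaphore per chunk, so firmware can claim the NVM between bursts. A compact standalone version of that loop follows; process() and the commented acquire/release calls are placeholders for the real EERD/EEWR access and semaphore operations.

#include <stdio.h>

#define MAX_COUNT 512	/* mirrors IGC_EERD_EEWR_MAX_COUNT */

static void process(int offset, int count)
{
	printf("burst: offset=%d count=%d\n", offset, count);
}

int main(void)
{
	int words = 1200, i;

	for (i = 0; i < words; i += MAX_COUNT) {
		int count = (words - i) > MAX_COUNT ? MAX_COUNT : (words - i);

		/* acquire();  take the semaphore only for this burst */
		process(i, count);
		/* release(); */
	}
	return 0;	/* prints bursts of 512, 512, 176 words */
}
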
+ */ + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = igc_read_nvm_eerd(hw, i, 1, &nvm_data); + if (ret_val) { + hw->nvm.ops.release(hw); + hw_dbg("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16)NVM_SUM - checksum; + ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, + &checksum); + if (ret_val) { + hw->nvm.ops.release(hw); + hw_dbg("NVM Write Error while updating checksum.\n"); + goto out; + } + + hw->nvm.ops.release(hw); + + ret_val = igc_update_flash_i225(hw); + +out: + return ret_val; +} + +/** + * igc_get_flash_presence_i225 - Check if flash device is detected + * @hw: pointer to the HW structure + */ +bool igc_get_flash_presence_i225(struct igc_hw *hw) +{ + bool ret_val = false; + u32 eec = 0; + + eec = rd32(IGC_EECD); + if (eec & IGC_EECD_FLASH_DETECTED_I225) + ret_val = true; + + return ret_val; +} + +/** + * igc_init_nvm_params_i225 - Init NVM func ptrs. + * @hw: pointer to the HW structure + */ +s32 igc_init_nvm_params_i225(struct igc_hw *hw) +{ + struct igc_nvm_info *nvm = &hw->nvm; + + nvm->ops.acquire = igc_acquire_nvm_i225; + nvm->ops.release = igc_release_nvm_i225; + + /* NVM Function Pointers */ + if (igc_get_flash_presence_i225(hw)) { + hw->nvm.type = igc_nvm_flash_hw; + nvm->ops.read = igc_read_nvm_srrd_i225; + nvm->ops.write = igc_write_nvm_srwr_i225; + nvm->ops.validate = igc_validate_nvm_checksum_i225; + nvm->ops.update = igc_update_nvm_checksum_i225; + } else { + hw->nvm.type = igc_nvm_invm; + nvm->ops.read = igc_read_nvm_eerd; + nvm->ops.write = NULL; + nvm->ops.validate = NULL; + nvm->ops.update = NULL; + } + return 0; +} diff --git a/drivers/net/ethernet/intel/igc/igc_i225.h b/drivers/net/ethernet/intel/igc/igc_i225.h new file mode 100644 index 000000000000..7b66e1f9c0e6 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_i225.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_I225_H_ +#define _IGC_I225_H_ + +s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask); +void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask); + +s32 igc_init_nvm_params_i225(struct igc_hw *hw); +bool igc_get_flash_presence_i225(struct igc_hw *hw); + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c new file mode 100644 index 000000000000..f7683d3ae47c --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -0,0 +1,806 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include <linux/pci.h> +#include <linux/delay.h> + +#include "igc_mac.h" +#include "igc_hw.h" + +/* forward declaration */ +static s32 igc_set_default_fc(struct igc_hw *hw); +static s32 igc_set_fc_watermarks(struct igc_hw *hw); + +/** + * igc_disable_pcie_master - Disables PCI-express master access + * @hw: pointer to the HW structure + * + * Returns 0 (0) if successful, else returns -10 + * (-IGC_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused + * the master requests to be disabled. + * + * Disables PCI-Express master access and verifies there are no pending + * requests. 
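
The validate/update pair above enforces a simple invariant: the first 64 NVM words must sum, modulo 2^16, to NVM_SUM (0xBABA), with the word at NVM_CHECKSUM_REG (0x3F) chosen to make that true. The arithmetic is small enough to show whole; the array below stands in for EERD reads, and the values in it are arbitrary demo data.

#include <stdint.h>
#include <stdio.h>

#define NVM_CHECKSUM_REG 0x3F
#define NVM_SUM 0xBABA

int main(void)
{
	uint16_t nvm[0x40] = { 0x1234, 0xABCD };	/* rest zero for the demo */
	uint16_t sum = 0;
	int i;

	/* Update path: sum words 0..0x3E, then write the balancing word. */
	for (i = 0; i < NVM_CHECKSUM_REG; i++)
		sum += nvm[i];
	nvm[NVM_CHECKSUM_REG] = (uint16_t)(NVM_SUM - sum);

	/* Validation path: the total over all 0x40 words must be 0xBABA. */
	sum += nvm[NVM_CHECKSUM_REG];
	printf("checksum word=0x%04X total=0x%04X\n",
	       (unsigned)nvm[NVM_CHECKSUM_REG], (unsigned)sum);
	return 0;
}
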
+ */
+s32 igc_disable_pcie_master(struct igc_hw *hw)
+{
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl |= IGC_CTRL_GIO_MASTER_DISABLE;
+	wr32(IGC_CTRL, ctrl);
+
+	while (timeout) {
+		if (!(rd32(IGC_STATUS) &
+		      IGC_STATUS_GIO_MASTER_ENABLE))
+			break;
+		usleep_range(2000, 3000);
+		timeout--;
+	}
+
+	if (!timeout) {
+		hw_dbg("Master requests are pending.\n");
+		ret_val = -IGC_ERR_MASTER_REQUESTS_PENDING;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_init_rx_addrs - Initialize receive addresses
+ * @hw: pointer to the HW structure
+ * @rar_count: number of receive address registers
+ *
+ * Setup the receive address registers by setting the base receive address
+ * register to the device's MAC address and clearing all the other receive
+ * address registers to 0.
+ */
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count)
+{
+	u8 mac_addr[ETH_ALEN] = {0};
+	u32 i;
+
+	/* Setup the receive address */
+	hw_dbg("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	hw_dbg("Clearing RAR[1-%u]\n", rar_count - 1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ * igc_setup_link - Setup flow control and link settings
+ * @hw: pointer to the HW structure
+ *
+ * Determines which flow control settings to use, then configures flow
+ * control. Calls the appropriate media-specific link configuration
+ * function. Assuming the adapter has a valid link partner, a valid link
+ * should be established. Assumes the hardware has previously been reset
+ * and the transmitter and receiver are not enabled.
+ */
+s32 igc_setup_link(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (igc_check_reset_block(hw))
+		goto out;
+
+	/* If requested flow control is set to default, set flow control
+	 * to its default values.
+	 */
+	if (hw->fc.requested_mode == igc_fc_default) {
+		ret_val = igc_set_default_fc(hw);
+		if (ret_val)
+			goto out;
+	}
+
+	/* We want to save off the original Flow Control configuration just
+	 * in case we get disconnected and then reconnected into a different
+	 * hub or switch with different Flow Control capabilities.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		goto out;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values. This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	hw_dbg("Initializing the Flow Control address, type and timer regs\n");
+	wr32(IGC_FCT, FLOW_CONTROL_TYPE);
+	wr32(IGC_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	wr32(IGC_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	wr32(IGC_FCTTV, hw->fc.pause_time);
+
+	ret_val = igc_set_fc_watermarks(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_set_default_fc - Set flow control default values
+ * @hw: pointer to the HW structure
+ *
+ * Set the default flow control settings: the requested mode defaults to
+ * full flow control.
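
igc_disable_pcie_master above follows a request-then-poll shape: set the GIO master disable bit in CTRL, then poll STATUS until the master-enable bit drops or roughly 80 ms of retries lapse. The sketch below models only that control flow; the variable standing in for STATUS and the point where it drains are invented for the demo, and the -10 return mirrors the value of IGC_ERR_MASTER_REQUESTS_PENDING.

#include <stdio.h>

static unsigned int status_gio_master_enable = 1;	/* requests pending */

static int poll_master_disabled(int timeout)
{
	while (timeout--) {
		if (!status_gio_master_enable)
			return 0;	/* all master requests drained */
		if (timeout == 400)	/* pretend the hardware drains here */
			status_gio_master_enable = 0;
	}
	return -10;	/* -IGC_ERR_MASTER_REQUESTS_PENDING */
}

int main(void)
{
	/* Driver equivalent: wr32(IGC_CTRL, ctrl | IGC_CTRL_GIO_MASTER_DISABLE); */
	printf("disable master: %d\n", poll_master_disabled(800));
	return 0;
}
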
+ */
+static s32 igc_set_default_fc(struct igc_hw *hw)
+{
+	hw->fc.requested_mode = igc_fc_full;
+	return 0;
+}
+
+/**
+ * igc_force_mac_fc - Force the MAC's flow control settings
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the
+ * device control register to reflect the adapter settings. TFCE and RFCE
+ * need to be explicitly set by software when a copper PHY is used because
+ * autonegotiation is managed by the PHY rather than the MAC. Software must
+ * also configure these bits when link is forced on a fiber connection.
+ */
+s32 igc_force_mac_fc(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit and
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disables flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *	0: Flow control is completely disabled
+	 *	1: Rx flow control is enabled (we can receive pause
+	 *	   frames but not send pause frames).
+	 *	2: Tx flow control is enabled (we can send pause frames
+	 *	   but we do not receive pause frames).
+	 *	3: Both Rx and Tx flow control (symmetric) is enabled.
+	 *	other: No other values should be possible at this point.
+	 */
+	hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case igc_fc_none:
+		ctrl &= (~(IGC_CTRL_TFCE | IGC_CTRL_RFCE));
+		break;
+	case igc_fc_rx_pause:
+		ctrl &= (~IGC_CTRL_TFCE);
+		ctrl |= IGC_CTRL_RFCE;
+		break;
+	case igc_fc_tx_pause:
+		ctrl &= (~IGC_CTRL_RFCE);
+		ctrl |= IGC_CTRL_TFCE;
+		break;
+	case igc_fc_full:
+		ctrl |= (IGC_CTRL_TFCE | IGC_CTRL_RFCE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		ret_val = -IGC_ERR_CONFIG;
+		goto out;
+	}
+
+	wr32(IGC_CTRL, ctrl);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_set_fc_watermarks - Set flow control high/low watermarks
+ * @hw: pointer to the HW structure
+ *
+ * Sets the flow control high/low threshold (watermark) registers. If
+ * flow control XON frame transmission is enabled, then set XON frame
+ * transmission as well.
+ */
+static s32 igc_set_fc_watermarks(struct igc_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	/* Set the flow control receive threshold registers. Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code. However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & igc_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= IGC_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	wr32(IGC_FCRTL, fcrtl);
+	wr32(IGC_FCRTH, fcrth);
+
+	return 0;
+}
+
+/**
+ * igc_clear_hw_cntrs_base - Clear base hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the base hardware counters by reading the counter registers.
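
The switch in igc_force_mac_fc above reduces to a four-entry mapping from flow-control mode to the TFCE/RFCE bit pair OR'd into CTRL. As an alternative sketch of the same mapping (not how the driver writes it), a lookup table keeps the read-modify-write of the control register explicit.

#include <stdint.h>
#include <stdio.h>

#define CTRL_RFCE 0x08000000u	/* receive flow control enable */
#define CTRL_TFCE 0x10000000u	/* transmit flow control enable */

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static const uint32_t fc_bits[] = {
	[FC_NONE]     = 0,
	[FC_RX_PAUSE] = CTRL_RFCE,
	[FC_TX_PAUSE] = CTRL_TFCE,
	[FC_FULL]     = CTRL_RFCE | CTRL_TFCE,
};

int main(void)
{
	/* Read-modify-write: clear both bits, then set the pair for the mode. */
	uint32_t ctrl = 0xDEAD0000u & ~(CTRL_RFCE | CTRL_TFCE);

	ctrl |= fc_bits[FC_FULL];
	printf("CTRL=0x%08X\n", (unsigned)ctrl);
	return 0;
}
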
+ */
+void igc_clear_hw_cntrs_base(struct igc_hw *hw)
+{
+	rd32(IGC_CRCERRS);
+	rd32(IGC_SYMERRS);
+	rd32(IGC_MPC);
+	rd32(IGC_SCC);
+	rd32(IGC_ECOL);
+	rd32(IGC_MCC);
+	rd32(IGC_LATECOL);
+	rd32(IGC_COLC);
+	rd32(IGC_DC);
+	rd32(IGC_SEC);
+	rd32(IGC_RLEC);
+	rd32(IGC_XONRXC);
+	rd32(IGC_XONTXC);
+	rd32(IGC_XOFFRXC);
+	rd32(IGC_XOFFTXC);
+	rd32(IGC_FCRUC);
+	rd32(IGC_GPRC);
+	rd32(IGC_BPRC);
+	rd32(IGC_MPRC);
+	rd32(IGC_GPTC);
+	rd32(IGC_GORCL);
+	rd32(IGC_GORCH);
+	rd32(IGC_GOTCL);
+	rd32(IGC_GOTCH);
+	rd32(IGC_RNBC);
+	rd32(IGC_RUC);
+	rd32(IGC_RFC);
+	rd32(IGC_ROC);
+	rd32(IGC_RJC);
+	rd32(IGC_TORL);
+	rd32(IGC_TORH);
+	rd32(IGC_TOTL);
+	rd32(IGC_TOTH);
+	rd32(IGC_TPR);
+	rd32(IGC_TPT);
+	rd32(IGC_MPTC);
+	rd32(IGC_BPTC);
+
+	rd32(IGC_PRC64);
+	rd32(IGC_PRC127);
+	rd32(IGC_PRC255);
+	rd32(IGC_PRC511);
+	rd32(IGC_PRC1023);
+	rd32(IGC_PRC1522);
+	rd32(IGC_PTC64);
+	rd32(IGC_PTC127);
+	rd32(IGC_PTC255);
+	rd32(IGC_PTC511);
+	rd32(IGC_PTC1023);
+	rd32(IGC_PTC1522);
+
+	rd32(IGC_ALGNERRC);
+	rd32(IGC_RXERRC);
+	rd32(IGC_TNCRS);
+	rd32(IGC_CEXTERR);
+	rd32(IGC_TSCTC);
+	rd32(IGC_TSCTFC);
+
+	rd32(IGC_MGTPRC);
+	rd32(IGC_MGTPDC);
+	rd32(IGC_MGTPTC);
+
+	rd32(IGC_IAC);
+	rd32(IGC_ICRXOC);
+
+	rd32(IGC_ICRXPTC);
+	rd32(IGC_ICRXATC);
+	rd32(IGC_ICTXPTC);
+	rd32(IGC_ICTXATC);
+	rd32(IGC_ICTXQEC);
+	rd32(IGC_ICTXQMTC);
+	rd32(IGC_ICRXDMTC);
+
+	rd32(IGC_CBTMPC);
+	rd32(IGC_HTDPMC);
+	rd32(IGC_CBRMPC);
+	rd32(IGC_RPTHC);
+	rd32(IGC_HGPTC);
+	rd32(IGC_HTCBDPC);
+	rd32(IGC_HGORCL);
+	rd32(IGC_HGORCH);
+	rd32(IGC_HGOTCL);
+	rd32(IGC_HGOTCH);
+	rd32(IGC_LENERRS);
+}
+
+/**
+ * igc_rar_set - Set receive address register
+ * @hw: pointer to the HW structure
+ * @addr: pointer to the receive address
+ * @index: receive address array register
+ *
+ * Sets the receive address array register at index to the address passed
+ * in by addr.
+ */
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= IGC_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	wr32(IGC_RAL(index), rar_low);
+	wrfl();
+	wr32(IGC_RAH(index), rar_high);
+	wrfl();
+}
+
+/**
+ * igc_check_for_copper_link - Check for link (Copper)
+ * @hw: pointer to the HW structure
+ *
+ * Checks to see if the link status of the hardware has changed. If a
+ * change in link status has been detected, then we read the PHY registers
+ * to get the current speed/duplex if link exists.
+ */
+s32 igc_check_for_copper_link(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	bool link;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed. The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status) {
+		ret_val = 0;
+		goto out;
+	}
+
+	/* First we want to see if the MII Status Register reports
+	 * link. If so, then we want to get the current speed/duplex
+	 * of the PHY.
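
igc_rar_set above packs the six address bytes little-endian into two registers: RAL gets bytes 0 through 3, the low half of RAH gets bytes 4 and 5, and the Address Valid bit is set only for a non-zero address. The packing is pure arithmetic and can be checked in isolation; the MAC address below is arbitrary demo data.

#include <stdint.h>
#include <stdio.h>

#define RAH_AV 0x80000000u	/* Address Valid bit */

int main(void)
{
	uint8_t addr[6] = { 0x00, 0x1B, 0x21, 0xAA, 0xBB, 0xCC };
	uint32_t ral, rah;

	ral = (uint32_t)addr[0] | ((uint32_t)addr[1] << 8) |
	      ((uint32_t)addr[2] << 16) | ((uint32_t)addr[3] << 24);
	rah = (uint32_t)addr[4] | ((uint32_t)addr[5] << 8);

	if (ral || rah)		/* all-zero address stays invalid */
		rah |= RAH_AV;

	printf("RAL=0x%08X RAH=0x%08X\n", (unsigned)ral, (unsigned)rah);
	return 0;
}
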
+ */ + ret_val = igc_phy_has_link(hw, 1, 0, &link); + if (ret_val) + goto out; + + if (!link) + goto out; /* No link detected */ + + mac->get_link_status = false; + + /* Check if there was DownShift, must be checked + * immediately after link-up + */ + igc_check_downshift(hw); + + /* If we are forcing speed/duplex, then we simply return since + * we have already determined whether we have link or not. + */ + if (!mac->autoneg) { + ret_val = -IGC_ERR_CONFIG; + goto out; + } + + /* Auto-Neg is enabled. Auto Speed Detection takes care + * of MAC speed/duplex configuration. So we only need to + * configure Collision Distance in the MAC. + */ + igc_config_collision_dist(hw); + + /* Configure Flow Control now that Auto-Neg has completed. + * First, we need to restore the desired flow control + * settings because we may have had to re-autoneg with a + * different link partner. + */ + ret_val = igc_config_fc_after_link_up(hw); + if (ret_val) + hw_dbg("Error configuring flow control\n"); + +out: + return ret_val; +} + +/** + * igc_config_collision_dist - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + */ +void igc_config_collision_dist(struct igc_hw *hw) +{ + u32 tctl; + + tctl = rd32(IGC_TCTL); + + tctl &= ~IGC_TCTL_COLD; + tctl |= IGC_COLLISION_DISTANCE << IGC_COLD_SHIFT; + + wr32(IGC_TCTL, tctl); + wrfl(); +} + +/** + * igc_config_fc_after_link_up - Configures flow control after link + * @hw: pointer to the HW structure + * + * Checks the status of auto-negotiation after link up to ensure that the + * speed and duplex were not forced. If the link needed to be forced, then + * flow control needs to be forced also. If auto-negotiation is enabled + * and did not fail, then we configure flow control based on our link + * partner. + */ +s32 igc_config_fc_after_link_up(struct igc_hw *hw) +{ + u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; + struct igc_mac_info *mac = &hw->mac; + u16 speed, duplex; + s32 ret_val = 0; + + /* Check for the case where we have fiber media and auto-neg failed + * so we had to force link. In this case, we need to force the + * configuration of the MAC to match the "fc" parameter. + */ + if (mac->autoneg_failed) { + if (hw->phy.media_type == igc_media_type_copper) + ret_val = igc_force_mac_fc(hw); + } + + if (ret_val) { + hw_dbg("Error forcing flow control settings\n"); + goto out; + } + + /* Check for the case where we have copper media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner has + * flow control configured. + */ + if (hw->phy.media_type == igc_media_type_copper && mac->autoneg) { + /* Read the MII Status Register and check to see if AutoNeg + * has completed. We read this twice because this reg has + * some "sticky" (latched) bits. 
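
The comment above about reading PHY_STATUS twice is worth making concrete: the link bit latches low on any link loss, so the first read reports the latched history and only the second reports live state. The sketch below fakes two successive MDIO reads with an array to show why a single read can claim link-down on a healthy link.

#include <stdint.h>
#include <stdio.h>

#define MII_SR_LINK_STATUS 0x0004	/* Link Status, 1 = link */

int main(void)
{
	/* First read returns the stale latched value; the second is live. */
	uint16_t reads[2] = { 0x0000, MII_SR_LINK_STATUS };
	uint16_t stale = reads[0];
	uint16_t live = reads[1];

	printf("latched=%d live=%d\n",
	       !!(stale & MII_SR_LINK_STATUS),
	       !!(live & MII_SR_LINK_STATUS));
	return 0;
}
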
+ */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &mii_status_reg); + if (ret_val) + goto out; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &mii_status_reg); + if (ret_val) + goto out; + + if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { + hw_dbg("Copper PHY and Auto Neg has not completed.\n"); + goto out; + } + + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (Address 4) and the Auto_Negotiation Base + * Page Ability Register (Address 5) to determine how + * flow control was negotiated. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, + &mii_nway_adv_reg); + if (ret_val) + goto out; + ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, + &mii_nway_lp_ability_reg); + if (ret_val) + goto out; + /* Two bits in the Auto Negotiation Advertisement Register + * (Address 4) and two bits in the Auto Negotiation Base + * Page Ability Register (Address 5) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | igc_fc_none + * 0 | 1 | 0 | DC | igc_fc_none + * 0 | 1 | 1 | 0 | igc_fc_none + * 0 | 1 | 1 | 1 | igc_fc_tx_pause + * 1 | 0 | 0 | DC | igc_fc_none + * 1 | DC | 1 | DC | igc_fc_full + * 1 | 1 | 0 | 0 | igc_fc_none + * 1 | 1 | 0 | 1 | igc_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | IGC_fc_full + * + */ + if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { + /* Now we need to check if the user selected RX ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise RX + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == igc_fc_full) { + hw->fc.current_mode = igc_fc_full; + hw_dbg("Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = igc_fc_rx_pause; + hw_dbg("Flow Control = RX PAUSE frames only.\n"); + } + } + + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | igc_fc_tx_pause + */ + else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = igc_fc_tx_pause; + hw_dbg("Flow Control = TX PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. 
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | igc_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+		/* Per the IEEE spec, at this point flow control should be
+		 * disabled. However, we want to consider that we could
+		 * be connected to a legacy switch that doesn't advertise
+		 * desired flow control, but can be forced on the link
+		 * partner. So if we advertised no flow control, that is
+		 * what we will resolve to. If we advertised some kind of
+		 * receive capability (Rx Pause Only or Full Flow Control)
+		 * and the link partner advertised none, we will configure
+		 * ourselves to enable Rx Flow Control only. We can do
+		 * this safely for two reasons: If the link partner really
+		 * didn't want flow control enabled, and we enable Rx, no
+		 * harm done since we won't be receiving any PAUSE frames
+		 * anyway. If the intent on the link partner was to have
+		 * flow control enabled, then by us enabling RX only, we
+		 * can at least receive pause frames and process them.
+		 * This is a good idea because in most cases, since we are
+		 * predominantly a server NIC, more times than not we will
+		 * be asked to delay transmission of packets than asking
+		 * our link partner to pause transmission of frames.
+		 */
+		else if ((hw->fc.requested_mode == igc_fc_none) ||
+			 (hw->fc.requested_mode == igc_fc_tx_pause) ||
+			 (hw->fc.strict_ieee)) {
+			hw->fc.current_mode = igc_fc_none;
+			hw_dbg("Flow Control = NONE.\n");
+		} else {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+
+		/* Now we need to do one last check... If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+		if (ret_val) {
+			hw_dbg("Error getting link speed and duplex\n");
+			goto out;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = igc_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = igc_force_mac_fc(hw);
+		if (ret_val) {
+			hw_dbg("Error forcing flow control settings\n");
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_auto_rd_done - Check for auto read completion
+ * @hw: pointer to the HW structure
+ *
+ * Check EEPROM for Auto Read done bit.
+ */
+s32 igc_get_auto_rd_done(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	s32 i = 0;
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (rd32(IGC_EECD) & IGC_EECD_AUTO_RD)
+			break;
+		usleep_range(1000, 2000);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		hw_dbg("Auto read by HW from NVM has not completed.\n");
+		ret_val = -IGC_ERR_RESET;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Read the status register for the current speed/duplex and store the current
+ * speed and duplex for copper connections.
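
The PAUSE/ASM_DIR tables above fold into a small pure function from the two advertisement registers to a negotiated mode, which is easier to unit test than the in-line chain. The sketch below mirrors the if/else structure of the driver code; it deliberately omits the later demotion of full to Rx-only when the requested mode was not full, and the half-duplex override.

#include <stdio.h>

#define AR_PAUSE   0x0400	/* PAUSE capability advertised */
#define AR_ASM_DIR 0x0800	/* asymmetric pause direction */

enum fc { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static enum fc resolve_fc(unsigned int local, unsigned int partner)
{
	if ((local & AR_PAUSE) && (partner & AR_PAUSE))
		return FC_FULL;		/* both ends symmetric-capable */
	if (!(local & AR_PAUSE) && (local & AR_ASM_DIR) &&
	    (partner & AR_PAUSE) && (partner & AR_ASM_DIR))
		return FC_TX_PAUSE;	/* we send PAUSE, partner receives */
	if ((local & AR_PAUSE) && (local & AR_ASM_DIR) &&
	    !(partner & AR_PAUSE) && (partner & AR_ASM_DIR))
		return FC_RX_PAUSE;	/* partner sends PAUSE, we receive */
	return FC_NONE;
}

int main(void)
{
	printf("full=%d tx=%d\n",
	       resolve_fc(AR_PAUSE, AR_PAUSE),
	       resolve_fc(AR_ASM_DIR, AR_PAUSE | AR_ASM_DIR));
	return 0;
}
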
+ */ +s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed, + u16 *duplex) +{ + u32 status; + + status = rd32(IGC_STATUS); + if (status & IGC_STATUS_SPEED_1000) { + /* For I225, STATUS will indicate 1G speed in both 1 Gbps + * and 2.5 Gbps link modes. An additional bit is used + * to differentiate between 1 Gbps and 2.5 Gbps. + */ + if (hw->mac.type == igc_i225 && + (status & IGC_STATUS_SPEED_2500)) { + *speed = SPEED_2500; + hw_dbg("2500 Mbs, "); + } else { + *speed = SPEED_1000; + hw_dbg("1000 Mbs, "); + } + } else if (status & IGC_STATUS_SPEED_100) { + *speed = SPEED_100; + hw_dbg("100 Mbs, "); + } else { + *speed = SPEED_10; + hw_dbg("10 Mbs, "); + } + + if (status & IGC_STATUS_FD) { + *duplex = FULL_DUPLEX; + hw_dbg("Full Duplex\n"); + } else { + *duplex = HALF_DUPLEX; + hw_dbg("Half Duplex\n"); + } + + return 0; +} + +/** + * igc_put_hw_semaphore - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + */ +void igc_put_hw_semaphore(struct igc_hw *hw) +{ + u32 swsm; + + swsm = rd32(IGC_SWSM); + + swsm &= ~(IGC_SWSM_SMBI | IGC_SWSM_SWESMBI); + + wr32(IGC_SWSM, swsm); +} + +/** + * igc_enable_mng_pass_thru - Enable processing of ARP's + * @hw: pointer to the HW structure + * + * Verifies the hardware needs to leave interface enabled so that frames can + * be directed to and from the management interface. + */ +bool igc_enable_mng_pass_thru(struct igc_hw *hw) +{ + bool ret_val = false; + u32 fwsm, factps; + u32 manc; + + if (!hw->mac.asf_firmware_present) + goto out; + + manc = rd32(IGC_MANC); + + if (!(manc & IGC_MANC_RCV_TCO_EN)) + goto out; + + if (hw->mac.arc_subsystem_valid) { + fwsm = rd32(IGC_FWSM); + factps = rd32(IGC_FACTPS); + + if (!(factps & IGC_FACTPS_MNGCG) && + ((fwsm & IGC_FWSM_MODE_MASK) == + (igc_mng_mode_pt << IGC_FWSM_MODE_SHIFT))) { + ret_val = true; + goto out; + } + } else { + if ((manc & IGC_MANC_SMBUS_EN) && + !(manc & IGC_MANC_ASF_EN)) { + ret_val = true; + goto out; + } + } + +out: + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h new file mode 100644 index 000000000000..782bc995badc --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_mac.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_MAC_H_ +#define _IGC_MAC_H_ + +#include "igc_hw.h" +#include "igc_phy.h" +#include "igc_defines.h" + +#ifndef IGC_REMOVED +#define IGC_REMOVED(a) (0) +#endif /* IGC_REMOVED */ + +/* forward declaration */ +s32 igc_disable_pcie_master(struct igc_hw *hw); +s32 igc_check_for_copper_link(struct igc_hw *hw); +s32 igc_config_fc_after_link_up(struct igc_hw *hw); +s32 igc_force_mac_fc(struct igc_hw *hw); +void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count); +s32 igc_setup_link(struct igc_hw *hw); +void igc_clear_hw_cntrs_base(struct igc_hw *hw); +s32 igc_get_auto_rd_done(struct igc_hw *hw); +void igc_put_hw_semaphore(struct igc_hw *hw); +void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index); +void igc_config_collision_dist(struct igc_hw *hw); + +s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed, + u16 *duplex); + +bool igc_enable_mng_pass_thru(struct igc_hw *hw); + +enum igc_mng_mode { + igc_mng_mode_none = 0, + igc_mng_mode_asf, + igc_mng_mode_pt, + igc_mng_mode_ipmi, + igc_mng_mode_host_if_only +}; + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c new file mode 100644 index 
000000000000..9d85707e8a81 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -0,0 +1,3901 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/if_vlan.h> +#include <linux/aer.h> + +#include "igc.h" +#include "igc_hw.h" + +#define DRV_VERSION "0.0.1-k" +#define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" + +static int debug = -1; + +MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); +MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +char igc_driver_name[] = "igc"; +char igc_driver_version[] = DRV_VERSION; +static const char igc_driver_string[] = DRV_SUMMARY; +static const char igc_copyright[] = + "Copyright(c) 2018 Intel Corporation."; + +static const struct igc_info *igc_info_tbl[] = { + [board_base] = &igc_base_info, +}; + +static const struct pci_device_id igc_pci_tbl[] = { + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, + /* required last entry */ + {0, } +}; + +MODULE_DEVICE_TABLE(pci, igc_pci_tbl); + +/* forward declaration */ +static void igc_clean_tx_ring(struct igc_ring *tx_ring); +static int igc_sw_init(struct igc_adapter *); +static void igc_configure(struct igc_adapter *adapter); +static void igc_power_down_link(struct igc_adapter *adapter); +static void igc_set_default_mac_filter(struct igc_adapter *adapter); +static void igc_set_rx_mode(struct net_device *netdev); +static void igc_write_itr(struct igc_q_vector *q_vector); +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector); +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx); +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix); +static void igc_free_q_vectors(struct igc_adapter *adapter); +static void igc_irq_disable(struct igc_adapter *adapter); +static void igc_irq_enable(struct igc_adapter *adapter); +static void igc_configure_msix(struct igc_adapter *adapter); +static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *bi); + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +static void igc_reset(struct igc_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + + hw->mac.ops.reset_hw(hw); + + if (hw->mac.ops.init_hw(hw)) + dev_err(&pdev->dev, "Hardware Error\n"); + + if (!netif_running(adapter->netdev)) + igc_power_down_link(adapter); + + igc_get_phy_info(hw); +} + +/** + * igc_power_up_link - Power up the phy/serdes link + * @adapter: address of board private structure + */ +static void igc_power_up_link(struct igc_adapter *adapter) +{ + igc_reset_phy(&adapter->hw); + + if (adapter->hw.phy.media_type == igc_media_type_copper) + igc_power_up_phy_copper(&adapter->hw); + + igc_setup_link(&adapter->hw); +} + +/** + * igc_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure + */ +static void igc_power_down_link(struct igc_adapter *adapter) +{ + if (adapter->hw.phy.media_type == igc_media_type_copper) + igc_power_down_phy_copper_base(&adapter->hw); +} + +/** + * igc_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure + * + * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 
+ * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + */ +static void igc_release_hw_control(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware take over control of h/w */ + ctrl_ext = rd32(IGC_CTRL_EXT); + wr32(IGC_CTRL_EXT, + ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); +} + +/** + * igc_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure + * + * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. + */ +static void igc_get_hw_control(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = rd32(IGC_CTRL_EXT); + wr32(IGC_CTRL_EXT, + ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); +} + +/** + * igc_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + */ +static void igc_free_tx_resources(struct igc_ring *tx_ring) +{ + igc_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + dma_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * igc_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void igc_free_all_tx_resources(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igc_free_tx_resources(adapter->tx_ring[i]); +} + +/** + * igc_clean_tx_ring - Free Tx Buffers + * @tx_ring: ring to be cleaned + */ +static void igc_clean_tx_ring(struct igc_ring *tx_ring) +{ + u16 i = tx_ring->next_to_clean; + struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; + + while (i != tx_ring->next_to_use) { + union igc_adv_tx_desc *eop_desc, *tx_desc; + + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* check for eop_desc to determine the end of the packet */ + eop_desc = tx_buffer->next_to_watch; + tx_desc = IGC_TX_DESC(tx_ring, i); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGC_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + } + } + + /* reset BQL for queue */ + netdev_tx_reset_queue(txring_txq(tx_ring)); + + /* reset next_to_use and next_to_clean */ + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; +} + +/** + * igc_clean_all_tx_rings - Free Tx Buffers for all queues + * @adapter: board private structure + */ +static void igc_clean_all_tx_rings(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + if (adapter->tx_ring[i]) + igc_clean_tx_ring(adapter->tx_ring[i]); +} + +/** + * 
igc_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup + * + * Return 0 on success, negative on failure + */ +static int igc_setup_tx_resources(struct igc_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int size = 0; + + size = sizeof(struct igc_tx_buffer) * tx_ring->count; + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + + if (!tx_ring->desc) + goto err; + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + + return 0; + +err: + vfree(tx_ring->tx_buffer_info); + dev_err(dev, + "Unable to allocate memory for the transmit descriptor ring\n"); + return -ENOMEM; +} + +/** + * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int igc_setup_all_tx_resources(struct igc_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + err = igc_setup_tx_resources(adapter->tx_ring[i]); + if (err) { + dev_err(&pdev->dev, + "Allocation for Tx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igc_free_tx_resources(adapter->tx_ring[i]); + break; + } + } + + return err; +} + +/** + * igc_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from + */ +static void igc_clean_rx_ring(struct igc_ring *rx_ring) +{ + u16 i = rx_ring->next_to_clean; + + if (rx_ring->skb) + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + + /* Free all the Rx ring sk_buffs */ + while (i != rx_ring->next_to_alloc) { + struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; + + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. 
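+ * The sync below is bounded to this buffer's page_offset and + * igc_rx_bufsz() window, so only the region the device may have + * written is brought back for the CPU before the page is unmapped.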
+ */ + dma_sync_single_range_for_cpu(rx_ring->dev, + buffer_info->dma, + buffer_info->page_offset, + igc_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, + buffer_info->dma, + igc_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + __page_frag_cache_drain(buffer_info->page, + buffer_info->pagecnt_bias); + + i++; + if (i == rx_ring->count) + i = 0; + } + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * igc_clean_all_rx_rings - Free Rx Buffers for all queues + * @adapter: board private structure + */ +static void igc_clean_all_rx_rings(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + if (adapter->rx_ring[i]) + igc_clean_rx_ring(adapter->rx_ring[i]); +} + +/** + * igc_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + */ +static void igc_free_rx_resources(struct igc_ring *rx_ring) +{ + igc_clean_rx_ring(rx_ring); + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!rx_ring->desc) + return; + + dma_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + + rx_ring->desc = NULL; +} + +/** + * igc_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + */ +static void igc_free_all_rx_resources(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + igc_free_rx_resources(adapter->rx_ring[i]); +} + +/** + * igc_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + */ +static int igc_setup_rx_resources(struct igc_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int size, desc_len; + + size = sizeof(struct igc_rx_buffer) * rx_ring->count; + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + goto err; + + desc_len = sizeof(union igc_adv_rx_desc); + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = ALIGN(rx_ring->size, 4096); + + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + + if (!rx_ring->desc) + goto err; + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + + return 0; + +err: + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + dev_err(dev, + "Unable to allocate memory for the receive descriptor ring\n"); + return -ENOMEM; +} + +/** + * igc_setup_all_rx_resources - wrapper to allocate Rx resources + * (Descriptors) for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int igc_setup_all_rx_resources(struct igc_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_rx_queues; i++) { + err = igc_setup_rx_resources(adapter->rx_ring[i]); + if (err) { + dev_err(&pdev->dev, + "Allocation for Rx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igc_free_rx_resources(adapter->rx_ring[i]); + break; + } + } + + return err; +} + +/** + * igc_configure_rx_ring - Configure a receive ring after Reset + * @adapter: board private structure + * @ring: receive ring to be configured + * + * Configure the Rx unit of the MAC after a reset. 
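+ * As a worked example, a ring of 256 descriptors of 16 bytes each + * programs RDLEN to 4096, so the descriptor ring occupies exactly one + * 4K page; head (RDH) and tail (RDT) both start at zero.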
+ */ +static void igc_configure_rx_ring(struct igc_adapter *adapter, + struct igc_ring *ring) +{ + struct igc_hw *hw = &adapter->hw; + union igc_adv_rx_desc *rx_desc; + int reg_idx = ring->reg_idx; + u32 srrctl = 0, rxdctl = 0; + u64 rdba = ring->dma; + + /* disable the queue */ + wr32(IGC_RXDCTL(reg_idx), 0); + + /* Set DMA base address registers */ + wr32(IGC_RDBAL(reg_idx), + rdba & 0x00000000ffffffffULL); + wr32(IGC_RDBAH(reg_idx), rdba >> 32); + wr32(IGC_RDLEN(reg_idx), + ring->count * sizeof(union igc_adv_rx_desc)); + + /* initialize head and tail */ + ring->tail = adapter->io_addr + IGC_RDT(reg_idx); + wr32(IGC_RDH(reg_idx), 0); + writel(0, ring->tail); + + /* reset next-to-use/clean to place SW in sync with hardware */ + ring->next_to_clean = 0; + ring->next_to_use = 0; + + /* set descriptor configuration */ + srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; + + wr32(IGC_SRRCTL(reg_idx), srrctl); + + rxdctl |= IGC_RX_PTHRESH; + rxdctl |= IGC_RX_HTHRESH << 8; + rxdctl |= IGC_RX_WTHRESH << 16; + + /* initialize rx_buffer_info */ + memset(ring->rx_buffer_info, 0, + sizeof(struct igc_rx_buffer) * ring->count); + + /* initialize Rx descriptor 0 */ + rx_desc = IGC_RX_DESC(ring, 0); + rx_desc->wb.upper.length = 0; + + /* enable receive descriptor fetching */ + rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; + + wr32(IGC_RXDCTL(reg_idx), rxdctl); +} + +/** + * igc_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + */ +static void igc_configure_rx(struct igc_adapter *adapter) +{ + int i; + + /* Setup the HW Rx Head and Tail Descriptor Pointers and + * the Base and Length of the Rx Descriptor Ring + */ + for (i = 0; i < adapter->num_rx_queues; i++) + igc_configure_rx_ring(adapter, adapter->rx_ring[i]); +} + +/** + * igc_configure_tx_ring - Configure transmit ring after Reset + * @adapter: board private structure + * @ring: tx ring to configure + * + * Configure a transmit ring after a reset. + */ +static void igc_configure_tx_ring(struct igc_adapter *adapter, + struct igc_ring *ring) +{ + struct igc_hw *hw = &adapter->hw; + int reg_idx = ring->reg_idx; + u64 tdba = ring->dma; + u32 txdctl = 0; + + /* disable the queue */ + wr32(IGC_TXDCTL(reg_idx), 0); + wrfl(); + mdelay(10); + + wr32(IGC_TDLEN(reg_idx), + ring->count * sizeof(union igc_adv_tx_desc)); + wr32(IGC_TDBAL(reg_idx), + tdba & 0x00000000ffffffffULL); + wr32(IGC_TDBAH(reg_idx), tdba >> 32); + + ring->tail = adapter->io_addr + IGC_TDT(reg_idx); + wr32(IGC_TDH(reg_idx), 0); + writel(0, ring->tail); + + txdctl |= IGC_TX_PTHRESH; + txdctl |= IGC_TX_HTHRESH << 8; + txdctl |= IGC_TX_WTHRESH << 16; + + txdctl |= IGC_TXDCTL_QUEUE_ENABLE; + wr32(IGC_TXDCTL(reg_idx), txdctl); +} + +/** + * igc_configure_tx - Configure transmit Unit after Reset + * @adapter: board private structure + * + * Configure the Tx unit of the MAC after a reset.
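+ * Each ring is disabled, has its base address and length programmed, + * and gets its prefetch/host/write-back thresholds packed into TXDCTL + * at bit offsets 0, 8 and 16 by igc_configure_tx_ring() above.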
+ */ +static void igc_configure_tx(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igc_configure_tx_ring(adapter, adapter->tx_ring[i]); +} + +/** + * igc_setup_mrqc - configure the multiple receive queue control registers + * @adapter: Board private structure + */ +static void igc_setup_mrqc(struct igc_adapter *adapter) +{ +} + +/** + * igc_setup_rctl - configure the receive control registers + * @adapter: Board private structure + */ +static void igc_setup_rctl(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 rctl; + + rctl = rd32(IGC_RCTL); + + rctl &= ~(3 << IGC_RCTL_MO_SHIFT); + rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC); + + rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); + + /* enable stripping of CRC. Newer features require + * that the HW strips the CRC. + */ + rctl |= IGC_RCTL_SECRC; + + /* disable store bad packets and clear size bits. */ + rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256); + + /* enable LPE to allow for reception of jumbo frames */ + rctl |= IGC_RCTL_LPE; + + /* disable queue 0 to prevent tail write w/o re-config */ + wr32(IGC_RXDCTL(0), 0); + + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in set_rx_mode + */ + rctl |= (IGC_RCTL_SBP | /* Receive bad packets */ + IGC_RCTL_BAM | /* RX All Bcast Pkts */ + IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ + + rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */ + IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */ + } + + wr32(IGC_RCTL, rctl); +} + +/** + * igc_setup_tctl - configure the transmit control registers + * @adapter: Board private structure + */ +static void igc_setup_tctl(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 tctl; + + /* disable queue 0 which could be enabled by default */ + wr32(IGC_TXDCTL(0), 0); + + /* Program the Transmit Control Register */ + tctl = rd32(IGC_TCTL); + tctl &= ~IGC_TCTL_CT; + tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC | + (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT); + + /* Enable transmits */ + tctl |= IGC_TCTL_EN; + + wr32(IGC_TCTL, tctl); +} + +/** + * igc_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int igc_set_mac(struct net_device *netdev, void *p) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + /* set the correct pool for the new PF MAC address in entry 0 */ + igc_set_default_mac_filter(adapter); + + return 0; +} + +static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) +{ +} + +static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) +{ + struct net_device *netdev = tx_ring->netdev; + + netif_stop_subqueue(netdev, tx_ring->queue_index); + + /* memory barrier: make the queue stop visible before we re-read + * the free descriptor count (pairs with the smp_mb() in + * igc_clean_tx_irq()) + */ + smp_mb(); + + /* We need to check again in case another CPU has just + * made room available. + */ + if (igc_desc_unused(tx_ring) < size) + return -EBUSY; + + /* A reprieve!
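+ * Descriptors were freed between the check above and the stop, so + * restart the queue instead of leaving it stalled.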
*/ + netif_wake_subqueue(netdev, tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp2); + tx_ring->tx_stats.restart_queue2++; + u64_stats_update_end(&tx_ring->tx_syncp2); + + return 0; +} + +static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) +{ + if (igc_desc_unused(tx_ring) >= size) + return 0; + return __igc_maybe_stop_tx(tx_ring, size); +} + +static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + u32 cmd_type = IGC_ADVTXD_DTYP_DATA | + IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS; + + return cmd_type; +} + +static void igc_tx_olinfo_status(struct igc_ring *tx_ring, + union igc_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) +{ + u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; + + /* insert L4 checksum */ + olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * + ((IGC_TXD_POPTS_TXSM << 8) / + IGC_TX_FLAGS_CSUM); + + /* insert IPv4 checksum */ + olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * + (((IGC_TXD_POPTS_IXSM << 8)) / + IGC_TX_FLAGS_IPV4); + + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); +} + +static int igc_tx_map(struct igc_ring *tx_ring, + struct igc_tx_buffer *first, + const u8 hdr_len) +{ + struct sk_buff *skb = first->skb; + struct igc_tx_buffer *tx_buffer; + union igc_adv_tx_desc *tx_desc; + u32 tx_flags = first->tx_flags; + struct skb_frag_struct *frag; + u16 i = tx_ring->next_to_use; + unsigned int data_len, size; + dma_addr_t dma; + u32 cmd_type = igc_tx_cmd_type(skb, tx_flags); + + tx_desc = IGC_TX_DESC(tx_ring, i); + + igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buffer = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGC_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + dma += IGC_MAX_DATA_PER_TXD; + size -= IGC_MAX_DATA_PER_TXD; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGC_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, + size, DMA_TO_DEVICE); + + tx_buffer = &tx_ring->tx_buffer_info[i]; + } + + /* write last descriptor with RS and EOP bits */ + cmd_type |= size | IGC_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + + /* set the timestamp */ + first->time_stamp = jiffies; + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. 
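+ * e.g. on a weakly ordered architecture the descriptor writes above + * could otherwise be reordered past the next_to_watch store that the + * clean-up path keys off.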
+ */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + /* Make sure there is space in the ring for the next send. */ + igc_maybe_stop_tx(tx_ring, DESC_NEEDED); + + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } + + return 0; +dma_error: + dev_err(tx_ring->dev, "TX DMA map failed\n"); + tx_buffer = &tx_ring->tx_buffer_info[i]; + + /* clear dma mappings for failed tx_buffer_info map */ + while (tx_buffer != first) { + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + if (i-- == 0) + i += tx_ring->count; + tx_buffer = &tx_ring->tx_buffer_info[i]; + } + + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + dev_kfree_skb_any(tx_buffer->skb); + tx_buffer->skb = NULL; + + tx_ring->next_to_use = i; + + return -1; +} + +static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + struct igc_ring *tx_ring) +{ + u16 count = TXD_USE_COUNT(skb_headlen(skb)); + __be16 protocol = vlan_get_protocol(skb); + struct igc_tx_buffer *first; + u32 tx_flags = 0; + unsigned short f; + u8 hdr_len = 0; + + /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, + * + 2 desc gap to keep tail from touching head, + * + 1 desc for context descriptor, + * otherwise try next time + */ + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + + if (igc_maybe_stop_tx(tx_ring, count + 3)) { + /* this is a hard error */ + return NETDEV_TX_BUSY; + } + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + + skb_tx_timestamp(skb); + + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = protocol; + + igc_tx_csum(tx_ring, first); + + igc_tx_map(tx_ring, first, hdr_len); + + return NETDEV_TX_OK; +} + +static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, + struct sk_buff *skb) +{ + unsigned int r_idx = skb->queue_mapping; + + if (r_idx >= adapter->num_tx_queues) + r_idx = r_idx % adapter->num_tx_queues; + + return adapter->tx_ring[r_idx]; +} + +static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + /* The minimum packet size with TCTL.PSP set is 17 so pad the skb + * in order to meet this minimum size requirement. 
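+ * e.g. a 10-byte frame is zero-padded to 17 bytes by skb_padto() + * below; skb->len is then set to 17 by hand, since skb_padto() pads + * the buffer but does not grow the recorded length itself.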
+ */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); +} + +static inline void igc_rx_hash(struct igc_ring *ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (ring->netdev->features & NETIF_F_RXHASH) + skb_set_hash(skb, + le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); +} + +/** + * igc_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. + */ +static void igc_process_skb_fields(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + igc_rx_hash(rx_ring, rx_desc, skb); + + skb_record_rx_queue(skb, rx_ring->queue_index); + + skb->protocol = eth_type_trans(skb, rx_ring->netdev); +} + +static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, + const unsigned int size) +{ + struct igc_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +/** + * igc_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @skb: sk_buff to place the data into + * @size: size of buffer to be added + * + * This function will add the data contained in rx_buffer->page to the skb. + */ +static void igc_add_rx_frag(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer, + struct sk_buff *skb, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); + rx_buffer->page_offset ^= truesize; +#else + unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) : + SKB_DATA_ALIGN(size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); + rx_buffer->page_offset += truesize; +#endif +} + +static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer, + union igc_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IGC_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* build an skb around the page buffer */ + skb = build_skb(va - IGC_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, IGC_SKB_PAD); + __skb_put(skb, size); + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} + +static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer, + union igc_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + unsigned int headlen; + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN); + if (unlikely(!skb)) + return NULL; + + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IGC_RX_HDR_LEN) + headlen = eth_get_headlen(va, IGC_RX_HDR_LEN); + + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + (va + headlen) - page_address(rx_buffer->page), + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + } else { + rx_buffer->pagecnt_bias++; + } + + return skb; +} + +/** + * igc_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + */ +static void igc_reuse_rx_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *old_buff) +{ + u16 nta = rx_ring->next_to_alloc; + struct igc_rx_buffer *new_buff; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* Transfer page from old buffer to new buffer. + * Move each member individually to avoid possible store + * forwarding stalls. 
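+ * (A single struct copy of the four fields could compile to wider + * loads/stores that defeat store-to-load forwarding on some CPUs.)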
+ */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; +} + +static inline bool igc_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); +} + +static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) +{ + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* avoid re-using remote pages */ + if (unlikely(igc_page_is_reserved(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + return false; +#else +#define IGC_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) + + if (rx_buffer->page_offset > IGC_LAST_OFFSET) + return false; +#endif + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + + return true; +} + +/** + * igc_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + */ +static bool igc_is_non_eop(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IGC_RX_DESC(rx_ring, ntc)); + + if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * igc_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed.
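+ * e.g. a descriptor flagged in IGC_RXDEXT_ERR_FRAME_ERR_MASK is + * dropped here (and true returned) unless NETIF_F_RXALL asked for bad + * frames to be delivered anyway.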
+ */ +static bool igc_cleanup_headers(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely((igc_test_staterr(rx_desc, + IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) { + struct net_device *netdev = rx_ring->netdev; + + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +static void igc_put_rx_buffer(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer) +{ + if (igc_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + igc_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; +} + +/** + * igc_alloc_rx_buffers - Replace used receive buffers + * @rx_ring: rx descriptor ring to refill + * @cleaned_count: number of buffers to replace + */ +static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) +{ + union igc_adv_rx_desc *rx_desc; + u16 i = rx_ring->next_to_use; + struct igc_rx_buffer *bi; + u16 bufsz; + + /* nothing to do */ + if (!cleaned_count) + return; + + rx_desc = IGC_RX_DESC(rx_ring, i); + bi = &rx_ring->rx_buffer_info[i]; + i -= rx_ring->count; + + bufsz = igc_rx_bufsz(rx_ring); + + do { + if (!igc_alloc_mapped_page(rx_ring, bi)) + break; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, bufsz, + DMA_FROM_DEVICE); + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + + rx_desc++; + bi++; + i++; + if (unlikely(!i)) { + rx_desc = IGC_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buffer_info; + i -= rx_ring->count; + } + + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; + + cleaned_count--; + } while (cleaned_count); + + i += rx_ring->count; + + if (rx_ring->next_to_use != i) { + /* record the next descriptor to use */ + rx_ring->next_to_use = i; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = i; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64).
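+ * The tail write below then transfers ownership of everything up to + * (but not including) index i to the hardware.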
+ */ + wmb(); + writel(i, rx_ring->tail); + } +} + +static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) +{ + unsigned int total_bytes = 0, total_packets = 0; + struct igc_ring *rx_ring = q_vector->rx.ring; + struct sk_buff *skb = rx_ring->skb; + u16 cleaned_count = igc_desc_unused(rx_ring); + + while (likely(total_packets < budget)) { + union igc_adv_rx_desc *rx_desc; + struct igc_rx_buffer *rx_buffer; + unsigned int size; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGC_RX_BUFFER_WRITE) { + igc_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * descriptor has been written back + */ + dma_rmb(); + + rx_buffer = igc_get_rx_buffer(rx_ring, size); + + /* retrieve a buffer from the ring */ + if (skb) + igc_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); + else + skb = igc_construct_skb(rx_ring, rx_buffer, + rx_desc, size); + + /* exit if we failed to retrieve a buffer */ + if (!skb) { + rx_ring->rx_stats.alloc_failed++; + rx_buffer->pagecnt_bias++; + break; + } + + igc_put_rx_buffer(rx_ring, rx_buffer); + cleaned_count++; + + /* fetch next buffer in frame if non-eop */ + if (igc_is_non_eop(rx_ring, rx_desc)) + continue; + + /* verify the packet layout is correct */ + if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + igc_process_skb_fields(rx_ring, rx_desc, skb); + + napi_gro_receive(&q_vector->napi, skb); + + /* reset skb pointer */ + skb = NULL; + + /* update budget accounting */ + total_packets++; + } + + /* place incomplete frames back on ring for completion */ + rx_ring->skb = skb; + + u64_stats_update_begin(&rx_ring->rx_syncp); + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + u64_stats_update_end(&rx_ring->rx_syncp); + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igc_alloc_rx_buffers(rx_ring, cleaned_count); + + return total_packets; +} + +static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? 
IGC_SKB_PAD : 0; +} + +static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + igc_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = igc_rx_offset(rx_ring); + bi->pagecnt_bias = 1; + + return true; +} + +/** + * igc_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: pointer to q_vector containing needed info + * @napi_budget: Used to determine if we are in netpoll + * + * returns true if ring is completely cleaned + */ +static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) +{ + struct igc_adapter *adapter = q_vector->adapter; + unsigned int total_bytes = 0, total_packets = 0; + unsigned int budget = q_vector->tx.work_limit; + struct igc_ring *tx_ring = q_vector->tx.ring; + unsigned int i = tx_ring->next_to_clean; + struct igc_tx_buffer *tx_buffer; + union igc_adv_tx_desc *tx_desc; + + if (test_bit(__IGC_DOWN, &adapter->state)) + return true; + + tx_buffer = &tx_ring->tx_buffer_info[i]; + tx_desc = IGC_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + smp_rmb(); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buffer->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + + /* free the skb */ + napi_consume_skb(tx_buffer->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + dma_unmap_len_set(tx_buffer, len, 0); + + /* clear last DMA location and unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGC_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGC_TX_DESC(tx_ring, 0); + } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + netdev_tx_completed_queue(txring_txq(tx_ring), + 
total_packets, total_bytes); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; + u64_stats_update_end(&tx_ring->tx_syncp); + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; + + if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { + struct igc_hw *hw = &adapter->hw; + + /* Detect a transmit hang in hardware, this serializes the + * check with the clearing of time_stamp and movement of i + */ + clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); + if (tx_buffer->next_to_watch && + time_after(jiffies, tx_buffer->time_stamp + + (adapter->tx_timeout_factor * HZ)) && + !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { + /* detected Tx unit hang */ + dev_err(tx_ring->dev, + "Detected Tx Unit Hang\n" + " Tx Queue <%d>\n" + " TDH <%x>\n" + " TDT <%x>\n" + " next_to_use <%x>\n" + " next_to_clean <%x>\n" + "buffer_info[next_to_clean]\n" + " time_stamp <%lx>\n" + " next_to_watch <%p>\n" + " jiffies <%lx>\n" + " desc.status <%x>\n", + tx_ring->queue_index, + rd32(IGC_TDH(tx_ring->reg_idx)), + readl(tx_ring->tail), + tx_ring->next_to_use, + tx_ring->next_to_clean, + tx_buffer->time_stamp, + tx_buffer->next_to_watch, + jiffies, + tx_buffer->next_to_watch->wb.status); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + /* we are about to reset, no point in enabling stuff */ + return true; + } + } + +#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) + if (unlikely(total_packets && + netif_carrier_ok(tx_ring->netdev) && + igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) && + !(test_bit(__IGC_DOWN, &adapter->state))) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.restart_queue++; + u64_stats_update_end(&tx_ring->tx_syncp); + } + } + + return !!budget; +} + +/** + * igc_ioctl - I/O control method + * @netdev: network interface device structure + * @ifr: pointer to interface request structure + * @cmd: ioctl command + */ +static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + default: + return -EOPNOTSUPP; + } +} + +/** + * igc_up - Open the interface and prepare it to handle traffic + * @adapter: board private structure + */ +static void igc_up(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i = 0; + + /* hardware has been reset, we need to reload some things */ + igc_configure(adapter); + + clear_bit(__IGC_DOWN, &adapter->state); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_enable(&adapter->q_vector[i]->napi); + + if (adapter->msix_entries) + igc_configure_msix(adapter); + else + igc_assign_vector(adapter->q_vector[0], 0); + + /* Clear any pending interrupts. */ + rd32(IGC_ICR); + igc_irq_enable(adapter); + + netif_tx_start_all_queues(adapter->netdev); + + /* start the watchdog.
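+ * Forcing get_link_status here makes the first watchdog pass query + * the PHY for the real link state instead of trusting a stale cached + * value.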
*/ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); +} + +/** + * igc_update_stats - Update the board statistics counters + * @adapter: board private structure + */ +static void igc_update_stats(struct igc_adapter *adapter) +{ +} + +static void igc_nfc_filter_exit(struct igc_adapter *adapter) +{ +} + +/** + * igc_down - Close the interface + * @adapter: board private structure + */ +static void igc_down(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct igc_hw *hw = &adapter->hw; + u32 tctl, rctl; + int i = 0; + + set_bit(__IGC_DOWN, &adapter->state); + + /* disable receives in the hardware */ + rctl = rd32(IGC_RCTL); + wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); + /* flush and sleep below */ + + igc_nfc_filter_exit(adapter); + + /* set trans_start so we don't get spurious watchdogs during reset */ + netif_trans_update(netdev); + + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + /* disable transmits in the hardware */ + tctl = rd32(IGC_TCTL); + tctl &= ~IGC_TCTL_EN; + wr32(IGC_TCTL, tctl); + /* flush both disables and wait for them to finish */ + wrfl(); + usleep_range(10000, 20000); + + igc_irq_disable(adapter); + + adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; + + for (i = 0; i < adapter->num_q_vectors; i++) { + if (adapter->q_vector[i]) { + napi_synchronize(&adapter->q_vector[i]->napi); + napi_disable(&adapter->q_vector[i]->napi); + } + } + + del_timer_sync(&adapter->watchdog_timer); + del_timer_sync(&adapter->phy_info_timer); + + /* record the stats before reset */ + spin_lock(&adapter->stats64_lock); + igc_update_stats(adapter); + spin_unlock(&adapter->stats64_lock); + + adapter->link_speed = 0; + adapter->link_duplex = 0; + + if (!pci_channel_offline(adapter->pdev)) + igc_reset(adapter); + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; + + igc_clean_all_tx_rings(adapter); + igc_clean_all_rx_rings(adapter); +} + +static void igc_reinit_locked(struct igc_adapter *adapter) +{ + WARN_ON(in_interrupt()); + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + igc_down(adapter); + igc_up(adapter); + clear_bit(__IGC_RESETTING, &adapter->state); +} + +static void igc_reset_task(struct work_struct *work) +{ + struct igc_adapter *adapter; + + adapter = container_of(work, struct igc_adapter, reset_task); + + netdev_err(adapter->netdev, "Reset adapter\n"); + igc_reinit_locked(adapter); +} + +/** + * igc_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + */ +static int igc_change_mtu(struct net_device *netdev, int new_mtu) +{ + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct igc_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; + + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + /* igc_down has a dependency on max_frame_size */ + adapter->max_frame_size = max_frame; + + if (netif_running(netdev)) + igc_down(adapter); + + dev_info(&pdev->dev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + netdev->mtu = new_mtu; + + if (netif_running(netdev)) + igc_up(adapter); + else + igc_reset(adapter); + + clear_bit(__IGC_RESETTING,
&adapter->state); + + return 0; +} + +/** + * igc_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are updated here and also from the timer callback. + */ +static struct net_device_stats *igc_get_stats(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + if (!test_bit(__IGC_RESETTING, &adapter->state)) + igc_update_stats(adapter); + + /* only return the current stats */ + return &netdev->stats; +} + +/** + * igc_configure - configure the hardware for RX and TX + * @adapter: private board structure + */ +static void igc_configure(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i = 0; + + igc_get_hw_control(adapter); + igc_set_rx_mode(netdev); + + igc_setup_tctl(adapter); + igc_setup_mrqc(adapter); + igc_setup_rctl(adapter); + + igc_configure_tx(adapter); + igc_configure_rx(adapter); + + igc_rx_fifo_flush_base(&adapter->hw); + + /* call igc_desc_unused which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean + */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + + igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); + } +} + +/** + * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table + * @adapter: Pointer to adapter structure + * @index: Index of the RAR entry which needs to be synced with MAC table + */ +static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) +{ + u8 *addr = adapter->mac_table[index].addr; + struct igc_hw *hw = &adapter->hw; + u32 rar_low, rar_high; + + /* HW expects these to be in network order when they are plugged + * into the registers which are little endian. To guarantee that + * ordering we need to do a leXX_to_cpup here so we are ready for + * the byteswap that occurs with writel + */ + rar_low = le32_to_cpup((__le32 *)(addr)); + rar_high = le16_to_cpup((__le16 *)(addr + 4)); + + /* Indicate to hardware the Address is Valid. */ + if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { + if (is_valid_ether_addr(addr)) + rar_high |= IGC_RAH_AV; + + rar_high |= IGC_RAH_POOL_1 << + adapter->mac_table[index].queue; + } + + wr32(IGC_RAL(index), rar_low); + wrfl(); + wr32(IGC_RAH(index), rar_high); + wrfl(); +} + +/* Set default MAC address for the PF in the first RAR entry */ +static void igc_set_default_mac_filter(struct igc_adapter *adapter) +{ + struct igc_mac_addr *mac_table = &adapter->mac_table[0]; + + ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); + mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, 0); +} + +/** + * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior.
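+ * (The body below is still an empty stub in this initial submission; + * the entry point is wired up so the configure path can already call + * it.)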
+ */ +static void igc_set_rx_mode(struct net_device *netdev) +{ +} + +/** + * igc_msix_other - msix other interrupt handler + * @irq: interrupt number + * @data: pointer to the adapter + */ +static irqreturn_t igc_msix_other(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_hw *hw = &adapter->hw; + u32 icr = rd32(IGC_ICR); + + /* reading ICR causes bit 31 of EICR to be cleared */ + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & IGC_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + wr32(IGC_EIMS, adapter->eims_other); + + return IRQ_HANDLED; +} + +/** + * igc_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset within IVAR, should be multiple of 8 + * + * The IVAR table consists of 2 columns, + * each containing a cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + */ +static void igc_write_ivar(struct igc_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = array_rd32(IGC_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | IGC_IVAR_VALID) << offset; + + array_wr32(IGC_IVAR0, index, ivar); +} + +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + int rx_queue = IGC_N0_QUEUE; + int tx_queue = IGC_N0_QUEUE; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case igc_i225: + if (rx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = BIT(msix_vector); + break; + default: + WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igc_configure_msix - Configure MSI-X hardware + * @adapter: Pointer to adapter structure + * + * igc_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + */ +static void igc_configure_msix(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i, vector = 0; + u32 tmp; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case igc_i225: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug.
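+ * Vector 0 is reserved below for the "other" causes handler: + * IVAR_MISC is written with (0 | IGC_IVAR_VALID) << 8 so link-change + * interrupts land on it, and the queue vectors are assigned from + * vector 1 onwards.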
+ */ + wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | + IGC_GPIE_PBA | IGC_GPIE_EIAME | + IGC_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = BIT(vector); + tmp = (vector++ | IGC_IVAR_VALID) << 8; + + wr32(IGC_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igc_assign_vector(adapter->q_vector[i], vector++); + + wrfl(); +} + +static irqreturn_t igc_msix_ring(int irq, void *data) +{ + struct igc_q_vector *q_vector = data; + + /* Write the ITR value calculated from the previous interrupt. */ + igc_write_itr(q_vector); + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igc_request_msix - Initialize MSI-X interrupts + * @adapter: Pointer to adapter structure + * + * igc_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. + */ +static int igc_request_msix(struct igc_adapter *adapter) +{ + int i = 0, err = 0, vector = 0, free_vector = 0; + struct net_device *netdev = adapter->netdev; + + err = request_irq(adapter->msix_entries[vector].vector, + &igc_msix_other, 0, netdev->name, adapter); + if (err) + goto err_out; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igc_q_vector *q_vector = adapter->q_vector[i]; + + vector++; + + q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); + + if (q_vector->rx.ring && q_vector->tx.ring) + sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else if (q_vector->tx.ring) + sprintf(q_vector->name, "%s-tx-%u", netdev->name, + q_vector->tx.ring->queue_index); + else if (q_vector->rx.ring) + sprintf(q_vector->name, "%s-rx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else + sprintf(q_vector->name, "%s-unused", netdev->name); + + err = request_irq(adapter->msix_entries[vector].vector, + igc_msix_ring, 0, q_vector->name, + q_vector); + if (err) + goto err_free; + } + + igc_configure_msix(adapter); + return 0; + +err_free: + /* free already assigned IRQs */ + free_irq(adapter->msix_entries[free_vector++].vector, adapter); + + vector--; + for (i = 0; i < vector; i++) { + free_irq(adapter->msix_entries[free_vector++].vector, + adapter->q_vector[i]); + } +err_out: + return err; +} + +/** + * igc_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igc_free_q_vector. 
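+ * Detaching the rings from adapter->tx_ring/rx_ring here ensures no + * datapath lookup can reach a vector whose memory is about to be + * freed.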
+ */ +static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* if we're coming from igc_set_interrupt_capability, the vectors are + * not yet allocated + */ + if (!q_vector) + return; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; + + netif_napi_del(&q_vector->napi); +} + +static void igc_reset_interrupt_capability(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGC_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } + + while (v_idx--) + igc_reset_q_vector(adapter, v_idx); +} + +/** + * igc_clear_interrupt_scheme - reset the device to a state of no interrupts + * @adapter: Pointer to adapter structure + * + * This function resets the device so that it has 0 rx queues, tx queues, and + * MSI-X interrupts allocated. + */ +static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) +{ + igc_free_q_vectors(adapter); + igc_reset_interrupt_capability(adapter); +} + +/** + * igc_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + */ +static void igc_free_q_vectors(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) { + igc_reset_q_vector(adapter, v_idx); + igc_free_q_vector(adapter, v_idx); + } +} + +/** + * igc_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. + */ +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + adapter->q_vector[v_idx] = NULL; + + /* igc_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + if (q_vector) + kfree_rcu(q_vector, rcu); +} + +/* Need to wait a few seconds after link up to get diagnostic information from + * the phy + */ +static void igc_update_phy_info(struct timer_list *t) +{ + struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); + + igc_get_phy_info(&adapter->hw); +} + +/** + * igc_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info + */ +static bool igc_has_link(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + bool link_active = false; + + /* get_link_status is set on LSC (link status) interrupt or + * rx sequence error interrupt. 
get_link_status will stay
+	 * false until the igc_check_for_link establishes link
+	 * for copper adapters ONLY
+	 */
+	switch (hw->phy.media_type) {
+	case igc_media_type_copper:
+		if (!hw->mac.get_link_status)
+			return true;
+		hw->mac.ops.check_for_link(hw);
+		link_active = !hw->mac.get_link_status;
+		break;
+	default:
+	case igc_media_type_unknown:
+		break;
+	}
+
+	if (hw->mac.type == igc_i225 &&
+	    hw->phy.id == I225_I_PHY_ID) {
+		if (!netif_carrier_ok(adapter->netdev)) {
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
+			adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
+			adapter->link_check_timeout = jiffies;
+		}
+	}
+
+	return link_active;
+}
+
+/**
+ * igc_watchdog - Timer Call-back
+ * @t: pointer to the watchdog timer embedded in the adapter
+ */
+static void igc_watchdog(struct timer_list *t)
+{
+	struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igc_watchdog_task(struct work_struct *work)
+{
+	struct igc_adapter *adapter = container_of(work,
+						   struct igc_adapter,
+						   watchdog_task);
+	struct net_device *netdev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_data, retry_count = 20;
+	u32 connsw;
+	u32 link;
+	int i;
+
+	link = igc_has_link(adapter);
+
+	if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
+		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		else
+			link = false;
+	}
+
+	/* Force link down if we have fiber to swap to */
+	if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+		if (hw->phy.media_type == igc_media_type_copper) {
+			connsw = rd32(IGC_CONNSW);
+			if (!(connsw & IGC_CONNSW_AUTOSENSE_EN))
+				link = 0;
+		}
+	}
+	if (link) {
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+
+			hw->mac.ops.get_speed_and_duplex(hw,
+							 &adapter->link_speed,
+							 &adapter->link_duplex);
+
+			ctrl = rd32(IGC_CTRL);
+			/* Link status message must follow this format */
+			netdev_info(netdev,
+				    "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+				    netdev->name,
+				    adapter->link_speed,
+				    adapter->link_duplex == FULL_DUPLEX ?
+				    "Full" : "Half",
+				    (ctrl & IGC_CTRL_TFCE) &&
+				    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
+				    (ctrl & IGC_CTRL_RFCE) ? "RX" :
+				    (ctrl & IGC_CTRL_TFCE) ? "TX" : "None");
+
+			/* check if SmartSpeed worked */
+			igc_check_downshift(hw);
+			if (phy->speed_downgraded)
+				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 14;
+				break;
+			case SPEED_100:
+				/* maybe add some timeout factor ? */
+				break;
+			}
+
+			if (adapter->link_speed != SPEED_1000)
+				goto no_wait;
+
+			/* wait for Remote receiver status OK */
+retry_read_status:
+			if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
+					      &phy_data)) {
+				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+				    retry_count) {
+					msleep(100);
+					retry_count--;
+					goto retry_read_status;
+				} else if (!retry_count) {
+					dev_err(&adapter->pdev->dev, "timed out waiting for Remote receiver status OK\n");
+				}
+			} else {
+				dev_err(&adapter->pdev->dev, "failed to read 1000Base-T Status register\n");
+			}
+no_wait:
+			netif_carrier_on(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+
+			/* Link status message must follow this format */
+			netdev_info(netdev, "igc: %s NIC Link is Down\n",
+				    netdev->name);
+			netif_carrier_off(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+
+			/* link is down, time to check for alternate media */
+			if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+				if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+					schedule_work(&adapter->reset_task);
+					/* return immediately */
+					return;
+				}
+			}
+
+		/* also check for alternate media here */
+		} else if (!netif_carrier_ok(netdev) &&
+			   (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
+			if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+				schedule_work(&adapter->reset_task);
+				/* return immediately */
+				return;
+			}
+		}
+	}
+
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		if (!netif_carrier_ok(netdev)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
+				adapter->tx_timeout_count++;
+				schedule_work(&adapter->reset_task);
+				/* return immediately since reset is imminent */
+				return;
+			}
+		}
+
+		/* Force detection of hung controller every watchdog period */
+		set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		u32 eics = 0;
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
+		wr32(IGC_EICS, eics);
+	} else {
+		wr32(IGC_ICS, IGC_ICS_RXDMT0);
+	}
+
+	/* Reset the timer */
+	if (!test_bit(__IGC_DOWN, &adapter->state)) {
+		if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + HZ));
+		else
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + 2 * HZ));
+	}
+}
+
+/**
+ * igc_update_ring_itr - update the dynamic ITR value based on packet size
+ * @q_vector: pointer to q_vector
+ *
+ * Stores a new ITR value based strictly on packet size. This
+ * algorithm is less sophisticated than that used in igc_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings. The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
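+ *
+ * For example (illustrative arithmetic only): with 1500-byte frames,
+ * avg_wire_size becomes 1500 + 24 = 1524, which falls outside the
+ * 300..1200 mid-size window below, so the new ITR value is
+ * 1524 / 2 = 762.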
+ * NOTE: This function is called only when operating in a multiqueue + * receive environment. + */ +static void igc_update_ring_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks. + */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGC_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if (avg_wire_size > 300 && avg_wire_size < 1200) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGC_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGC_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +/** + * igc_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * NOTE: These calculations are only valid when operating in a single- + * queue environment. 
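+ *
+ * Example (illustrative): 20 packets totalling 30000 bytes while in
+ * low_latency gives bytes > 10000 and bytes/packets = 1500 > 1200,
+ * so the ring container is moved to bulk_latency.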
+ */ +static void igc_update_itr(struct igc_q_vector *q_vector, + struct igc_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 10) || ((bytes / packets) > 1200)) + itrval = bulk_latency; + else if ((packets > 35)) + itrval = lowest_latency; + } else if (bytes / packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +/** + * igc_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + */ +static irqreturn_t igc_intr_msi(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_q_vector *q_vector = adapter->q_vector[0]; + struct igc_hw *hw = &adapter->hw; + /* read ICR disables interrupts using IAM */ + u32 icr = rd32(IGC_ICR); + + igc_write_itr(q_vector); + + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { + hw->mac.get_link_status = 1; + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igc_intr - Legacy Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + */ +static irqreturn_t igc_intr(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_q_vector *q_vector = adapter->q_vector[0]; + struct igc_hw *hw = &adapter->hw; + /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No + * need for the IMC write + */ + u32 icr = rd32(IGC_ICR); + + /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is + * not set, then the adapter didn't send an interrupt + */ + if (!(icr & IGC_ICR_INT_ASSERTED)) + return IRQ_NONE; + + igc_write_itr(q_vector); + + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +static void igc_set_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGC_4K_ITR; + goto set_itr_now; + default: + break; + } + + igc_update_itr(q_vector, &q_vector->tx); + igc_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing + */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. 
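+		 * (That deferred write is performed by igc_write_itr(),
+		 * which the interrupt handlers call before napi_schedule().)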
+ */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +static void igc_ring_irq_enable(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if (adapter->num_q_vectors == 1) + igc_set_itr(q_vector); + else + igc_update_ring_itr(q_vector); + } + + if (!test_bit(__IGC_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(IGC_EIMS, q_vector->eims_value); + else + igc_irq_enable(adapter); + } +} + +/** + * igc_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + */ +static int igc_poll(struct napi_struct *napi, int budget) +{ + struct igc_q_vector *q_vector = container_of(napi, + struct igc_q_vector, + napi); + bool clean_complete = true; + int work_done = 0; + + if (q_vector->tx.ring) + clean_complete = igc_clean_tx_irq(q_vector, budget); + + if (q_vector->rx.ring) { + int cleaned = igc_clean_rx_irq(q_vector, budget); + + work_done += cleaned; + if (cleaned >= budget) + clean_complete = false; + } + + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* If not enough Rx work done, exit the polling mode */ + napi_complete_done(napi, work_done); + igc_ring_irq_enable(q_vector); + + return 0; +} + +/** + * igc_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: Pointer to adapter structure + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + */ +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix) +{ + int numvecs, i; + int err; + + if (!msix) + goto msi_only; + adapter->flags |= IGC_FLAG_HAS_MSIX; + + /* Number of supported queues. 
*/ + adapter->num_rx_queues = adapter->rss_queues; + + adapter->num_tx_queues = adapter->rss_queues; + + /* start with one vector for every Rx queue */ + numvecs = adapter->num_rx_queues; + + /* if Tx handler is separate add 1 for every Tx queue */ + if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + + adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) + return; + + /* populate entry values */ + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) + return; + + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + + igc_reset_interrupt_capability(adapter); + +msi_only: + adapter->flags &= ~IGC_FLAG_HAS_MSIX; + + adapter->rss_queues = 1; + adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + if (!pci_enable_msi(adapter->pdev)) + adapter->flags |= IGC_FLAG_HAS_MSI; +} + +static void igc_add_ring(struct igc_ring *ring, + struct igc_ring_container *head) +{ + head->ring = ring; + head->count++; +} + +/** + * igc_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. 
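+ *
+ * Note that the q_vector and the rings it services come from a single
+ * allocation (sizeof(struct igc_q_vector) plus one struct igc_ring per
+ * ring), so freeing the q_vector also frees its rings.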
+ */ +static int igc_alloc_q_vector(struct igc_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igc_q_vector *q_vector; + struct igc_ring *ring; + int ring_count, size; + + /* igc only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + size = sizeof(struct igc_q_vector) + + (sizeof(struct igc_ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) + q_vector = kzalloc(size, GFP_KERNEL); + else + memset(q_vector, 0, size); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igc_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->io_addr + IGC_EITR(0); + q_vector->itr_val = IGC_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* initialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igc_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igc_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igc_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. 
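+ *
+ * For example (illustrative): with 4 Rx and 4 Tx queues, 8 vectors give
+ * each ring a dedicated vector, while 4 vectors make each vector
+ * service one Tx/Rx pair.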
+ */ +static int igc_alloc_q_vectors(struct igc_adapter *adapter) +{ + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igc_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igc_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. + */ +static void igc_cache_ring_register(struct igc_adapter *adapter) +{ + int i = 0, j = 0; + + switch (adapter->hw.mac.type) { + case igc_i225: + /* Fall through */ + default: + for (; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->reg_idx = i; + for (; j < adapter->num_tx_queues; j++) + adapter->tx_ring[j]->reg_idx = j; + break; + } +} + +/** + * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * @adapter: Pointer to adapter structure + * + * This function initializes the interrupts and allocates all of the queues. 
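+ *
+ * Ordering matters here: interrupt capability (MSI-X/MSI) is negotiated
+ * first, then the q_vectors are allocated, and finally the descriptor
+ * ring to register mapping is cached.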
+ */ +static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + igc_set_interrupt_capability(adapter, msix); + + err = igc_alloc_q_vectors(adapter); + if (err) { + dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + igc_cache_ring_register(adapter); + + return 0; + +err_alloc_q_vectors: + igc_reset_interrupt_capability(adapter); + return err; +} + +static void igc_free_irq(struct igc_adapter *adapter) +{ + if (adapter->msix_entries) { + int vector = 0, i; + + free_irq(adapter->msix_entries[vector++].vector, adapter); + + for (i = 0; i < adapter->num_q_vectors; i++) + free_irq(adapter->msix_entries[vector++].vector, + adapter->q_vector[i]); + } else { + free_irq(adapter->pdev->irq, adapter); + } +} + +/** + * igc_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + */ +static void igc_irq_disable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 regval = rd32(IGC_EIAM); + + wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); + wr32(IGC_EIMC, adapter->eims_enable_mask); + regval = rd32(IGC_EIAC); + wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); + } + + wr32(IGC_IAM, 0); + wr32(IGC_IMC, ~0); + wrfl(); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +/** + * igc_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + */ +static void igc_irq_enable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; + u32 regval = rd32(IGC_EIAC); + + wr32(IGC_EIAC, regval | adapter->eims_enable_mask); + regval = rd32(IGC_EIAM); + wr32(IGC_EIAM, regval | adapter->eims_enable_mask); + wr32(IGC_EIMS, adapter->eims_enable_mask); + wr32(IGC_IMS, ims); + } else { + wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + } +} + +/** + * igc_request_irq - initialize interrupts + * @adapter: Pointer to adapter structure + * + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. 
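+ *
+ * The fallback order is MSI-X, then MSI, then a legacy shared IRQ,
+ * mirroring the flow of the function body below.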
+ */
+static int igc_request_irq(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		err = igc_request_msix(adapter);
+		if (!err)
+			goto request_done;
+		/* fall back to MSI */
+		igc_free_all_tx_resources(adapter);
+		igc_free_all_rx_resources(adapter);
+
+		igc_clear_interrupt_scheme(adapter);
+		err = igc_init_interrupt_scheme(adapter, false);
+		if (err)
+			goto request_done;
+		igc_setup_all_tx_resources(adapter);
+		igc_setup_all_rx_resources(adapter);
+		igc_configure(adapter);
+	}
+
+	igc_assign_vector(adapter->q_vector[0], 0);
+
+	if (adapter->flags & IGC_FLAG_HAS_MSI) {
+		err = request_irq(pdev->irq, &igc_intr_msi, 0,
+				  netdev->name, adapter);
+		if (!err)
+			goto request_done;
+
+		/* fall back to legacy interrupts */
+		igc_reset_interrupt_capability(adapter);
+		adapter->flags &= ~IGC_FLAG_HAS_MSI;
+	}
+
+	err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
+			  netdev->name, adapter);
+
+	if (err)
+		dev_err(&pdev->dev, "Error %d getting interrupt\n",
+			err);
+
+request_done:
+	return err;
+}
+
+static void igc_write_itr(struct igc_q_vector *q_vector)
+{
+	u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
+
+	if (!q_vector->set_itr)
+		return;
+
+	if (!itr_val)
+		itr_val = IGC_ITR_VAL_MASK;
+
+	itr_val |= IGC_EITR_CNT_IGNR;
+
+	writel(itr_val, q_vector->itr_register);
+	q_vector->set_itr = 0;
+}
+
+/**
+ * igc_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int __igc_open(struct net_device *netdev, bool resuming)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int err = 0;
+	int i = 0;
+
+	/* disallow open during test */
+	if (test_bit(__IGC_TESTING, &adapter->state)) {
+		WARN_ON(resuming);
+		return -EBUSY;
+	}
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = igc_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igc_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	igc_power_up_link(adapter);
+
+	igc_configure(adapter);
+
+	err = igc_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	clear_bit(__IGC_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&adapter->q_vector[i]->napi);
+
+	/* Clear any pending interrupts. */
+	rd32(IGC_ICR);
+	igc_irq_enable(adapter);
+
+	netif_tx_start_all_queues(netdev);
+
+	/* start the watchdog.
*/ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); + + return IGC_SUCCESS; + +err_set_queues: + igc_free_irq(adapter); +err_req_irq: + igc_release_hw_control(adapter); + igc_power_down_link(adapter); + igc_free_all_rx_resources(adapter); +err_setup_rx: + igc_free_all_tx_resources(adapter); +err_setup_tx: + igc_reset(adapter); + + return err; +} + +static int igc_open(struct net_device *netdev) +{ + return __igc_open(netdev, false); +} + +/** + * igc_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the driver's control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + */ +static int __igc_close(struct net_device *netdev, bool suspending) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); + + igc_down(adapter); + + igc_release_hw_control(adapter); + + igc_free_irq(adapter); + + igc_free_all_tx_resources(adapter); + igc_free_all_rx_resources(adapter); + + return 0; +} + +static int igc_close(struct net_device *netdev) +{ + if (netif_device_present(netdev) || netdev->dismantle) + return __igc_close(netdev, false); + return 0; +} + +static const struct net_device_ops igc_netdev_ops = { + .ndo_open = igc_open, + .ndo_stop = igc_close, + .ndo_start_xmit = igc_xmit_frame, + .ndo_set_mac_address = igc_set_mac, + .ndo_change_mtu = igc_change_mtu, + .ndo_get_stats = igc_get_stats, + .ndo_do_ioctl = igc_ioctl, +}; + +/* PCIe configuration access */ +void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, *value); +} + +s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -IGC_ERR_CONFIG; + + pci_read_config_word(adapter->pdev, cap_offset + reg, value); + + return IGC_SUCCESS; +} + +s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -IGC_ERR_CONFIG; + + pci_write_config_word(adapter->pdev, cap_offset + reg, *value); + + return IGC_SUCCESS; +} + +u32 igc_rd32(struct igc_hw *hw, u32 reg) +{ + struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); + u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); + u32 value = 0; + + if (IGC_REMOVED(hw_addr)) + return ~value; + + value = readl(&hw_addr[reg]); + + /* reads should not return all F's */ + if (!(~value) && (!reg || !(~readl(hw_addr)))) { + struct net_device *netdev = igc->netdev; + + hw->hw_addr = NULL; + netif_device_detach(netdev); + netdev_err(netdev, "PCIe link lost, device now detached\n"); + } + + return value; +} + +/** + * igc_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igc_pci_tbl + * + * Returns 0 on success, negative on failure + * + * igc_probe initializes an adapter identified by a pci_dev structure. 
+ * The OS initialization, configuring the adapter private structure, + * and a hardware reset occur. + */ +static int igc_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct igc_adapter *adapter; + struct net_device *netdev; + struct igc_hw *hw; + const struct igc_info *ei = igc_info_tbl[ent->driver_data]; + int err, pci_using_dac; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + pci_using_dac = 0; + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + if (!err) { + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(64)); + if (!err) + pci_using_dac = 1; + } else { + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32)); + if (err) { + IGC_ERR("Wrong DMA configuration, aborting\n"); + goto err_dma; + } + } + } + + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, + IORESOURCE_MEM), + igc_driver_name); + if (err) + goto err_pci_reg; + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + err = -ENOMEM; + netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), + IGC_MAX_TX_QUEUES); + + if (!netdev) + goto err_alloc_etherdev; + + SET_NETDEV_DEV(netdev, &pdev->dev); + + pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->port_num = hw->bus.func; + adapter->msg_enable = GENMASK(debug - 1, 0); + + err = pci_save_state(pdev); + if (err) + goto err_ioremap; + + err = -EIO; + adapter->io_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!adapter->io_addr) + goto err_ioremap; + + /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ + hw->hw_addr = adapter->io_addr; + + netdev->netdev_ops = &igc_netdev_ops; + + netdev->watchdog_timeo = 5 * HZ; + + netdev->mem_start = pci_resource_start(pdev, 0); + netdev->mem_end = pci_resource_end(pdev, 0); + + /* PCI config space info */ + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->revision_id = pdev->revision; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + /* Copy the default MAC and PHY function pointers */ + memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); + memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); + + /* Initialize skew-specific constants */ + err = ei->get_invariants(hw); + if (err) + goto err_sw_init; + + /* setup the private structure */ + err = igc_sw_init(adapter); + if (err) + goto err_sw_init; + + /* MTU range: 68 - 9216 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; + + /* before reading the NVM, reset the controller to put the device in a + * known good starting state + */ + hw->mac.ops.reset_hw(hw); + + if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { + /* copy the MAC address out of the NVM */ + if (hw->mac.ops.read_mac_addr(hw)) + dev_err(&pdev->dev, "NVM Read Error\n"); + } + + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + + if (!is_valid_ether_addr(netdev->dev_addr)) { + dev_err(&pdev->dev, "Invalid MAC Address\n"); + err = -EIO; + goto err_eeprom; + } + + /* configure RXPBSIZE and TXPBSIZE */ + wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); + wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); + + timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); + timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); + + INIT_WORK(&adapter->reset_task, igc_reset_task); + 
INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
+	hw->phy.autoneg_advertised = 0xaf;
+
+	hw->fc.requested_mode = igc_fc_default;
+	hw->fc.current_mode = igc_fc_default;
+
+	/* reset the hardware with the new settings */
+	igc_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igc_get_hw_control(adapter);
+
+	strncpy(netdev->name, "eth%d", IFNAMSIZ);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	/* keep a copy of the board-specific info */
+	adapter->ei = *ei;
+
+	/* print pcie link status and MAC address */
+	pcie_print_link_status(pdev);
+	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
+
+	return 0;
+
+err_register:
+	igc_release_hw_control(adapter);
+err_eeprom:
+	if (!igc_check_reset_block(hw))
+		igc_reset_phy(hw);
+err_sw_init:
+	igc_clear_interrupt_scheme(adapter);
+	iounmap(adapter->io_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_selected_regions(pdev,
+				     pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * igc_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igc_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ */
+static void igc_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+
+	/* Release control of h/w to f/w. If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igc_release_hw_control(adapter);
+	unregister_netdev(netdev);
+
+	igc_clear_interrupt_scheme(adapter);
+	pci_iounmap(pdev, adapter->io_addr);
+	pci_release_mem_regions(pdev);
+
+	kfree(adapter->mac_table);
+	kfree(adapter->shadow_vfta);
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver igc_driver = {
+	.name     = igc_driver_name,
+	.id_table = igc_pci_tbl,
+	.probe    = igc_probe,
+	.remove   = igc_remove,
+};
+
+static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+				     const u32 max_rss_queues)
+{
+	/* Determine if we need to pair queues: if rss_queues > half of
+	 * max_rss_queues, pair the queues in order to conserve
+	 * interrupts due to limited supply.
+	 */
+	if (adapter->rss_queues > (max_rss_queues / 2))
+		adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+	else
+		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
+}
+
+static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+{
+	unsigned int max_rss_queues;
+
+	/* Determine the maximum number of RSS queues supported.
*/ + max_rss_queues = IGC_MAX_RX_QUEUES; + + return max_rss_queues; +} + +static void igc_init_queue_configuration(struct igc_adapter *adapter) +{ + u32 max_rss_queues; + + max_rss_queues = igc_get_max_rss_queues(adapter); + adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + + igc_set_flag_queue_pairs(adapter, max_rss_queues); +} + +/** + * igc_sw_init - Initialize general software structures (struct igc_adapter) + * @adapter: board private structure to initialize + * + * igc_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + */ +static int igc_sw_init(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + + int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; + + pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + + /* set default ring sizes */ + adapter->tx_ring_count = IGC_DEFAULT_TXD; + adapter->rx_ring_count = IGC_DEFAULT_RXD; + + /* set default ITR values */ + adapter->rx_itr_setting = IGC_DEFAULT_ITR; + adapter->tx_itr_setting = IGC_DEFAULT_ITR; + + /* set default work limits */ + adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; + + /* adjust max frame to be at least the size of a standard frame */ + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + + spin_lock_init(&adapter->nfc_lock); + spin_lock_init(&adapter->stats64_lock); + /* Assume MSI-X interrupts, will be checked during IRQ allocation */ + adapter->flags |= IGC_FLAG_HAS_MSIX; + + adapter->mac_table = kzalloc(size, GFP_ATOMIC); + if (!adapter->mac_table) + return -ENOMEM; + + igc_init_queue_configuration(adapter); + + /* This call may decrease the number of queues */ + if (igc_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igc_irq_disable(adapter); + + set_bit(__IGC_DOWN, &adapter->state); + + return 0; +} + +/** + * igc_get_hw_dev - return device + * @hw: pointer to hardware structure + * + * used by hardware layer to print debugging information + */ +struct net_device *igc_get_hw_dev(struct igc_hw *hw) +{ + struct igc_adapter *adapter = hw->back; + + return adapter->netdev; +} + +/** + * igc_init_module - Driver Registration Routine + * + * igc_init_module is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. + */ +static int __init igc_init_module(void) +{ + int ret; + + pr_info("%s - version %s\n", + igc_driver_string, igc_driver_version); + + pr_info("%s\n", igc_copyright); + + ret = pci_register_driver(&igc_driver); + return ret; +} + +module_init(igc_init_module); + +/** + * igc_exit_module - Driver Exit Cleanup Routine + * + * igc_exit_module is called just before the driver is removed + * from memory. 
+ */ +static void __exit igc_exit_module(void) +{ + pci_unregister_driver(&igc_driver); +} + +module_exit(igc_exit_module); +/* igc_main.c */ diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c new file mode 100644 index 000000000000..58f81aba0144 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_nvm.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include "igc_mac.h" +#include "igc_nvm.h" + +/** + * igc_poll_eerd_eewr_done - Poll for EEPROM read/write completion + * @hw: pointer to the HW structure + * @ee_reg: EEPROM flag for polling + * + * Polls the EEPROM status bit for either read or write completion based + * upon the value of 'ee_reg'. + */ +static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg) +{ + s32 ret_val = -IGC_ERR_NVM; + u32 attempts = 100000; + u32 i, reg = 0; + + for (i = 0; i < attempts; i++) { + if (ee_reg == IGC_NVM_POLL_READ) + reg = rd32(IGC_EERD); + else + reg = rd32(IGC_EEWR); + + if (reg & IGC_NVM_RW_REG_DONE) { + ret_val = 0; + break; + } + + udelay(5); + } + + return ret_val; +} + +/** + * igc_acquire_nvm - Generic request for access to EEPROM + * @hw: pointer to the HW structure + * + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -IGC_ERR_NVM (-1). + */ +s32 igc_acquire_nvm(struct igc_hw *hw) +{ + s32 timeout = IGC_NVM_GRANT_ATTEMPTS; + u32 eecd = rd32(IGC_EECD); + s32 ret_val = 0; + + wr32(IGC_EECD, eecd | IGC_EECD_REQ); + eecd = rd32(IGC_EECD); + + while (timeout) { + if (eecd & IGC_EECD_GNT) + break; + udelay(5); + eecd = rd32(IGC_EECD); + timeout--; + } + + if (!timeout) { + eecd &= ~IGC_EECD_REQ; + wr32(IGC_EECD, eecd); + hw_dbg("Could not acquire NVM grant\n"); + ret_val = -IGC_ERR_NVM; + } + + return ret_val; +} + +/** + * igc_release_nvm - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit. + */ +void igc_release_nvm(struct igc_hw *hw) +{ + u32 eecd; + + eecd = rd32(IGC_EECD); + eecd &= ~IGC_EECD_REQ; + wr32(IGC_EECD, eecd); +} + +/** + * igc_read_nvm_eerd - Reads EEPROM using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the EEPROM to read + * @words: number of words to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + */ +s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct igc_nvm_info *nvm = &hw->nvm; + u32 i, eerd = 0; + s32 ret_val = 0; + + /* A check for invalid values: offset too large, too many words, + * and not enough words. 
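+	 * (words > word_size - offset covers the "too many words" case and
+	 * words == 0 the "not enough words" case.)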
+ */ + if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || + words == 0) { + hw_dbg("nvm parameter(s) out of bounds\n"); + ret_val = -IGC_ERR_NVM; + goto out; + } + + for (i = 0; i < words; i++) { + eerd = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) + + IGC_NVM_RW_REG_START; + + wr32(IGC_EERD, eerd); + ret_val = igc_poll_eerd_eewr_done(hw, IGC_NVM_POLL_READ); + if (ret_val) + break; + + data[i] = (rd32(IGC_EERD) >> IGC_NVM_RW_REG_DATA); + } + +out: + return ret_val; +} + +/** + * igc_read_mac_addr - Read device MAC address + * @hw: pointer to the HW structure + */ +s32 igc_read_mac_addr(struct igc_hw *hw) +{ + u32 rar_high; + u32 rar_low; + u16 i; + + rar_high = rd32(IGC_RAH(0)); + rar_low = rd32(IGC_RAL(0)); + + for (i = 0; i < IGC_RAL_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8)); + + for (i = 0; i < IGC_RAH_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8)); + + for (i = 0; i < ETH_ALEN; i++) + hw->mac.addr[i] = hw->mac.perm_addr[i]; + + return 0; +} + +/** + * igc_validate_nvm_checksum - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + */ +s32 igc_validate_nvm_checksum(struct igc_hw *hw) +{ + u16 checksum = 0; + u16 i, nvm_data; + s32 ret_val = 0; + + for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + hw_dbg("NVM Read Error\n"); + goto out; + } + checksum += nvm_data; + } + + if (checksum != (u16)NVM_SUM) { + hw_dbg("NVM Checksum Invalid\n"); + ret_val = -IGC_ERR_NVM; + goto out; + } + +out: + return ret_val; +} + +/** + * igc_update_nvm_checksum - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. 
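+ *
+ * In other words, the checksum word is chosen so that all words from
+ * offset 0 through NVM_CHECKSUM_REG sum to NVM_SUM (0xBABA), which is
+ * what igc_validate_nvm_checksum() verifies.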
+ */ +s32 igc_update_nvm_checksum(struct igc_hw *hw) +{ + u16 checksum = 0; + u16 i, nvm_data; + s32 ret_val; + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + hw_dbg("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16)NVM_SUM - checksum; + ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); + if (ret_val) + hw_dbg("NVM Write Error while updating checksum.\n"); + +out: + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h new file mode 100644 index 000000000000..f9fc2e9cfb03 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_nvm.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_NVM_H_ +#define _IGC_NVM_H_ + +s32 igc_acquire_nvm(struct igc_hw *hw); +void igc_release_nvm(struct igc_hw *hw); +s32 igc_read_mac_addr(struct igc_hw *hw); +s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data); +s32 igc_validate_nvm_checksum(struct igc_hw *hw); +s32 igc_update_nvm_checksum(struct igc_hw *hw); + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c new file mode 100644 index 000000000000..38e43e6fc1c7 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -0,0 +1,791 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include "igc_phy.h" + +/* forward declaration */ +static s32 igc_phy_setup_autoneg(struct igc_hw *hw); +static s32 igc_wait_autoneg(struct igc_hw *hw); + +/** + * igc_check_reset_block - Check if PHY reset is blocked + * @hw: pointer to the HW structure + * + * Read the PHY management control register and check whether a PHY reset + * is blocked. If a reset is not blocked return 0, otherwise + * return IGC_ERR_BLK_PHY_RESET (12). + */ +s32 igc_check_reset_block(struct igc_hw *hw) +{ + u32 manc; + + manc = rd32(IGC_MANC); + + return (manc & IGC_MANC_BLK_PHY_RST_ON_IDE) ? + IGC_ERR_BLK_PHY_RESET : 0; +} + +/** + * igc_get_phy_id - Retrieve the PHY ID and revision + * @hw: pointer to the HW structure + * + * Reads the PHY registers and stores the PHY ID and possibly the PHY + * revision in the hardware structure. + */ +s32 igc_get_phy_id(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u16 phy_id; + + ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); + if (ret_val) + goto out; + + phy->id = (u32)(phy_id << 16); + usleep_range(200, 500); + ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); + if (ret_val) + goto out; + + phy->id |= (u32)(phy_id & PHY_REVISION_MASK); + phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); + +out: + return ret_val; +} + +/** + * igc_phy_has_link - Polls PHY for link + * @hw: pointer to the HW structure + * @iterations: number of times to poll for link + * @usec_interval: delay between polling attempts + * @success: pointer to whether polling was successful or not + * + * Polls the PHY status register for link, 'iterations' number of times. + */ +s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations, + u32 usec_interval, bool *success) +{ + u16 i, phy_status; + s32 ret_val = 0; + + for (i = 0; i < iterations; i++) { + /* Some PHYs require the PHY_STATUS register to be read + * twice due to the link bit being sticky. No harm doing + * it across the board. 
+ */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val && usec_interval > 0) {
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			if (usec_interval >= 1000)
+				mdelay(usec_interval / 1000);
+			else
+				udelay(usec_interval);
+		}
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_LINK_STATUS)
+			break;
+		if (usec_interval >= 1000)
+			mdelay(usec_interval / 1000);
+		else
+			udelay(usec_interval);
+	}
+
+	*success = (i < iterations) ? true : false;
+
+	return ret_val;
+}
+
+/**
+ * igc_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, restore the link to previous settings.
+ */
+void igc_power_up_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg &= ~MII_CR_POWER_DOWN;
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * igc_power_down_phy_copper - Power down copper PHY
+ * @hw: pointer to the HW structure
+ *
+ * Power down PHY to save power when interface is down and wake on lan
+ * is not enabled.
+ */
+void igc_power_down_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg |= MII_CR_POWER_DOWN;
+
+	/* Temporary workaround - should be removed once the PHY implements
+	 * the IEEE registers properly
+	 */
+	/* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+	usleep_range(1000, 2000);
+}
+
+/**
+ * igc_check_downshift - Checks whether a downshift in speed occurred
+ * @hw: pointer to the HW structure
+ *
+ * Returns 0 on success, otherwise a negative error code.
+ *
+ * A downshift is detected by querying the PHY link health.
+ */
+s32 igc_check_downshift(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_data, offset, mask;
+	s32 ret_val;
+
+	switch (phy->type) {
+	case igc_phy_i225:
+	default:
+		/* speed downshift not supported */
+		phy->speed_downgraded = false;
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->speed_downgraded = (phy_data & mask) ? true : false;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_phy_hw_reset - PHY hardware reset
+ * @hw: pointer to the HW structure
+ *
+ * Verify the reset block is not blocking us from resetting. Acquire
+ * semaphore (if necessary) and read/set/write the device control reset
+ * bit in the PHY. Wait the appropriate delay time for the device to
+ * reset and release the semaphore (if necessary).
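+ *
+ * Note that the reset is driven through the device CTRL register
+ * (IGC_CTRL_PHY_RST) rather than through the PHY's own MII control
+ * register.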
+ */
+s32 igc_phy_hw_reset(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u32 ctrl;
+
+	ret_val = igc_check_reset_block(hw);
+	if (ret_val) {
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ctrl = rd32(IGC_CTRL);
+	wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST);
+	wrfl();
+
+	udelay(phy->reset_delay_us);
+
+	wr32(IGC_CTRL, ctrl);
+	wrfl();
+
+	usleep_range(1500, 2000);
+
+	phy->ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_copper_link_autoneg - Setup/Enable autoneg for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Performs initial bounds checking on autoneg advertisement parameter, then
+ * configure to advertise the full capability. Setup the PHY to autoneg
+ * and restart the negotiation process with the link partner. If
+ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ */
+static s32 igc_copper_link_autoneg(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_ctrl;
+	s32 ret_val;
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (phy->autoneg_advertised == 0)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	hw_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = igc_phy_setup_autoneg(hw);
+	if (ret_val) {
+		hw_dbg("Error Setting up Auto-Negotiation\n");
+		goto out;
+	}
+	hw_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, in a callback routine)?
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = igc_wait_autoneg(hw);
+		if (ret_val) {
+			hw_dbg("Error while waiting for autoneg to complete\n");
+			goto out;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_wait_autoneg - Wait for auto-neg completion
+ * @hw: pointer to the HW structure
+ *
+ * Waits for auto-negotiation to complete or for the auto-negotiation time
+ * limit to expire, whichever happens first.
+ */
+static s32 igc_wait_autoneg(struct igc_hw *hw)
+{
+	u16 i, phy_status;
+	s32 ret_val = 0;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_AUTONEG_COMPLETE)
+			break;
+		msleep(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ * igc_phy_setup_autoneg - Configure PHY for auto-negotiation
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MII auto-neg advertisement register and/or the 1000T control
+ * register and if the PHY is already setup for auto-negotiation, then
+ * return successful. Otherwise, setup advertisement and flow control to
+ * the appropriate values for the wanted auto-negotiation.
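+ *
+ * For example, advertising only 1000BASE-T full duplex clears all of
+ * the 10/100 capability bits in PHY_AUTONEG_ADV and sets
+ * CR_1000T_FD_CAPS in PHY_1000T_CTRL.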
+ */ +static s32 igc_phy_setup_autoneg(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + u16 aneg_multigbt_an_ctrl = 0; + u16 mii_1000t_ctrl_reg = 0; + u16 mii_autoneg_adv_reg; + s32 ret_val; + + phy->autoneg_advertised &= phy->autoneg_mask; + + /* Read the MII Auto-Neg Advertisement Register (Address 4). */ + ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) { + /* Read the MII 1000Base-T Control Register (Address 9). */ + ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, + &mii_1000t_ctrl_reg); + if (ret_val) + return ret_val; + } + + if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && + hw->phy.id == I225_I_PHY_ID) { + /* Read the MULTI GBT AN Control Register - reg 7.32 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + ANEG_MULTIGBT_AN_CTRL, + &aneg_multigbt_an_ctrl); + + if (ret_val) + return ret_val; + } + + /* Need to parse both autoneg_advertised and fc and set up + * the appropriate PHY registers. First we will parse for + * autoneg_advertised software override. Since we can advertise + * a plethora of combinations, we need to check each bit + * individually. + */ + + /* First we clear all the 10/100 mb speed bits in the Auto-Neg + * Advertisement Register (Address 4) and the 1000 mb speed bits in + * the 1000Base-T Control Register (Address 9). + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | + NWAY_AR_100TX_HD_CAPS | + NWAY_AR_10T_FD_CAPS | + NWAY_AR_10T_HD_CAPS); + mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); + + hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised); + + /* Do we want to advertise 10 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_HALF) { + hw_dbg("Advertise 10mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; + } + + /* Do we want to advertise 10 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_FULL) { + hw_dbg("Advertise 10mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; + } + + /* Do we want to advertise 100 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_HALF) { + hw_dbg("Advertise 100mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; + } + + /* Do we want to advertise 100 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_FULL) { + hw_dbg("Advertise 100mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; + } + + /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ + if (phy->autoneg_advertised & ADVERTISE_1000_HALF) + hw_dbg("Advertise 1000mb Half duplex request denied!\n"); + + /* Do we want to advertise 1000 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { + hw_dbg("Advertise 1000mb Full duplex\n"); + mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; + } + + /* We do not allow the Phy to advertise 2500 Mb Half Duplex */ + if (phy->autoneg_advertised & ADVERTISE_2500_HALF) + hw_dbg("Advertise 2500mb Half duplex request denied!\n"); + + /* Do we want to advertise 2500 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_2500_FULL) { + hw_dbg("Advertise 2500mb Full duplex\n"); + aneg_multigbt_an_ctrl |= CR_2500T_FD_CAPS; + } else { + aneg_multigbt_an_ctrl &= ~CR_2500T_FD_CAPS; + } + + /* Check for a software override of the flow control settings, and + * setup the PHY advertisement registers accordingly. 
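The block of per-speed checks above reduces to a table lookup: each requested speed/duplex flag maps to one capability bit, the 10/100 bits living in the MII advertisement register (register 4) and the 1000 Mb full-duplex bit in the 1000BASE-T control register (register 9). A condensed sketch; the flag and bit values mirror common MII definitions and are assumptions here, not copied from igc headers:

#include <stdint.h>

#define ADV_10_HALF	0x0001
#define ADV_10_FULL	0x0002
#define ADV_100_HALF	0x0004
#define ADV_100_FULL	0x0008
#define ADV_1000_FULL	0x0020

#define AR_10T_HD	0x0020	/* ANAR capability bits (register 4) */
#define AR_10T_FD	0x0040
#define AR_100TX_HD	0x0080
#define AR_100TX_FD	0x0100
#define CR_1000T_FD	0x0200	/* 1000BASE-T control (register 9) */

static void build_adv(uint16_t advertised, uint16_t *anar, uint16_t *gig)
{
	static const struct { uint16_t flag, bit; } map[] = {
		{ ADV_10_HALF,  AR_10T_HD },
		{ ADV_10_FULL,  AR_10T_FD },
		{ ADV_100_HALF, AR_100TX_HD },
		{ ADV_100_FULL, AR_100TX_FD },
	};
	unsigned int i;

	/* Clear the speed bits first, exactly as the driver does. */
	*anar &= ~(AR_10T_HD | AR_10T_FD | AR_100TX_HD | AR_100TX_FD);
	*gig  &= ~CR_1000T_FD;

	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (advertised & map[i].flag)
			*anar |= map[i].bit;

	/* 1000 Mb half duplex is never advertised; full duplex only. */
	if (advertised & ADV_1000_FULL)
		*gig |= CR_1000T_FD;
}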
If + * auto-negotiation is enabled, then software will have to set the + * "PAUSE" bits to the correct value in the Auto-Negotiation + * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- + * negotiation. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * but we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: No software override. The flow control configuration + * in the EEPROM is used. + */ + switch (hw->fc.current_mode) { + case igc_fc_none: + /* Flow control (Rx & Tx) is completely disabled by a + * software over-ride. + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case igc_fc_rx_pause: + /* Rx Flow control is enabled, and Tx Flow control is + * disabled, by a software over-ride. + * + * Since there really isn't a way to advertise that we are + * capable of Rx Pause ONLY, we will advertise that we + * support both symmetric and asymmetric Rx PAUSE. Later + * (in igc_config_fc_after_link_up) we will disable the + * hw's ability to send PAUSE frames. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case igc_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is + * disabled, by a software over-ride. + */ + mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; + mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; + break; + case igc_fc_full: + /* Flow control (both Rx and Tx) is enabled by a software + * over-ride. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + default: + hw_dbg("Flow control param set incorrectly\n"); + return -IGC_ERR_CONFIG; + } + + ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) + ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, + mii_1000t_ctrl_reg); + + if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && + hw->phy.id == I225_I_PHY_ID) + ret_val = phy->ops.write_reg(hw, + (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + ANEG_MULTIGBT_AN_CTRL, + aneg_multigbt_an_ctrl); + + return ret_val; +} + +/** + * igc_setup_copper_link - Configure copper link settings + * @hw: pointer to the HW structure + * + * Calls the appropriate function to configure the link for auto-neg or forced + * speed and duplex. Then we check for link, once link is established calls + * to configure collision distance and flow control are called. If link is + * not established, we return -IGC_ERR_PHY (-2). + */ +s32 igc_setup_copper_link(struct igc_hw *hw) +{ + s32 ret_val = 0; + bool link; + + if (hw->mac.autoneg) { + /* Setup autoneg and flow control advertisement and perform + * autonegotiation. + */ + ret_val = igc_copper_link_autoneg(hw); + if (ret_val) + goto out; + } else { + /* PHY will be set to 10H, 10F, 100H or 100F + * depending on user settings. + */ + hw_dbg("Forcing Speed and Duplex\n"); + ret_val = hw->phy.ops.force_speed_duplex(hw); + if (ret_val) { + hw_dbg("Error Forcing Speed and Duplex\n"); + goto out; + } + } + + /* Check link status. Wait up to 100 microseconds for link to become + * valid. 
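The flow-control switch above encodes everything in two ANAR bits defined by IEEE 802.3 Annex 28B: PAUSE (symmetric) and ASM_DIR (asymmetric direction). A compact restatement, with the fc_* enum names as assumptions standing in for the igc_fc_* modes:

#include <stdint.h>

#define NWAY_AR_PAUSE	0x0400	/* bit 10: symmetric pause */
#define NWAY_AR_ASM_DIR	0x0800	/* bit 11: asymmetric direction */

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static int apply_fc_bits(enum fc_mode mode, uint16_t *anar)
{
	*anar &= ~(NWAY_AR_PAUSE | NWAY_AR_ASM_DIR);

	switch (mode) {
	case FC_NONE:
		break;			/* advertise no pause at all */
	case FC_RX_PAUSE:
		/* There is no "Rx pause only" encoding, so advertise both
		 * symmetric and asymmetric pause; Tx pause is suppressed
		 * after link-up instead.
		 */
		*anar |= NWAY_AR_PAUSE | NWAY_AR_ASM_DIR;
		break;
	case FC_TX_PAUSE:
		*anar |= NWAY_AR_ASM_DIR;
		break;
	case FC_FULL:
		*anar |= NWAY_AR_PAUSE | NWAY_AR_ASM_DIR;
		break;
	default:
		return -1;		/* bad configuration */
	}
	return 0;
}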
+ */ + ret_val = igc_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link); + if (ret_val) + goto out; + + if (link) { + hw_dbg("Valid link established!!!\n"); + igc_config_collision_dist(hw); + ret_val = igc_config_fc_after_link_up(hw); + } else { + hw_dbg("Unable to establish link!!!\n"); + } + +out: + return ret_val; +} + +/** + * igc_read_phy_reg_mdic - Read MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the MDI control register in the PHY at offset and stores the + * information read to data. + */ +static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data) +{ + struct igc_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + s32 ret_val = 0; + + if (offset > MAX_PHY_REG_ADDRESS) { + hw_dbg("PHY Address %d is out of range\n", offset); + ret_val = -IGC_ERR_PARAM; + goto out; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + mdic = ((offset << IGC_MDIC_REG_SHIFT) | + (phy->addr << IGC_MDIC_PHY_SHIFT) | + (IGC_MDIC_OP_READ)); + + wr32(IGC_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { + usleep_range(500, 1000); + mdic = rd32(IGC_MDIC); + if (mdic & IGC_MDIC_READY) + break; + } + if (!(mdic & IGC_MDIC_READY)) { + hw_dbg("MDI Read did not complete\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + if (mdic & IGC_MDIC_ERROR) { + hw_dbg("MDI Error\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + *data = (u16)mdic; + +out: + return ret_val; +} + +/** + * igc_write_phy_reg_mdic - Write MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write to register at offset + * + * Writes data to MDI control register in the PHY at offset. + */ +static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data) +{ + struct igc_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + s32 ret_val = 0; + + if (offset > MAX_PHY_REG_ADDRESS) { + hw_dbg("PHY Address %d is out of range\n", offset); + ret_val = -IGC_ERR_PARAM; + goto out; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to write the desired data. 
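igc_read_phy_reg_mdic() packs the whole transaction into one 32-bit MDIC word and then polls for READY/ERROR. The field layout below follows the long-standing e1000/igb convention (register offset at bit 16, PHY address at bit 21, opcode at bit 26, READY and ERROR status at bits 28 and 30) and should be read as an illustrative assumption for igc:

#include <stdint.h>
#include <stdio.h>

#define MDIC_REG_SHIFT	16
#define MDIC_PHY_SHIFT	21
#define MDIC_OP_READ	(2u << 26)
#define MDIC_READY	(1u << 28)
#define MDIC_ERROR	(1u << 30)

static uint32_t mdic_read_cmd(uint32_t phy_addr, uint32_t reg)
{
	return (reg << MDIC_REG_SHIFT) |
	       (phy_addr << MDIC_PHY_SHIFT) |
	       MDIC_OP_READ;
}

int main(void)
{
	/* A completed transaction as the MAC would report it: READY set,
	 * ERROR clear, PHY data in the low 16 bits.
	 */
	uint32_t mdic = mdic_read_cmd(1, 2) | MDIC_READY | 0x0141;

	if ((mdic & MDIC_READY) && !(mdic & MDIC_ERROR))
		printf("PHY reg value: 0x%04x\n", (unsigned)(mdic & 0xFFFF));
	return 0;
}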
+ */ + mdic = (((u32)data) | + (offset << IGC_MDIC_REG_SHIFT) | + (phy->addr << IGC_MDIC_PHY_SHIFT) | + (IGC_MDIC_OP_WRITE)); + + wr32(IGC_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { + usleep_range(500, 1000); + mdic = rd32(IGC_MDIC); + if (mdic & IGC_MDIC_READY) + break; + } + if (!(mdic & IGC_MDIC_READY)) { + hw_dbg("MDI Write did not complete\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + if (mdic & IGC_MDIC_ERROR) { + hw_dbg("MDI Error\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + +out: + return ret_val; +} + +/** + * __igc_access_xmdio_reg - Read/write XMDIO register + * @hw: pointer to the HW structure + * @address: XMDIO address to program + * @dev_addr: device address to program + * @data: pointer to value to read/write from/to the XMDIO address + * @read: boolean flag to indicate read or write + */ +static s32 __igc_access_xmdio_reg(struct igc_hw *hw, u16 address, + u8 dev_addr, u16 *data, bool read) +{ + s32 ret_val; + + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, dev_addr); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, address); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, IGC_MMDAC_FUNC_DATA | + dev_addr); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, IGC_MMDAAD, data); + else + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, *data); + if (ret_val) + return ret_val; + + /* Recalibrate the device back to 0 */ + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, 0); + if (ret_val) + return ret_val; + + return ret_val; +} + +/** + * igc_read_xmdio_reg - Read XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be read from the EMI address + */ +static s32 igc_read_xmdio_reg(struct igc_hw *hw, u16 addr, + u8 dev_addr, u16 *data) +{ + return __igc_access_xmdio_reg(hw, addr, dev_addr, data, true); +} + +/** + * igc_write_xmdio_reg - Write XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be written to the XMDIO address + */ +static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr, + u8 dev_addr, u16 data) +{ + return __igc_access_xmdio_reg(hw, addr, dev_addr, &data, false); +} + +/** + * igc_write_phy_reg_gpy - Write GPY PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + */ +s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) +{ + u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + s32 ret_val; + + offset = offset & GPY_REG_MASK; + + if (!dev_addr) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + ret_val = igc_write_phy_reg_mdic(hw, offset, data); + if (ret_val) + return ret_val; + hw->phy.ops.release(hw); + } else { + ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr, + data); + } + + return ret_val; +} + +/** + * igc_read_phy_reg_gpy - Read GPY PHY register + * @hw: pointer to the HW structure + * @offset: lower half is register offset to read to + * upper half is MMD to use. 
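__igc_access_xmdio_reg() above is the standard indirect MMD access of IEEE 802.3 Clause 22 (control register 13 and address/data register 14): select the device, latch the register address, switch register 13 to data mode, move the data, then park the function field back at address 0. The same five steps in isolation, with phy_read()/phy_write() as assumed accessors:

#include <stdbool.h>
#include <stdint.h>

#define MII_MMD_CTRL		13	/* IGC_MMDAC */
#define MII_MMD_DATA		14	/* IGC_MMDAAD */
#define MMD_CTRL_FUNC_DATA	0x4000	/* data mode, no post-increment */

extern int phy_read(uint32_t reg, uint16_t *val);
extern int phy_write(uint32_t reg, uint16_t val);

static int mmd_access(uint8_t dev_addr, uint16_t address,
		      uint16_t *data, bool read)
{
	int ret;

	ret = phy_write(MII_MMD_CTRL, dev_addr);	/* select device  */
	if (ret)
		return ret;
	ret = phy_write(MII_MMD_DATA, address);		/* latch address  */
	if (ret)
		return ret;
	ret = phy_write(MII_MMD_CTRL, MMD_CTRL_FUNC_DATA | dev_addr);
	if (ret)
		return ret;
	ret = read ? phy_read(MII_MMD_DATA, data)	/* move the data  */
		   : phy_write(MII_MMD_DATA, *data);
	if (ret)
		return ret;
	return phy_write(MII_MMD_CTRL, 0);		/* back to addr 0 */
}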
+ * @data: data to read at register offset + * + * Acquires semaphore, if necessary, then reads the data in the PHY register + * at the offset. Release any acquired semaphores before exiting. + */ +s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) +{ + u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + s32 ret_val; + + offset = offset & GPY_REG_MASK; + + if (!dev_addr) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + ret_val = igc_read_phy_reg_mdic(hw, offset, data); + if (ret_val) + return ret_val; + hw->phy.ops.release(hw); + } else { + ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, + data); + } + + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h new file mode 100644 index 000000000000..25cba33de7e2 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_phy.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_PHY_H_ +#define _IGC_PHY_H_ + +#include "igc_mac.h" + +s32 igc_check_reset_block(struct igc_hw *hw); +s32 igc_phy_hw_reset(struct igc_hw *hw); +s32 igc_get_phy_id(struct igc_hw *hw); +s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations, + u32 usec_interval, bool *success); +s32 igc_check_downshift(struct igc_hw *hw); +s32 igc_setup_copper_link(struct igc_hw *hw); +void igc_power_up_phy_copper(struct igc_hw *hw); +void igc_power_down_phy_copper(struct igc_hw *hw); +s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data); +s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data); + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h new file mode 100644 index 000000000000..a1bd3216c906 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_REGS_H_ +#define _IGC_REGS_H_ + +/* General Register Descriptions */ +#define IGC_CTRL 0x00000 /* Device Control - RW */ +#define IGC_STATUS 0x00008 /* Device Status - RO */ +#define IGC_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define IGC_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define IGC_MDIC 0x00020 /* MDI Control - RW */ +#define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */ +#define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ + +/* Internal Packet Buffer Size Registers */ +#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ +#define IGC_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ + +/* NVM Register Descriptions */ +#define IGC_EERD 0x12014 /* EEprom mode read - RW */ +#define IGC_EEWR 0x12018 /* EEprom mode write - RW */ + +/* Flow Control Register Descriptions */ +#define IGC_FCAL 0x00028 /* FC Address Low - RW */ +#define IGC_FCAH 0x0002C /* FC Address High - RW */ +#define IGC_FCT 0x00030 /* FC Type - RW */ +#define IGC_FCTTV 0x00170 /* FC Transmit Timer - RW */ +#define IGC_FCRTL 0x02160 /* FC Receive Threshold Low - RW */ +#define IGC_FCRTH 0x02168 /* FC Receive Threshold High - RW */ +#define IGC_FCRTV 0x02460 /* FC Refresh Timer Value - RW */ +#define IGC_FCSTS 0x02464 /* FC Status - RO */ + +/* PCIe Register Description */ +#define IGC_GCR 0x05B00 /* PCIe control- RW */ + +/* Semaphore registers */ +#define IGC_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ +#define IGC_SWSM 0x05B50 /* SW Semaphore */ +#define IGC_FWSM 0x05B54 /* FW Semaphore */ + +/* Function Active and Power State to MNG */ 
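For orientation, the offsets in this header are byte offsets into the device's BAR0 mapping and are consumed through the thin wr32()/rd32() wrappers defined at the end of the file. A user-space model with an array standing in for the mapping (illustration only, no kernel API):

#include <stdint.h>
#include <stdio.h>

#define IGC_CTRL   0x00000
#define IGC_STATUS 0x00008

static uint32_t bar0[0x13000 / 4];	/* pretend BAR0, dword-indexed */

static void wr32(uint32_t reg, uint32_t val)
{
	bar0[reg / 4] = val;		/* writel(val, hw_addr + reg) on HW */
}

static uint32_t rd32(uint32_t reg)
{
	return bar0[reg / 4];		/* readl(hw_addr + reg) on HW */
}

int main(void)
{
	wr32(IGC_CTRL, 0x1);
	/* On hardware, reading STATUS back also flushes posted writes,
	 * which is what the driver's wrfl() macro is for.
	 */
	printf("CTRL=0x%x STATUS=0x%x\n", rd32(IGC_CTRL), rd32(IGC_STATUS));
	return 0;
}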
+#define IGC_FACTPS 0x05B30 + +/* Interrupt Register Description */ +#define IGC_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ +#define IGC_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ +#define IGC_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ +#define IGC_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ +#define IGC_EIAM 0x01530 /* Ext. Interrupt Auto Mask - RW */ +#define IGC_ICR 0x01500 /* Intr Cause Read - RC/W1C */ +#define IGC_ICS 0x01504 /* Intr Cause Set - WO */ +#define IGC_IMS 0x01508 /* Intr Mask Set/Read - RW */ +#define IGC_IMC 0x0150C /* Intr Mask Clear - WO */ +#define IGC_IAM 0x01510 /* Intr Ack Auto Mask- RW */ +/* Intr Throttle - RW */ +#define IGC_EITR(_n) (0x01680 + (0x4 * (_n))) +/* Interrupt Vector Allocation - RW */ +#define IGC_IVAR0 0x01700 +#define IGC_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ +#define IGC_GPIE 0x01514 /* General Purpose Intr Enable - RW */ + +/* Interrupt Cause */ +#define IGC_ICRXPTC 0x04104 /* Rx Packet Timer Expire Count */ +#define IGC_ICRXATC 0x04108 /* Rx Absolute Timer Expire Count */ +#define IGC_ICTXPTC 0x0410C /* Tx Packet Timer Expire Count */ +#define IGC_ICTXATC 0x04110 /* Tx Absolute Timer Expire Count */ +#define IGC_ICTXQEC 0x04118 /* Tx Queue Empty Count */ +#define IGC_ICTXQMTC 0x0411C /* Tx Queue Min Threshold Count */ +#define IGC_ICRXDMTC 0x04120 /* Rx Descriptor Min Threshold Count */ +#define IGC_ICRXOC 0x04124 /* Receiver Overrun Count */ + +#define IGC_CBTMPC 0x0402C /* Circuit Breaker TX Packet Count */ +#define IGC_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ +#define IGC_CBRMPC 0x040FC /* Circuit Breaker RX Packet Count */ +#define IGC_RPTHC 0x04104 /* Rx Packets To Host */ +#define IGC_HGPTC 0x04118 /* Host Good Packets TX Count */ +#define IGC_HTCBDPC 0x04124 /* Host TX Circ.Breaker Drop Count */ + +/* MSI-X Table Register Descriptions */ +#define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */ + +/* Receive Register Descriptions */ +#define IGC_RCTL 0x00100 /* Rx Control - RW */ +#define IGC_SRRCTL(_n) (0x0C00C + ((_n) * 0x40)) +#define IGC_PSRTYPE(_i) (0x05480 + ((_i) * 4)) +#define IGC_RDBAL(_n) (0x0C000 + ((_n) * 0x40)) +#define IGC_RDBAH(_n) (0x0C004 + ((_n) * 0x40)) +#define IGC_RDLEN(_n) (0x0C008 + ((_n) * 0x40)) +#define IGC_RDH(_n) (0x0C010 + ((_n) * 0x40)) +#define IGC_RDT(_n) (0x0C018 + ((_n) * 0x40)) +#define IGC_RXDCTL(_n) (0x0C028 + ((_n) * 0x40)) +#define IGC_RQDPC(_n) (0x0C030 + ((_n) * 0x40)) +#define IGC_RXCSUM 0x05000 /* Rx Checksum Control - RW */ +#define IGC_RLPML 0x05004 /* Rx Long Packet Max Length */ +#define IGC_RFCTL 0x05008 /* Receive Filter Control*/ +#define IGC_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define IGC_UTA 0x0A000 /* Unicast Table Array - RW */ +#define IGC_RAL(_n) (0x05400 + ((_n) * 0x08)) +#define IGC_RAH(_n) (0x05404 + ((_n) * 0x08)) + +/* Transmit Register Descriptions */ +#define IGC_TCTL 0x00400 /* Tx Control - RW */ +#define IGC_TIPG 0x00410 /* Tx Inter-packet gap - RW */ +#define IGC_TDBAL(_n) (0x0E000 + ((_n) * 0x40)) +#define IGC_TDBAH(_n) (0x0E004 + ((_n) * 0x40)) +#define IGC_TDLEN(_n) (0x0E008 + ((_n) * 0x40)) +#define IGC_TDH(_n) (0x0E010 + ((_n) * 0x40)) +#define IGC_TDT(_n) (0x0E018 + ((_n) * 0x40)) +#define IGC_TXDCTL(_n) (0x0E028 + ((_n) * 0x40)) + +/* MMD Register Descriptions */ +#define IGC_MMDAC 13 /* MMD Access Control */ +#define IGC_MMDAAD 14 /* MMD Access Address/Data */ + +/* Good transmitted packets counter registers */ +#define IGC_PQGPTC(_n) (0x010014 + (0x100 * (_n))) + +/* Statistics 
Register Descriptions */ +#define IGC_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define IGC_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define IGC_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ +#define IGC_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define IGC_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define IGC_SCC 0x04014 /* Single Collision Count - R/clr */ +#define IGC_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define IGC_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define IGC_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define IGC_COLC 0x04028 /* Collision Count - R/clr */ +#define IGC_DC 0x04030 /* Defer Count - R/clr */ +#define IGC_TNCRS 0x04034 /* Tx-No CRS - R/clr */ +#define IGC_SEC 0x04038 /* Sequence Error Count - R/clr */ +#define IGC_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define IGC_RLEC 0x04040 /* Receive Length Error Count - R/clr */ +#define IGC_XONRXC 0x04048 /* XON Rx Count - R/clr */ +#define IGC_XONTXC 0x0404C /* XON Tx Count - R/clr */ +#define IGC_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ +#define IGC_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ +#define IGC_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ +#define IGC_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ +#define IGC_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ +#define IGC_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ +#define IGC_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ +#define IGC_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ +#define IGC_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ +#define IGC_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ +#define IGC_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ +#define IGC_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ +#define IGC_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ +#define IGC_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ +#define IGC_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ +#define IGC_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ +#define IGC_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ +#define IGC_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ +#define IGC_RUC 0x040A4 /* Rx Undersize Count - R/clr */ +#define IGC_RFC 0x040A8 /* Rx Fragment Count - R/clr */ +#define IGC_ROC 0x040AC /* Rx Oversize Count - R/clr */ +#define IGC_RJC 0x040B0 /* Rx Jabber Count - R/clr */ +#define IGC_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ +#define IGC_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define IGC_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ +#define IGC_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ +#define IGC_TORH 0x040C4 /* Total Octets Rx High - R/clr */ +#define IGC_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ +#define IGC_TOTH 0x040CC /* Total Octets Tx High - R/clr */ +#define IGC_TPR 0x040D0 /* Total Packets Rx - R/clr */ +#define IGC_TPT 0x040D4 /* Total Packets Tx - R/clr */ +#define IGC_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ +#define IGC_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ +#define IGC_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ +#define IGC_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ +#define IGC_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ +#define IGC_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ +#define IGC_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ +#define IGC_BPTC 0x040F4 /* Broadcast 
Packets Tx Count - R/clr */ +#define IGC_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ +#define IGC_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ +#define IGC_IAC 0x04100 /* Interrupt Assertion Count */ +#define IGC_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */ +#define IGC_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */ +#define IGC_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ +#define IGC_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */ +#define IGC_RPTHC 0x04104 /* Rx Packets To Host */ +#define IGC_HGPTC 0x04118 /* Host Good Packets Tx Count */ +#define IGC_RXDMTC 0x04120 /* Rx Descriptor Minimum Threshold Count */ +#define IGC_HGORCL 0x04128 /* Host Good Octets Received Count Low */ +#define IGC_HGORCH 0x0412C /* Host Good Octets Received Count High */ +#define IGC_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ +#define IGC_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ +#define IGC_LENERRS 0x04138 /* Length Errors Count */ +#define IGC_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ +#define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ + +/* Management registers */ +#define IGC_MANC 0x05820 /* Management Control - RW */ + +/* Shadow Ram Write Register - RW */ +#define IGC_SRWR 0x12018 + +/* forward declaration */ +struct igc_hw; +u32 igc_rd32(struct igc_hw *hw, u32 reg); + +/* write operations, indexed using DWORDS */ +#define wr32(reg, val) \ +do { \ + u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ +} while (0) + +#define rd32(reg) (igc_rd32(hw, reg)) + +#define wrfl() ((void)rd32(IGC_STATUS)) + +#define array_wr32(reg, offset, value) \ + wr32((reg) + ((offset) << 2), (value)) + +#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index eaac26424486..45b108f8f955 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o octeontx2_mbox-y := mbox.o -octeontx2_af-y := cgx.o rvu.o rvu_cgx.o +octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 5328ecc8cea2..352501b54aee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -29,6 +29,7 @@ * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion * @cmd_lock: Lock to serialize the command interface * @resp: command response + * @link_info: link related information * @event_cb: callback for linkchange events * @cmd_pend: flag set before new command is started * flag cleared after command response is received @@ -40,6 +41,7 @@ struct lmac { wait_queue_head_t wq_cmd_cmplt; struct mutex cmd_lock; u64 resp; + struct cgx_link_user_info link_info; struct cgx_event_cb event_cb; bool cmd_pend; struct cgx *cgx; @@ -58,6 +60,12 @@ struct cgx { static LIST_HEAD(cgx_list); +/* Convert firmware speed encoding to user format(Mbps) */ +static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX]; + +/* Convert firmware lmac type encoding to string */ +static char *cgx_lmactype_string[LMAC_MODE_MAX]; + /* Supported devices */ static const struct 
pci_device_id cgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) }, @@ -119,6 +127,177 @@ void *cgx_get_pdata(int cgx_id) } EXPORT_SYMBOL(cgx_get_pdata); +/* Ensure the required lock for event queue(where asynchronous events are + * posted) is acquired before calling this API. Else an asynchronous event(with + * latest link status) can reach the destination before this function returns + * and could make the link status appear wrong. + */ +int cgx_get_link_info(void *cgxd, int lmac_id, + struct cgx_link_user_info *linfo) +{ + struct lmac *lmac = lmac_pdata(lmac_id, cgxd); + + if (!lmac) + return -ENODEV; + + *linfo = lmac->link_info; + return 0; +} +EXPORT_SYMBOL(cgx_get_link_info); + +static u64 mac2u64 (u8 *mac_addr) +{ + u64 mac = 0; + int index; + + for (index = ETH_ALEN - 1; index >= 0; index--) + mac |= ((u64)*mac_addr++) << (8 * index); + return mac; +} + +int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + /* copy 6bytes from macaddr */ + /* memcpy(&cfg, mac_addr, 6); */ + + cfg = mac2u64 (mac_addr); + + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (lmac_id * 0x8)), + cfg | CGX_DMAC_CAM_ADDR_ENABLE | ((u64)lmac_id << 49)); + + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= CGX_DMAC_CTL0_CAM_ENABLE; + cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + return 0; +} +EXPORT_SYMBOL(cgx_lmac_addr_set); + +u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8); + return cfg & CGX_RX_DMAC_ADR_MASK; +} +EXPORT_SYMBOL(cgx_lmac_addr_get); + +static inline u8 cgx_get_lmac_type(struct cgx *cgx, int lmac_id) +{ + u64 cfg; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + return (cfg >> CGX_LMAC_TYPE_SHIFT) & CGX_LMAC_TYPE_MASK; +} + +/* Configure CGX LMAC in internal loopback mode */ +int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u8 lmac_type; + u64 cfg; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + + lmac_type = cgx_get_lmac_type(cgx, lmac_id); + if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); + if (enable) + cfg |= CGXX_GMP_PCS_MRX_CTL_LBK; + else + cfg &= ~CGXX_GMP_PCS_MRX_CTL_LBK; + cgx_write(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SPUX_CONTROL1); + if (enable) + cfg |= CGXX_SPUX_CONTROL1_LBK; + else + cfg &= ~CGXX_SPUX_CONTROL1_LBK; + cgx_write(cgx, lmac_id, CGXX_SPUX_CONTROL1, cfg); + } + return 0; +} +EXPORT_SYMBOL(cgx_lmac_internal_loopback); + +void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) +{ + struct cgx *cgx = cgx_get_pdata(cgx_id); + u64 cfg = 0; + + if (!cgx) + return; + + if (enable) { + /* Enable promiscuous mode on LMAC */ + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg &= ~(CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE); + cfg |= CGX_DMAC_BCAST_MODE; + cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + cfg = cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); + cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + } else { + /* Disable promiscuous mode */ + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE; + cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + cfg = 
cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); + cfg |= CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + } +} +EXPORT_SYMBOL(cgx_lmac_promisc_config); + +int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) +{ + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); + return 0; +} +EXPORT_SYMBOL(cgx_get_rx_stats); + +int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat) +{ + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + *tx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (idx * 8)); + return 0; +} +EXPORT_SYMBOL(cgx_get_tx_stats); + +int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + if (enable) + cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + else + cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); + return 0; +} +EXPORT_SYMBOL(cgx_lmac_rx_tx_enable); + /* CGX Firmware interface low level support */ static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac) { @@ -191,36 +370,79 @@ static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp, return err; } +static inline void cgx_link_usertable_init(void) +{ + cgx_speed_mbps[CGX_LINK_NONE] = 0; + cgx_speed_mbps[CGX_LINK_10M] = 10; + cgx_speed_mbps[CGX_LINK_100M] = 100; + cgx_speed_mbps[CGX_LINK_1G] = 1000; + cgx_speed_mbps[CGX_LINK_2HG] = 2500; + cgx_speed_mbps[CGX_LINK_5G] = 5000; + cgx_speed_mbps[CGX_LINK_10G] = 10000; + cgx_speed_mbps[CGX_LINK_20G] = 20000; + cgx_speed_mbps[CGX_LINK_25G] = 25000; + cgx_speed_mbps[CGX_LINK_40G] = 40000; + cgx_speed_mbps[CGX_LINK_50G] = 50000; + cgx_speed_mbps[CGX_LINK_100G] = 100000; + + cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII"; + cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI"; + cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI"; + cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R"; + cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R"; + cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII"; + cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R"; + cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R"; + cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R"; + cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII"; +} + +static inline void link_status_user_format(u64 lstat, + struct cgx_link_user_info *linfo, + struct cgx *cgx, u8 lmac_id) +{ + char *lmac_string; + + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); + linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); + linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; + linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id); + lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; + strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); +} + /* Hardware event handlers */ static inline void cgx_link_change_handler(u64 lstat, struct lmac *lmac) { + struct cgx_link_user_info *linfo; struct cgx *cgx = lmac->cgx; struct cgx_link_event event; struct device *dev; + int err_type; dev = &cgx->pdev->dev; - event.lstat.link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); - event.lstat.full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); - event.lstat.speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat); - event.lstat.err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat); + link_status_user_format(lstat, 
&event.link_uinfo, cgx, lmac->lmac_id); + err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat); event.cgx_id = cgx->cgx_id; event.lmac_id = lmac->lmac_id; + /* update the local copy of link status */ + lmac->link_info = event.link_uinfo; + linfo = &lmac->link_info; + if (!lmac->event_cb.notify_link_chg) { dev_dbg(dev, "cgx port %d:%d Link change handler null", cgx->cgx_id, lmac->lmac_id); - if (event.lstat.err_type != CGX_ERR_NONE) { + if (err_type != CGX_ERR_NONE) { dev_err(dev, "cgx port %d:%d Link error %d\n", - cgx->cgx_id, lmac->lmac_id, - event.lstat.err_type); + cgx->cgx_id, lmac->lmac_id, err_type); } - dev_info(dev, "cgx port %d:%d Link status %s, speed %x\n", + dev_info(dev, "cgx port %d:%d Link is %s %d Mbps\n", cgx->cgx_id, lmac->lmac_id, - event.lstat.link_up ? "UP" : "DOWN", - event.lstat.speed); + linfo->link_up ? "UP" : "DOWN", linfo->speed); return; } @@ -448,6 +670,8 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) list_add(&cgx->cgx_list, &cgx_list); cgx->cgx_id = cgx_get_cgx_cnt() - 1; + cgx_link_usertable_init(); + err = cgx_lmac_init(cgx); if (err) goto err_release_lmac; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index a2a7a6d72d32..ada25ed0765b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -11,6 +11,7 @@ #ifndef CGX_H #define CGX_H +#include "mbox.h" #include "cgx_fw_if.h" /* PCI device IDs */ @@ -24,14 +25,35 @@ #define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX) /* Registers */ +#define CGXX_CMRX_CFG 0x00 +#define CMR_EN BIT_ULL(55) +#define DATA_PKT_TX_EN BIT_ULL(53) +#define DATA_PKT_RX_EN BIT_ULL(54) +#define CGX_LMAC_TYPE_SHIFT 40 +#define CGX_LMAC_TYPE_MASK 0xF #define CGXX_CMRX_INT 0x040 #define FW_CGX_INT BIT_ULL(1) #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 +#define CGXX_CMRX_RX_STAT0 0x070 #define CGXX_CMRX_RX_LMACS 0x128 +#define CGXX_CMRX_RX_DMAC_CTL0 0x1F8 +#define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) +#define CGX_DMAC_CAM_ACCEPT BIT_ULL(3) +#define CGX_DMAC_MCAST_MODE BIT_ULL(1) +#define CGX_DMAC_BCAST_MODE BIT_ULL(0) +#define CGXX_CMRX_RX_DMAC_CAM0 0x200 +#define CGX_DMAC_CAM_ADDR_ENABLE BIT_ULL(48) +#define CGXX_CMRX_RX_DMAC_CAM1 0x400 +#define CGX_RX_DMAC_ADR_MASK GENMASK_ULL(47, 0) +#define CGXX_CMRX_TX_STAT0 0x700 #define CGXX_SCRATCH0_REG 0x1050 #define CGXX_SCRATCH1_REG 0x1058 #define CGX_CONST 0x2000 +#define CGXX_SPUX_CONTROL1 0x10000 +#define CGXX_SPUX_CONTROL1_LBK BIT_ULL(14) +#define CGXX_GMP_PCS_MRX_CTL 0x30000 +#define CGXX_GMP_PCS_MRX_CTL_LBK BIT_ULL(14) #define CGX_COMMAND_REG CGXX_SCRATCH1_REG #define CGX_EVENT_REG CGXX_SCRATCH0_REG @@ -40,8 +62,22 @@ #define CGX_NVEC 37 #define CGX_LMAC_FWI 0 +enum LMAC_TYPE { + LMAC_MODE_SGMII = 0, + LMAC_MODE_XAUI = 1, + LMAC_MODE_RXAUI = 2, + LMAC_MODE_10G_R = 3, + LMAC_MODE_40G_R = 4, + LMAC_MODE_QSGMII = 6, + LMAC_MODE_25G_R = 7, + LMAC_MODE_50G_R = 8, + LMAC_MODE_100G_R = 9, + LMAC_MODE_USXGMII = 10, + LMAC_MODE_MAX, +}; + struct cgx_link_event { - struct cgx_lnk_sts lstat; + struct cgx_link_user_info link_uinfo; u8 cgx_id; u8 lmac_id; }; @@ -62,4 +98,13 @@ int cgx_get_cgx_cnt(void); int cgx_get_lmac_cnt(void *cgxd); void *cgx_get_pdata(int cgx_id); int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id); +int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat); +int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat); +int cgx_lmac_rx_tx_enable(void *cgxd, int 
lmac_id, bool enable); +int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr); +u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id); +void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable); +int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable); +int cgx_get_link_info(void *cgxd, int lmac_id, + struct cgx_link_user_info *linfo); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h new file mode 100644 index 000000000000..28eb691185f4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef COMMON_H +#define COMMON_H + +#include "rvu_struct.h" + +#define OTX2_ALIGN 128 /* Align to cacheline */ + +#define Q_SIZE_16 0ULL /* 16 entries */ +#define Q_SIZE_64 1ULL /* 64 entries */ +#define Q_SIZE_256 2ULL +#define Q_SIZE_1K 3ULL +#define Q_SIZE_4K 4ULL +#define Q_SIZE_16K 5ULL +#define Q_SIZE_64K 6ULL +#define Q_SIZE_256K 7ULL +#define Q_SIZE_1M 8ULL /* Million entries */ +#define Q_SIZE_MIN Q_SIZE_16 +#define Q_SIZE_MAX Q_SIZE_1M + +#define Q_COUNT(x) (16ULL << (2 * x)) +#define Q_SIZE(x, n) ((ilog2(x) - (n)) / 2) + +/* Admin queue info */ + +/* Since we intend to add only one instruction at a time, + * keep queue size to it's minimum. + */ +#define AQ_SIZE Q_SIZE_16 +/* HW head & tail pointer mask */ +#define AQ_PTR_MASK 0xFFFFF + +struct qmem { + void *base; + dma_addr_t iova; + int alloc_sz; + u8 entry_sz; + u8 align; + u32 qsize; +}; + +static inline int qmem_alloc(struct device *dev, struct qmem **q, + int qsize, int entry_sz) +{ + struct qmem *qmem; + int aligned_addr; + + if (!qsize) + return -EINVAL; + + *q = devm_kzalloc(dev, sizeof(*qmem), GFP_KERNEL); + if (!*q) + return -ENOMEM; + qmem = *q; + + qmem->entry_sz = entry_sz; + qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN; + qmem->base = dma_zalloc_coherent(dev, qmem->alloc_sz, + &qmem->iova, GFP_KERNEL); + if (!qmem->base) + return -ENOMEM; + + qmem->qsize = qsize; + + aligned_addr = ALIGN((u64)qmem->iova, OTX2_ALIGN); + qmem->align = (aligned_addr - qmem->iova); + qmem->base += qmem->align; + qmem->iova += qmem->align; + return 0; +} + +static inline void qmem_free(struct device *dev, struct qmem *qmem) +{ + if (!qmem) + return; + + if (qmem->base) + dma_free_coherent(dev, qmem->alloc_sz, + qmem->base - qmem->align, + qmem->iova - qmem->align); + devm_kfree(dev, qmem); +} + +struct admin_queue { + struct qmem *inst; + struct qmem *res; + spinlock_t lock; /* Serialize inst enqueue from PFs */ +}; + +/* NPA aura count */ +enum npa_aura_sz { + NPA_AURA_SZ_0, + NPA_AURA_SZ_128, + NPA_AURA_SZ_256, + NPA_AURA_SZ_512, + NPA_AURA_SZ_1K, + NPA_AURA_SZ_2K, + NPA_AURA_SZ_4K, + NPA_AURA_SZ_8K, + NPA_AURA_SZ_16K, + NPA_AURA_SZ_32K, + NPA_AURA_SZ_64K, + NPA_AURA_SZ_128K, + NPA_AURA_SZ_256K, + NPA_AURA_SZ_512K, + NPA_AURA_SZ_1M, + NPA_AURA_SZ_MAX, +}; + +#define NPA_AURA_COUNT(x) (1ULL << ((x) + 6)) + +/* NPA AQ result structure for init/read/write of aura HW contexts */ +struct npa_aq_aura_res { + struct npa_aq_res_s res; + struct npa_aura_s aura_ctx; + struct npa_aura_s ctx_mask; +}; + +/* NPA AQ result structure for init/read/write of pool HW contexts */ +struct 
npa_aq_pool_res { + struct npa_aq_res_s res; + struct npa_pool_s pool_ctx; + struct npa_pool_s ctx_mask; +}; + +/* NIX Transmit schedulers */ +enum nix_scheduler { + NIX_TXSCH_LVL_SMQ = 0x0, + NIX_TXSCH_LVL_MDQ = 0x0, + NIX_TXSCH_LVL_TL4 = 0x1, + NIX_TXSCH_LVL_TL3 = 0x2, + NIX_TXSCH_LVL_TL2 = 0x3, + NIX_TXSCH_LVL_TL1 = 0x4, + NIX_TXSCH_LVL_CNT = 0x5, +}; + +/* NIX LSO format indices. + * As of now TSO is the only one using, so statically assigning indices. + */ +#define NIX_LSO_FORMAT_IDX_TSOV4 0 +#define NIX_LSO_FORMAT_IDX_TSOV6 1 + +/* RSS info */ +#define MAX_RSS_GROUPS 8 +/* Group 0 has to be used in default pkt forwarding MCAM entries + * reserved for NIXLFs. Groups 1-7 can be used for RSS for ntuple + * filters. + */ +#define DEFAULT_RSS_CONTEXT_GROUP 0 +#define MAX_RSS_INDIR_TBL_SIZE 256 /* 1 << Max adder bits */ + +#endif /* COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index bedf0ee62450..c339024d04e0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -124,21 +124,50 @@ M(ATTACH_RESOURCES, 0x002, rsrc_attach, msg_rsp) \ M(DETACH_RESOURCES, 0x003, rsrc_detach, msg_rsp) \ M(MSIX_OFFSET, 0x004, msg_req, msix_offset_rsp) \ /* CGX mbox IDs (range 0x200 - 0x3FF) */ \ +M(CGX_START_RXTX, 0x200, msg_req, msg_rsp) \ +M(CGX_STOP_RXTX, 0x201, msg_req, msg_rsp) \ +M(CGX_STATS, 0x202, msg_req, cgx_stats_rsp) \ +M(CGX_MAC_ADDR_SET, 0x203, cgx_mac_addr_set_or_get, \ + cgx_mac_addr_set_or_get) \ +M(CGX_MAC_ADDR_GET, 0x204, cgx_mac_addr_set_or_get, \ + cgx_mac_addr_set_or_get) \ +M(CGX_PROMISC_ENABLE, 0x205, msg_req, msg_rsp) \ +M(CGX_PROMISC_DISABLE, 0x206, msg_req, msg_rsp) \ +M(CGX_START_LINKEVENTS, 0x207, msg_req, msg_rsp) \ +M(CGX_STOP_LINKEVENTS, 0x208, msg_req, msg_rsp) \ +M(CGX_GET_LINKINFO, 0x209, msg_req, cgx_link_info_msg) \ +M(CGX_INTLBK_ENABLE, 0x20A, msg_req, msg_rsp) \ +M(CGX_INTLBK_DISABLE, 0x20B, msg_req, msg_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ +M(NPA_LF_ALLOC, 0x400, npa_lf_alloc_req, npa_lf_alloc_rsp) \ +M(NPA_LF_FREE, 0x401, msg_req, msg_rsp) \ +M(NPA_AQ_ENQ, 0x402, npa_aq_enq_req, npa_aq_enq_rsp) \ +M(NPA_HWCTX_DISABLE, 0x403, hwctx_disable_req, msg_rsp) \ /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \ /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ /* CPT mbox IDs (range 0xA00 - 0xBFF) */ \ /* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ +M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc_req, nix_lf_alloc_rsp) \ +M(NIX_LF_FREE, 0x8001, msg_req, msg_rsp) \ +M(NIX_AQ_ENQ, 0x8002, nix_aq_enq_req, nix_aq_enq_rsp) \ +M(NIX_HWCTX_DISABLE, 0x8003, hwctx_disable_req, msg_rsp) + +/* Messages initiated by AF (range 0xC00 - 0xDFF) */ +#define MBOX_UP_CGX_MESSAGES \ +M(CGX_LINK_EVENT, 0xC00, cgx_link_info_msg, msg_rsp) enum { #define M(_name, _id, _1, _2) MBOX_MSG_ ## _name = _id, MBOX_MESSAGES +MBOX_UP_CGX_MESSAGES #undef M }; /* Mailbox message formats */ +#define RVU_DEFAULT_PF_FUNC 0xFFFF + /* Generic request msg used for those mbox messages which * don't send any data in the request. 
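The M() lists above are X-macros: one table of message IDs that the driver expands several times with different M() definitions (into the MBOX_MSG_* enum here, and into per-message allocators and handlers elsewhere in the series). A standalone illustration, simplified to two parameters instead of the driver's four:

#include <stdio.h>

#define DEMO_MESSAGES \
M(READY,        0x001) \
M(NPA_LF_ALLOC, 0x400) \
M(NIX_LF_ALLOC, 0x8000)

enum {
#define M(_name, _id) MBOX_MSG_ ## _name = _id,
DEMO_MESSAGES
#undef M
};

int main(void)
{
	printf("NIX_LF_ALLOC id = 0x%x\n", MBOX_MSG_NIX_LF_ALLOC);
	return 0;
}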
*/ @@ -208,4 +237,181 @@ struct msix_offset_rsp { u16 cptlf_msixoff[MAX_RVU_BLKLF_CNT]; }; +/* CGX mbox message formats */ + +struct cgx_stats_rsp { + struct mbox_msghdr hdr; +#define CGX_RX_STATS_COUNT 13 +#define CGX_TX_STATS_COUNT 18 + u64 rx_stats[CGX_RX_STATS_COUNT]; + u64 tx_stats[CGX_TX_STATS_COUNT]; +}; + +/* Structure for requesting the operation for + * setting/getting mac address in the CGX interface + */ +struct cgx_mac_addr_set_or_get { + struct mbox_msghdr hdr; + u8 mac_addr[ETH_ALEN]; +}; + +struct cgx_link_user_info { + uint64_t link_up:1; + uint64_t full_duplex:1; + uint64_t lmac_type_id:4; + uint64_t speed:20; /* speed in Mbps */ +#define LMACTYPE_STR_LEN 16 + char lmac_type[LMACTYPE_STR_LEN]; +}; + +struct cgx_link_info_msg { + struct mbox_msghdr hdr; + struct cgx_link_user_info link_info; +}; + +/* NPA mbox message formats */ + +/* NPA mailbox error codes + * Range 301 - 400. + */ +enum npa_af_status { + NPA_AF_ERR_PARAM = -301, + NPA_AF_ERR_AQ_FULL = -302, + NPA_AF_ERR_AQ_ENQUEUE = -303, + NPA_AF_ERR_AF_LF_INVALID = -304, + NPA_AF_ERR_AF_LF_ALLOC = -305, + NPA_AF_ERR_LF_RESET = -306, +}; + +/* For NPA LF context alloc and init */ +struct npa_lf_alloc_req { + struct mbox_msghdr hdr; + int node; + int aura_sz; /* No of auras */ + u32 nr_pools; /* No of pools */ +}; + +struct npa_lf_alloc_rsp { + struct mbox_msghdr hdr; + u32 stack_pg_ptrs; /* No of ptrs per stack page */ + u32 stack_pg_bytes; /* Size of stack page */ + u16 qints; /* NPA_AF_CONST::QINTS */ +}; + +/* NPA AQ enqueue msg */ +struct npa_aq_enq_req { + struct mbox_msghdr hdr; + u32 aura_id; + u8 ctype; + u8 op; + union { + /* Valid when op == WRITE/INIT and ctype == AURA. + * LF fills the pool_id in aura.pool_addr. AF will translate + * the pool_id to pool context pointer. + */ + struct npa_aura_s aura; + /* Valid when op == WRITE/INIT and ctype == POOL */ + struct npa_pool_s pool; + }; + /* Mask data when op == WRITE (1=write, 0=don't write) */ + union { + /* Valid when op == WRITE and ctype == AURA */ + struct npa_aura_s aura_mask; + /* Valid when op == WRITE and ctype == POOL */ + struct npa_pool_s pool_mask; + }; +}; + +struct npa_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + /* Valid when op == READ and ctype == AURA */ + struct npa_aura_s aura; + /* Valid when op == READ and ctype == POOL */ + struct npa_pool_s pool; + }; +}; + +/* Disable all contexts of type 'ctype' */ +struct hwctx_disable_req { + struct mbox_msghdr hdr; + u8 ctype; +}; + +/* NIX mailbox error codes + * Range 401 - 500. 
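The paired context/ctx_mask members in npa_aq_enq_req (and the NIX equivalent below) follow the read-modify-write convention spelled out in the comment: for op == WRITE, a mask bit of 1 takes the bit from the new context, and 0 preserves the current hardware value. Worked on a plain 64-bit word for illustration:

#include <stdint.h>
#include <stdio.h>

static uint64_t masked_write(uint64_t cur, uint64_t val, uint64_t mask)
{
	/* Bits set in mask come from val; everything else is kept. */
	return (cur & ~mask) | (val & mask);
}

int main(void)
{
	uint64_t hw = 0xAAAA5555AAAA5555ULL;

	/* Update only the low 16 bits, leave the rest untouched. */
	printf("0x%016llx\n",
	       (unsigned long long)masked_write(hw, 0x1234, 0xFFFF));
	return 0;
}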
+ */ +enum nix_af_status { + NIX_AF_ERR_PARAM = -401, + NIX_AF_ERR_AQ_FULL = -402, + NIX_AF_ERR_AQ_ENQUEUE = -403, + NIX_AF_ERR_AF_LF_INVALID = -404, + NIX_AF_ERR_AF_LF_ALLOC = -405, + NIX_AF_ERR_TLX_ALLOC_FAIL = -406, + NIX_AF_ERR_TLX_INVALID = -407, + NIX_AF_ERR_RSS_SIZE_INVALID = -408, + NIX_AF_ERR_RSS_GRPS_INVALID = -409, + NIX_AF_ERR_FRS_INVALID = -410, + NIX_AF_ERR_RX_LINK_INVALID = -411, + NIX_AF_INVAL_TXSCHQ_CFG = -412, + NIX_AF_SMQ_FLUSH_FAILED = -413, + NIX_AF_ERR_LF_RESET = -414, +}; + +/* For NIX LF context alloc and init */ +struct nix_lf_alloc_req { + struct mbox_msghdr hdr; + int node; + u32 rq_cnt; /* No of receive queues */ + u32 sq_cnt; /* No of send queues */ + u32 cq_cnt; /* No of completion queues */ + u8 xqe_sz; + u16 rss_sz; + u8 rss_grps; + u16 npa_func; + u16 sso_func; + u64 rx_cfg; /* See NIX_AF_LF(0..127)_RX_CFG */ +}; + +struct nix_lf_alloc_rsp { + struct mbox_msghdr hdr; + u16 sqb_size; + u8 lso_tsov4_idx; + u8 lso_tsov6_idx; + u8 mac_addr[ETH_ALEN]; +}; + +/* NIX AQ enqueue msg */ +struct nix_aq_enq_req { + struct mbox_msghdr hdr; + u32 qidx; + u8 ctype; + u8 op; + union { + struct nix_rq_ctx_s rq; + struct nix_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; + union { + struct nix_rq_ctx_s rq_mask; + struct nix_sq_ctx_s sq_mask; + struct nix_cq_ctx_s cq_mask; + struct nix_rsse_s rss_mask; + struct nix_rx_mce_s mce_mask; + }; +}; + +struct nix_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + struct nix_rq_ctx_s rq; + struct nix_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 2033f42c3226..c06cca9c9e1a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -47,18 +47,18 @@ MODULE_DEVICE_TABLE(pci, rvu_id_table); */ int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero) { + unsigned long timeout = jiffies + usecs_to_jiffies(100); void __iomem *reg; - int timeout = 100; u64 reg_val; reg = rvu->afreg_base + ((block << 28) | offset); - while (timeout) { + while (time_before(jiffies, timeout)) { reg_val = readq(reg); if (zero && !(reg_val & mask)) return 0; if (!zero && (reg_val & mask)) return 0; - usleep_range(1, 2); + usleep_range(1, 5); timeout--; } return -EBUSY; @@ -361,6 +361,19 @@ static void rvu_check_block_implemented(struct rvu *rvu) } } +int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf) +{ + int err; + + if (!block->implemented) + return 0; + + rvu_write64(rvu, block->addr, block->lfreset_reg, lf | BIT_ULL(12)); + err = rvu_poll_reg(rvu, block->addr, block->lfreset_reg, BIT_ULL(12), + true); + return err; +} + static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg) { struct rvu_block *block = &rvu->hw->block[blkaddr]; @@ -552,6 +565,9 @@ static void rvu_free_hw_resources(struct rvu *rvu) int id, max_msix; u64 cfg; + rvu_npa_freemem(rvu); + rvu_nix_freemem(rvu); + /* Free block LF bitmaps */ for (id = 0; id < BLK_COUNT; id++) { block = &hw->block[id]; @@ -755,6 +771,54 @@ init: rvu_scan_block(rvu, block); } + err = rvu_npa_init(rvu); + if (err) + return err; + + err = rvu_nix_init(rvu); + if (err) + return err; + + return 0; +} + +/* NPA and NIX admin queue APIs */ +void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq) +{ + if (!aq) + return; + + qmem_free(rvu->dev, aq->inst); + qmem_free(rvu->dev, 
aq->res); + devm_kfree(rvu->dev, aq); +} + +int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue, + int qsize, int inst_size, int res_size) +{ + struct admin_queue *aq; + int err; + + *ad_queue = devm_kzalloc(rvu->dev, sizeof(*aq), GFP_KERNEL); + if (!*ad_queue) + return -ENOMEM; + aq = *ad_queue; + + /* Alloc memory for instructions i.e AQ */ + err = qmem_alloc(rvu->dev, &aq->inst, qsize, inst_size); + if (err) { + devm_kfree(rvu->dev, aq); + return err; + } + + /* Alloc memory for results */ + err = qmem_alloc(rvu->dev, &aq->res, qsize, res_size); + if (err) { + rvu_aq_free(rvu, aq); + return err; + } + + spin_lock_init(&aq->lock); return 0; } @@ -1316,6 +1380,63 @@ static void rvu_mbox_handler(struct work_struct *work) otx2_mbox_msg_send(mbox, pf); } +static void rvu_mbox_up_handler(struct work_struct *work) +{ + struct rvu_work *mwork = container_of(work, struct rvu_work, work); + struct rvu *rvu = mwork->rvu; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + int offset, id; + u16 pf; + + mbox = &rvu->mbox_up; + pf = mwork - rvu->mbox_wrk_up; + mdev = &mbox->dev[pf]; + + rsp_hdr = mdev->mbase + mbox->rx_start; + if (rsp_hdr->num_msgs == 0) { + dev_warn(rvu->dev, "mbox up handler: num_msgs = 0\n"); + return; + } + + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < rsp_hdr->num_msgs; id++) { + msg = mdev->mbase + offset; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(rvu->dev, + "Mbox msg with unknown ID 0x%x\n", msg->id); + goto end; + } + + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(rvu->dev, + "Mbox msg with wrong signature %x, ID 0x%x\n", + msg->sig, msg->id); + goto end; + } + + switch (msg->id) { + case MBOX_MSG_CGX_LINK_EVENT: + break; + default: + if (msg->rc) + dev_err(rvu->dev, + "Mbox msg response has err %d, ID 0x%x\n", + msg->rc, msg->id); + break; + } +end: + offset = mbox->rx_start + msg->next_msgoff; + mdev->msgs_acked++; + } + + otx2_mbox_reset(mbox, 0); +} + static int rvu_mbox_init(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; @@ -1337,6 +1458,13 @@ static int rvu_mbox_init(struct rvu *rvu) goto exit; } + rvu->mbox_wrk_up = devm_kcalloc(rvu->dev, hw->total_pfs, + sizeof(struct rvu_work), GFP_KERNEL); + if (!rvu->mbox_wrk_up) { + err = -ENOMEM; + goto exit; + } + /* Map mbox region shared with PFs */ bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR); /* Mailbox is a reserved memory (in RAM) region shared between @@ -1355,12 +1483,23 @@ static int rvu_mbox_init(struct rvu *rvu) if (err) goto exit; + err = otx2_mbox_init(&rvu->mbox_up, hwbase, rvu->pdev, rvu->afreg_base, + MBOX_DIR_AFPF_UP, hw->total_pfs); + if (err) + goto exit; + for (pf = 0; pf < hw->total_pfs; pf++) { mwork = &rvu->mbox_wrk[pf]; mwork->rvu = rvu; INIT_WORK(&mwork->work, rvu_mbox_handler); } + for (pf = 0; pf < hw->total_pfs; pf++) { + mwork = &rvu->mbox_wrk_up[pf]; + mwork->rvu = rvu; + INIT_WORK(&mwork->work, rvu_mbox_up_handler); + } + return 0; exit: if (hwbase) @@ -1381,6 +1520,7 @@ static void rvu_mbox_destroy(struct rvu *rvu) iounmap((void __iomem *)rvu->mbox.hwbase); otx2_mbox_destroy(&rvu->mbox); + otx2_mbox_destroy(&rvu->mbox_up); } static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) @@ -1407,6 +1547,12 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) if (hdr->num_msgs) queue_work(rvu->mbox_wq, &rvu->mbox_wrk[pf].work); + mbox = &rvu->mbox_up; + mdev = &mbox->dev[pf]; + hdr = mdev->mbase + mbox->rx_start; + if (hdr->num_msgs) + 
queue_work(rvu->mbox_wq, + &rvu->mbox_wrk_up[pf].work); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d169fa9eb45e..b48b5af83f1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -12,6 +12,7 @@ #define RVU_H #include "rvu_struct.h" +#include "common.h" #include "mbox.h" /* PCI device IDs */ @@ -41,7 +42,8 @@ struct rsrc_bmap { }; struct rvu_block { - struct rsrc_bmap lf; + struct rsrc_bmap lf; + struct admin_queue *aq; /* NIX/NPA AQ */ u16 *fn_map; /* LF to pcifunc mapping */ bool multislot; bool implemented; @@ -70,14 +72,50 @@ struct rvu_pfvf { struct rsrc_bmap msix; /* Bitmap for MSIX vector alloc */ #define MSIX_BLKLF(blkaddr, lf) (((blkaddr) << 8) | ((lf) & 0xFF)) u16 *msix_lfmap; /* Vector to block LF mapping */ + + /* NPA contexts */ + struct qmem *aura_ctx; + struct qmem *pool_ctx; + struct qmem *npa_qints_ctx; + unsigned long *aura_bmap; + unsigned long *pool_bmap; + + /* NIX contexts */ + struct qmem *rq_ctx; + struct qmem *sq_ctx; + struct qmem *cq_ctx; + struct qmem *rss_ctx; + struct qmem *cq_ints_ctx; + struct qmem *nix_qints_ctx; + unsigned long *sq_bmap; + unsigned long *rq_bmap; + unsigned long *cq_bmap; + + u8 mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */ +}; + +struct nix_txsch { + struct rsrc_bmap schq; + u8 lvl; + u16 *pfvf_map; +}; + +struct nix_hw { + struct nix_txsch txsch[NIX_TXSCH_LVL_CNT]; /* Tx schedulers */ }; struct rvu_hwinfo { u8 total_pfs; /* MAX RVU PFs HW supports */ u16 total_vfs; /* Max RVU VFs HW supports */ u16 max_vfs_per_pf; /* Max VFs that can be attached to a PF */ + u8 cgx; + u8 lmac_per_cgx; + u8 cgx_links; + u8 lbk_links; + u8 sdp_links; struct rvu_block block[BLK_COUNT]; /* Block info */ + struct nix_hw *nix0; }; struct rvu { @@ -93,6 +131,8 @@ struct rvu { /* Mbox */ struct otx2_mbox mbox; struct rvu_work *mbox_wrk; + struct otx2_mbox mbox_up; + struct rvu_work *mbox_wrk_up; struct workqueue_struct *mbox_wq; /* MSI-X */ @@ -109,6 +149,7 @@ struct rvu { u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for * every cgx lmac port */ + unsigned long pf_notify_bmap; /* Flags for PF notification */ void **cgx_idmap; /* cgx id to cgx data map table */ struct work_struct cgx_evh_work; struct workqueue_struct *cgx_evh_wq; @@ -149,10 +190,84 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc); void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf); bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr); int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot); +int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc); int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero); +/* NPA/NIX AQ APIs */ +int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue, + int qsize, int inst_size, int res_size); +void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq); + /* CGX APIs */ +static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf) +{ + return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs); +} + +static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id) +{ + *cgx_id = (map >> 4) & 0xF; + *lmac_id = (map & 0xF); +} + int rvu_cgx_probe(struct rvu *rvu); void rvu_cgx_wq_destroy(struct rvu *rvu); +int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start); +int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req, + 
struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp); +int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp); +int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp); +int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_PROMISC_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req, + struct cgx_link_info_msg *rsp); +int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); + +/* NPA APIs */ +int rvu_npa_init(struct rvu *rvu); +void rvu_npa_freemem(struct rvu *rvu); +int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu, + struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp); +int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu, + struct npa_lf_alloc_req *req, + struct npa_lf_alloc_rsp *rsp); +int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); + +/* NIX APIs */ +int rvu_nix_init(struct rvu *rvu); +void rvu_nix_freemem(struct rvu *rvu); +int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu, + struct nix_lf_alloc_req *req, + struct nix_lf_alloc_rsp *rsp); +int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp); +int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp); #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 5ecc22308b22..e0aee2176637 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -20,6 +20,31 @@ struct cgx_evq_entry { struct cgx_link_event link_event; }; +#define M(_name, _id, _req_type, _rsp_type) \ +static struct _req_type __maybe_unused \ +*otx2_mbox_alloc_msg_ ## _name(struct rvu *rvu, int devid) \ +{ \ + struct _req_type *req; \ + \ + req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \ + &rvu->mbox_up, devid, sizeof(struct _req_type), \ + sizeof(struct _rsp_type)); \ + if (!req) \ + return NULL; \ + req->hdr.sig = OTX2_MBOX_REQ_SIG; \ + req->hdr.id = _id; \ + return req; \ +} + +MBOX_UP_CGX_MESSAGES +#undef M + +/* Returns bitmap of mapped PFs */ +static inline u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) +{ + return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id]; +} + static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id) { return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF); @@ -77,6 +102,34 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) return 0; } +static int rvu_cgx_send_link_info(int cgx_id, int 
lmac_id, struct rvu *rvu) +{ + struct cgx_evq_entry *qentry; + unsigned long flags; + int err; + + qentry = kmalloc(sizeof(*qentry), GFP_KERNEL); + if (!qentry) + return -ENOMEM; + + /* Lock the event queue before we read the local link status */ + spin_lock_irqsave(&rvu->cgx_evq_lock, flags); + err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + &qentry->link_event.link_uinfo); + qentry->link_event.cgx_id = cgx_id; + qentry->link_event.lmac_id = lmac_id; + if (err) + goto skip_add; + list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head); +skip_add: + spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags); + + /* start worker to process the events */ + queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work); + + return 0; +} + /* This is called from interrupt context and is expected to be atomic */ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data) { @@ -98,6 +151,41 @@ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data) return 0; } +static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) +{ + struct cgx_link_user_info *linfo; + struct cgx_link_info_msg *msg; + unsigned long pfmap; + int err, pfid; + + linfo = &event->link_uinfo; + pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); + + do { + pfid = find_first_bit(&pfmap, 16); + clear_bit(pfid, &pfmap); + + /* check if notification is enabled */ + if (!test_bit(pfid, &rvu->pf_notify_bmap)) { + dev_info(rvu->dev, "cgx %d: lmac %d Link status %s\n", + event->cgx_id, event->lmac_id, + linfo->link_up ? "UP" : "DOWN"); + continue; + } + + /* Send mbox message to PF */ + msg = otx2_mbox_alloc_msg_CGX_LINK_EVENT(rvu, pfid); + if (!msg) + continue; + msg->link_info = *linfo; + otx2_mbox_msg_send(&rvu->mbox_up, pfid); + err = otx2_mbox_wait_for_rsp(&rvu->mbox_up, pfid); + if (err) + dev_warn(rvu->dev, "notification to pf %d failed\n", + pfid); + } while (pfmap); +} + static void cgx_evhandler_task(struct work_struct *work) { struct rvu *rvu = container_of(work, struct rvu, cgx_evh_work); @@ -119,7 +207,8 @@ static void cgx_evhandler_task(struct work_struct *work) event = &qentry->link_event; - /* Do nothing for now */ + /* process event */ + cgx_notify_pfs(event, rvu); kfree(qentry); } while (1); } @@ -192,3 +281,232 @@ int rvu_cgx_probe(struct rvu *rvu) cgx_lmac_event_handler_init(rvu); return 0; } + +int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
+ */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start); + + return 0; +} + +int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, false); + return 0; +} + +int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + int stat = 0, err = 0; + u64 tx_stat, rx_stat; + u8 cgx_idx, lmac; + void *cgxd; + + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + + /* Rx stats */ + while (stat < CGX_RX_STATS_COUNT) { + err = cgx_get_rx_stats(cgxd, lmac, stat, &rx_stat); + if (err) + return err; + rsp->rx_stats[stat] = rx_stat; + stat++; + } + + /* Tx stats */ + stat = 0; + while (stat < CGX_TX_STATS_COUNT) { + err = cgx_get_tx_stats(cgxd, lmac, stat, &tx_stat); + if (err) + return err; + rsp->tx_stats[stat] = tx_stat; + stat++; + } + return 0; +} + +int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr); + + return 0; +} + +int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + int rc = 0, i; + u64 cfg; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + rsp->hdr.rc = rc; + cfg = cgx_lmac_addr_get(cgx_id, lmac_id); + /* copy 48 bit mac address to rsp->mac_addr */ + for (i = 0; i < ETH_ALEN; i++) + rsp->mac_addr[i] = cfg >> (ETH_ALEN - 1 - i) * 8; + return 0; +} + +int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
+ */ + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_promisc_config(cgx_id, lmac_id, false); + return 0; +} + +static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + if (en) { + set_bit(pf, &rvu->pf_notify_bmap); + /* Send the current link status to PF */ + rvu_cgx_send_link_info(cgx_id, lmac_id, rvu); + } else { + clear_bit(pf, &rvu->pf_notify_bmap); + } + + return 0; +} + +int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, false); + return 0; +} + +int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req, + struct cgx_link_info_msg *rsp) +{ + u8 cgx_id, lmac_id; + int pf, err; + + pf = rvu_get_pf(req->hdr.pcifunc); + + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + &rsp->link_info); + return err; +} + +static int rvu_cgx_config_intlbk(struct rvu *rvu, u16 pcifunc, bool en) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + return cgx_lmac_internal_loopback(rvu_cgx_pdata(cgx_id, rvu), + lmac_id, en); +} + +int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, false); + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c new file mode 100644 index 000000000000..214ca2c26ab4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" +#include "cgx.h" + +static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) +{ + if (blkaddr == BLKADDR_NIX0 && hw->nix0) + return hw->nix0; + + return NULL; +} + +static bool is_valid_txschq(struct rvu *rvu, int blkaddr, + int lvl, u16 pcifunc, u16 schq) +{ + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return false; + + txsch = &nix_hw->txsch[lvl]; + /* Check out of bounds */ + if (schq >= txsch->schq.max) + return false; + + spin_lock(&rvu->rsrc_lock); + if (txsch->pfvf_map[schq] != pcifunc) { + spin_unlock(&rvu->rsrc_lock); + return false; + } + spin_unlock(&rvu->rsrc_lock); + return true; +} + +static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr, + u64 format, bool v4, u64 *fidx) +{ + struct nix_lso_format field = {0}; + + /* IP's Length field */ + field.layer = NIX_TXLAYER_OL3; + /* In ipv4, length field is at offset 2 bytes, for ipv6 it's 4 */ + field.offset = v4 ? 2 : 4; + field.sizem1 = 1; /* i.e 2 bytes */ + field.alg = NIX_LSOALG_ADD_PAYLEN; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); + + /* No ID field in IPv6 header */ + if (!v4) + return; + + /* IP's ID field */ + field.layer = NIX_TXLAYER_OL3; + field.offset = 4; + field.sizem1 = 1; /* i.e 2 bytes */ + field.alg = NIX_LSOALG_ADD_SEGNUM; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); +} + +static void nix_setup_lso_tso_l4(struct rvu *rvu, int blkaddr, + u64 format, u64 *fidx) +{ + struct nix_lso_format field = {0}; + + /* TCP's sequence number field */ + field.layer = NIX_TXLAYER_OL4; + field.offset = 4; + field.sizem1 = 3; /* i.e 4 bytes */ + field.alg = NIX_LSOALG_ADD_OFFSET; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); + + /* TCP's flags field */ + field.layer = NIX_TXLAYER_OL4; + field.offset = 12; + field.sizem1 = 0; /* not needed */ + field.alg = NIX_LSOALG_TCP_FLAGS; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); +} + +static void nix_setup_lso(struct rvu *rvu, int blkaddr) +{ + u64 cfg, idx, fidx = 0; + + /* Enable LSO */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LSO_CFG); + /* For TSO, set first and middle segment flags to + * mask out PSH, RST & FIN flags in TCP packet + */ + cfg &= ~((0xFFFFULL << 32) | (0xFFFFULL << 16)); + cfg |= (0xFFF2ULL << 32) | (0xFFF2ULL << 16); + rvu_write64(rvu, blkaddr, NIX_AF_LSO_CFG, cfg | BIT_ULL(63)); + + /* Configure format fields for TCPv4 segmentation offload */ + idx = NIX_LSO_FORMAT_IDX_TSOV4; + nix_setup_lso_tso_l3(rvu, blkaddr, idx, true, &fidx); + nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx); + + /* Set rest of the fields to NOP */ + for (; fidx < 8; fidx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL); + } + + /* Configure format fields for TCPv6 segmentation offload */ + idx = NIX_LSO_FORMAT_IDX_TSOV6; + fidx = 0; + nix_setup_lso_tso_l3(rvu, blkaddr, idx, false, &fidx); + nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx); + + /* Set rest of the fields to NOP */ + for (; fidx < 8; fidx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL); + } +} + +static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf) +{ + kfree(pfvf->rq_bmap); + kfree(pfvf->sq_bmap); + kfree(pfvf->cq_bmap); + if (pfvf->rq_ctx) + 
qmem_free(rvu->dev, pfvf->rq_ctx); + if (pfvf->sq_ctx) + qmem_free(rvu->dev, pfvf->sq_ctx); + if (pfvf->cq_ctx) + qmem_free(rvu->dev, pfvf->cq_ctx); + if (pfvf->rss_ctx) + qmem_free(rvu->dev, pfvf->rss_ctx); + if (pfvf->nix_qints_ctx) + qmem_free(rvu->dev, pfvf->nix_qints_ctx); + if (pfvf->cq_ints_ctx) + qmem_free(rvu->dev, pfvf->cq_ints_ctx); + + pfvf->rq_bmap = NULL; + pfvf->cq_bmap = NULL; + pfvf->sq_bmap = NULL; + pfvf->rq_ctx = NULL; + pfvf->sq_ctx = NULL; + pfvf->cq_ctx = NULL; + pfvf->rss_ctx = NULL; + pfvf->nix_qints_ctx = NULL; + pfvf->cq_ints_ctx = NULL; +} + +static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr, + struct rvu_pfvf *pfvf, int nixlf, + int rss_sz, int rss_grps, int hwctx_size) +{ + int err, grp, num_indices; + + /* RSS is not requested for this NIXLF */ + if (!rss_sz) + return 0; + num_indices = rss_sz * rss_grps; + + /* Alloc NIX RSS HW context memory and config the base */ + err = qmem_alloc(rvu->dev, &pfvf->rss_ctx, num_indices, hwctx_size); + if (err) + return err; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_BASE(nixlf), + (u64)pfvf->rss_ctx->iova); + + /* Config full RSS table size, enable RSS and caching */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf), + BIT_ULL(36) | BIT_ULL(4) | + ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE)); + /* Config RSS group offset and sizes */ + for (grp = 0; grp < rss_grps; grp++) + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp), + ((ilog2(rss_sz) - 1) << 16) | (rss_sz * grp)); + return 0; +} + +static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct nix_aq_inst_s *inst) +{ + struct admin_queue *aq = block->aq; + struct nix_aq_res_s *result; + int timeout = 1000; + u64 reg, head; + + result = (struct nix_aq_res_s *)aq->res->base; + + /* Get current head pointer where to append this instruction */ + reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + + memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)), + (void *)inst, aq->inst->entry_sz); + memset(result, 0, sizeof(*result)); + /* sync into memory */ + wmb(); + + /* Ring the doorbell and wait for result */ + rvu_write64(rvu, block->addr, NIX_AF_AQ_DOOR, 1); + while (result->compcode == NIX_AQ_COMP_NOTDONE) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) + return -EBUSY; + } + + if (result->compcode != NIX_AQ_COMP_GOOD) + /* TODO: Replace this with some error code */ + return -EBUSY; + + return 0; +} + +static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int nixlf, blkaddr, rc = 0; + struct nix_aq_inst_s inst; + struct rvu_block *block; + struct admin_queue *aq; + struct rvu_pfvf *pfvf; + void *ctx, *mask; + bool ena; + u64 cfg; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + aq = block->aq; + if (!aq) { + dev_warn(rvu->dev, "%s: NIX AQ not initialized\n", __func__); + return NIX_AF_ERR_AQ_ENQUEUE; + } + + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + switch (req->ctype) { + case NIX_AQ_CTYPE_RQ: + /* Check if index exceeds max no of queues */ + if (!pfvf->rq_ctx || req->qidx >= pfvf->rq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_SQ: + if (!pfvf->sq_ctx || req->qidx >= pfvf->sq_ctx->qsize) + rc = 
NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_CQ: + if (!pfvf->cq_ctx || req->qidx >= pfvf->cq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_RSS: + /* Check if RSS is enabled and qidx is within range */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf)); + if (!(cfg & BIT_ULL(4)) || !pfvf->rss_ctx || + (req->qidx >= (256UL << (cfg & 0xF)))) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + default: + rc = NIX_AF_ERR_AQ_ENQUEUE; + } + + if (rc) + return rc; + + /* Check if SQ pointed SMQ belongs to this PF/VF or not */ + if (req->ctype == NIX_AQ_CTYPE_SQ && + req->op != NIX_AQ_INSTOP_WRITE) { + if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ, + pcifunc, req->sq.smq)) + return NIX_AF_ERR_AQ_ENQUEUE; + } + + memset(&inst, 0, sizeof(struct nix_aq_inst_s)); + inst.lf = nixlf; + inst.cindex = req->qidx; + inst.ctype = req->ctype; + inst.op = req->op; + /* Currently we are not supporting enqueuing multiple instructions, + * so always choose first entry in result memory. + */ + inst.res_addr = (u64)aq->res->iova; + + /* Clean result + context memory */ + memset(aq->res->base, 0, aq->res->entry_sz); + /* Context needs to be written at RES_ADDR + 128 */ + ctx = aq->res->base + 128; + /* Mask needs to be written at RES_ADDR + 256 */ + mask = aq->res->base + 256; + + switch (req->op) { + case NIX_AQ_INSTOP_WRITE: + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(mask, &req->rq_mask, + sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(mask, &req->sq_mask, + sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(mask, &req->cq_mask, + sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(mask, &req->rss_mask, + sizeof(struct nix_rsse_s)); + /* Fall through */ + case NIX_AQ_INSTOP_INIT: + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s)); + break; + case NIX_AQ_INSTOP_NOP: + case NIX_AQ_INSTOP_READ: + case NIX_AQ_INSTOP_LOCK: + case NIX_AQ_INSTOP_UNLOCK: + break; + default: + rc = NIX_AF_ERR_AQ_ENQUEUE; + return rc; + } + + spin_lock(&aq->lock); + + /* Submit the instruction to AQ */ + rc = nix_aq_enqueue_wait(rvu, block, &inst); + if (rc) { + spin_unlock(&aq->lock); + return rc; + } + + /* Set RQ/SQ/CQ bitmap if respective queue hw context is enabled */ + if (req->op == NIX_AQ_INSTOP_INIT) { + if (req->ctype == NIX_AQ_CTYPE_RQ && req->rq.ena) + __set_bit(req->qidx, pfvf->rq_bmap); + if (req->ctype == NIX_AQ_CTYPE_SQ && req->sq.ena) + __set_bit(req->qidx, pfvf->sq_bmap); + if (req->ctype == NIX_AQ_CTYPE_CQ && req->cq.ena) + __set_bit(req->qidx, pfvf->cq_bmap); + } + + if (req->op == NIX_AQ_INSTOP_WRITE) { + if (req->ctype == NIX_AQ_CTYPE_RQ) { + ena = (req->rq.ena & req->rq_mask.ena) | + (test_bit(req->qidx, pfvf->rq_bmap) & + ~req->rq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->rq_bmap); + else + __clear_bit(req->qidx, pfvf->rq_bmap); + } + if (req->ctype == NIX_AQ_CTYPE_SQ) { + ena = (req->sq.ena & req->sq_mask.ena) | + (test_bit(req->qidx, pfvf->sq_bmap) & + ~req->sq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->sq_bmap); + else + __clear_bit(req->qidx, pfvf->sq_bmap); + } + if (req->ctype == NIX_AQ_CTYPE_CQ) { + ena = (req->cq.ena & req->cq_mask.ena) + 
(test_bit(req->qidx, pfvf->cq_bmap) & + ~req->cq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->cq_bmap); + else + __clear_bit(req->qidx, pfvf->cq_bmap); + } + } + + if (rsp) { + /* Copy read context into mailbox */ + if (req->op == NIX_AQ_INSTOP_READ) { + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(&rsp->rq, ctx, + sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(&rsp->sq, ctx, + sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(&rsp->cq, ctx, + sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(&rsp->rss, ctx, + sizeof(struct nix_rsse_s)); + } + } + + spin_unlock(&aq->lock); + return 0; +} + +static int nix_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct nix_aq_enq_req aq_req; + unsigned long *bmap; + int qidx, q_cnt = 0; + int err = 0, rc; + + if (!pfvf->cq_ctx || !pfvf->sq_ctx || !pfvf->rq_ctx) + return NIX_AF_ERR_AQ_ENQUEUE; + + memset(&aq_req, 0, sizeof(struct nix_aq_enq_req)); + aq_req.hdr.pcifunc = req->hdr.pcifunc; + + if (req->ctype == NIX_AQ_CTYPE_CQ) { + aq_req.cq.ena = 0; + aq_req.cq_mask.ena = 1; + q_cnt = pfvf->cq_ctx->qsize; + bmap = pfvf->cq_bmap; + } + if (req->ctype == NIX_AQ_CTYPE_SQ) { + aq_req.sq.ena = 0; + aq_req.sq_mask.ena = 1; + q_cnt = pfvf->sq_ctx->qsize; + bmap = pfvf->sq_bmap; + } + if (req->ctype == NIX_AQ_CTYPE_RQ) { + aq_req.rq.ena = 0; + aq_req.rq_mask.ena = 1; + q_cnt = pfvf->rq_ctx->qsize; + bmap = pfvf->rq_bmap; + } + + aq_req.ctype = req->ctype; + aq_req.op = NIX_AQ_INSTOP_WRITE; + + for (qidx = 0; qidx < q_cnt; qidx++) { + if (!test_bit(qidx, bmap)) + continue; + aq_req.qidx = qidx; + rc = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL); + if (rc) { + err = rc; + dev_err(rvu->dev, "Failed to disable %s:%d context\n", + (req->ctype == NIX_AQ_CTYPE_CQ) ? + "CQ" : ((req->ctype == NIX_AQ_CTYPE_RQ) ? + "RQ" : "SQ"), qidx); + } + } + + return err; +} + +int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + return rvu_nix_aq_enq_inst(rvu, req, rsp); +} + +int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp) +{ + return nix_lf_hwctx_disable(rvu, req); +} + +int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu, + struct nix_lf_alloc_req *req, + struct nix_lf_alloc_rsp *rsp) +{ + int nixlf, qints, hwctx_size, err, rc = 0; + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + u64 cfg, ctx_cfg; + int blkaddr; + + if (!req->rq_cnt || !req->sq_cnt || !req->cq_cnt) + return NIX_AF_ERR_PARAM; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + /* If RSS is being enabled, check if requested config is valid. + * RSS table size should be power of two, otherwise + * RSS_GRP::OFFSET + adder might go beyond that group or + * won't be able to use entire table. 
+ */ + if (req->rss_sz && (req->rss_sz > MAX_RSS_INDIR_TBL_SIZE || + !is_power_of_2(req->rss_sz))) + return NIX_AF_ERR_RSS_SIZE_INVALID; + + if (req->rss_sz && + (!req->rss_grps || req->rss_grps > MAX_RSS_GROUPS)) + return NIX_AF_ERR_RSS_GRPS_INVALID; + + /* Reset this NIX LF */ + err = rvu_lf_reset(rvu, block, nixlf); + if (err) { + dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n", + block->addr - BLKADDR_NIX0, nixlf); + return NIX_AF_ERR_LF_RESET; + } + + ctx_cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST3); + + /* Alloc NIX RQ HW context memory and config the base */ + hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->rq_ctx, req->rq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->rq_bmap = kcalloc(req->rq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->rq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_BASE(nixlf), + (u64)pfvf->rq_ctx->iova); + + /* Set caching and queue count in HW */ + cfg = BIT_ULL(36) | (req->rq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_CFG(nixlf), cfg); + + /* Alloc NIX SQ HW context memory and config the base */ + hwctx_size = 1UL << (ctx_cfg & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->sq_ctx, req->sq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->sq_bmap = kcalloc(req->sq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->sq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_BASE(nixlf), + (u64)pfvf->sq_ctx->iova); + cfg = BIT_ULL(36) | (req->sq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_CFG(nixlf), cfg); + + /* Alloc NIX CQ HW context memory and config the base */ + hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->cq_ctx, req->cq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->cq_bmap = kcalloc(req->cq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->cq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_BASE(nixlf), + (u64)pfvf->cq_ctx->iova); + cfg = BIT_ULL(36) | (req->cq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_CFG(nixlf), cfg); + + /* Initialize receive side scaling (RSS) */ + hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF); + err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf, + req->rss_sz, req->rss_grps, hwctx_size); + if (err) + goto free_mem; + + /* Alloc memory for CQINT's HW contexts */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + qints = (cfg >> 24) & 0xFFF; + hwctx_size = 1UL << ((ctx_cfg >> 24) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->cq_ints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_BASE(nixlf), + (u64)pfvf->cq_ints_ctx->iova); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_CFG(nixlf), BIT_ULL(36)); + + /* Alloc memory for QINT's HW contexts */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + qints = (cfg >> 12) & 0xFFF; + hwctx_size = 1UL << ((ctx_cfg >> 20) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->nix_qints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_BASE(nixlf), + (u64)pfvf->nix_qints_ctx->iova); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_CFG(nixlf), BIT_ULL(36)); + + /* Enable LMTST for this NIX LF */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG2(nixlf), BIT_ULL(0)); + + /* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC + * If requester has sent a 'RVU_DEFAULT_PF_FUNC' use this NIX LF's + * PCIFUNC itself. 
+ */ + if (req->npa_func == RVU_DEFAULT_PF_FUNC) + cfg = pcifunc; + else + cfg = req->npa_func; + + if (req->sso_func == RVU_DEFAULT_PF_FUNC) + cfg |= (u64)pcifunc << 16; + else + cfg |= (u64)req->sso_func << 16; + + cfg |= (u64)req->xqe_sz << 33; + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf), cfg); + + /* Config Rx pkt length, csum checks and apad enable / disable */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg); + + goto exit; + +free_mem: + nix_ctx_free(rvu, pfvf); + rc = -ENOMEM; + +exit: + /* Set macaddr of this PF/VF */ + ether_addr_copy(rsp->mac_addr, pfvf->mac_addr); + + /* set SQB size info */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQ_CONST); + rsp->sqb_size = (cfg >> 34) & 0xFFFF; + rsp->lso_tsov4_idx = NIX_LSO_FORMAT_IDX_TSOV4; + rsp->lso_tsov6_idx = NIX_LSO_FORMAT_IDX_TSOV6; + return rc; +} + +int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int blkaddr, nixlf, err; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + /* Reset this NIX LF */ + err = rvu_lf_reset(rvu, block, nixlf); + if (err) { + dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n", + block->addr - BLKADDR_NIX0, nixlf); + return NIX_AF_ERR_LF_RESET; + } + + nix_ctx_free(rvu, pfvf); + + return 0; +} + +static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) +{ + struct nix_txsch *txsch; + u64 cfg, reg; + int err, lvl; + + /* Get scheduler queue count of each type and alloc + * bitmap for each for alloc/free/attach operations. + */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + txsch = &nix_hw->txsch[lvl]; + txsch->lvl = lvl; + switch (lvl) { + case NIX_TXSCH_LVL_SMQ: + reg = NIX_AF_MDQ_CONST; + break; + case NIX_TXSCH_LVL_TL4: + reg = NIX_AF_TL4_CONST; + break; + case NIX_TXSCH_LVL_TL3: + reg = NIX_AF_TL3_CONST; + break; + case NIX_TXSCH_LVL_TL2: + reg = NIX_AF_TL2_CONST; + break; + case NIX_TXSCH_LVL_TL1: + reg = NIX_AF_TL1_CONST; + break; + } + cfg = rvu_read64(rvu, blkaddr, reg); + txsch->schq.max = cfg & 0xFFFF; + err = rvu_alloc_bitmap(&txsch->schq); + if (err) + return err; + + /* Allocate memory for scheduler queues to + * PF/VF pcifunc mapping info. 
+ */ + txsch->pfvf_map = devm_kcalloc(rvu->dev, txsch->schq.max, + sizeof(u16), GFP_KERNEL); + if (!txsch->pfvf_map) + return -ENOMEM; + } + return 0; +} + +static int nix_calibrate_x2p(struct rvu *rvu, int blkaddr) +{ + int idx, err; + u64 status; + + /* Start X2P bus calibration */ + rvu_write64(rvu, blkaddr, NIX_AF_CFG, + rvu_read64(rvu, blkaddr, NIX_AF_CFG) | BIT_ULL(9)); + /* Wait for calibration to complete */ + err = rvu_poll_reg(rvu, blkaddr, + NIX_AF_STATUS, BIT_ULL(10), false); + if (err) { + dev_err(rvu->dev, "NIX X2P bus calibration failed\n"); + return err; + } + + status = rvu_read64(rvu, blkaddr, NIX_AF_STATUS); + /* Check if CGX devices are ready */ + for (idx = 0; idx < cgx_get_cgx_cnt(); idx++) { + if (status & (BIT_ULL(16 + idx))) + continue; + dev_err(rvu->dev, + "CGX%d didn't respond to NIX X2P calibration\n", idx); + err = -EBUSY; + } + + /* Check if LBK is ready */ + if (!(status & BIT_ULL(19))) { + dev_err(rvu->dev, + "LBK didn't respond to NIX X2P calibration\n"); + err = -EBUSY; + } + + /* Clear 'calibrate_x2p' bit */ + rvu_write64(rvu, blkaddr, NIX_AF_CFG, + rvu_read64(rvu, blkaddr, NIX_AF_CFG) & ~BIT_ULL(9)); + if (err || (status & 0x3FFULL)) + dev_err(rvu->dev, + "NIX X2P calibration failed, status 0x%llx\n", status); + if (err) + return err; + return 0; +} + +static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + int err; + + /* Set admin queue endianness */ + cfg = rvu_read64(rvu, block->addr, NIX_AF_CFG); +#ifdef __BIG_ENDIAN + cfg |= BIT_ULL(1); + rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg); +#else + cfg &= ~BIT_ULL(1); + rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg); +#endif + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NIX_AF_NDC_CFG); + cfg &= ~0x3FFEULL; + rvu_write64(rvu, block->addr, NIX_AF_NDC_CFG, cfg); + + /* Result structure can be followed by RQ/SQ/CQ context at + * RES + 128bytes and a write mask at RES + 256 bytes, depending on + * operation type. Alloc sufficient result memory for all operations. 
+ */ + err = rvu_aq_alloc(rvu, &block->aq, + Q_COUNT(AQ_SIZE), sizeof(struct nix_aq_inst_s), + ALIGN(sizeof(struct nix_aq_res_s), 128) + 256); + if (err) + return err; + + rvu_write64(rvu, block->addr, NIX_AF_AQ_CFG, AQ_SIZE); + rvu_write64(rvu, block->addr, + NIX_AF_AQ_BASE, (u64)block->aq->inst->iova); + return 0; +} + +int rvu_nix_init(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr, err; + u64 cfg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return 0; + block = &hw->block[blkaddr]; + + /* Calibrate X2P bus to check if CGX/LBK links are fine */ + err = nix_calibrate_x2p(rvu, blkaddr); + if (err) + return err; + + /* Set num of links of each type */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); + hw->cgx = (cfg >> 12) & 0xF; + hw->lmac_per_cgx = (cfg >> 8) & 0xF; + hw->cgx_links = hw->cgx * hw->lmac_per_cgx; + hw->lbk_links = 1; + hw->sdp_links = 1; + + /* Initialize admin queue */ + err = nix_aq_init(rvu, block); + if (err) + return err; + + /* Restore CINT timer delay to HW reset values */ + rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL); + + /* Configure segmentation offload formats */ + nix_setup_lso(rvu, blkaddr); + + if (blkaddr == BLKADDR_NIX0) { + hw->nix0 = devm_kzalloc(rvu->dev, + sizeof(struct nix_hw), GFP_KERNEL); + if (!hw->nix0) + return -ENOMEM; + + err = nix_setup_txschq(rvu, hw->nix0, blkaddr); + if (err) + return err; + } + return 0; +} + +void rvu_nix_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + int blkaddr, lvl; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return; + + block = &hw->block[blkaddr]; + rvu_aq_free(rvu, block->aq); + + if (blkaddr == BLKADDR_NIX0) { + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + txsch = &nix_hw->txsch[lvl]; + kfree(txsch->schq.bmap); + } + } +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c new file mode 100644 index 000000000000..0e43a693d119 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" + +static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct npa_aq_inst_s *inst) +{ + struct admin_queue *aq = block->aq; + struct npa_aq_res_s *result; + int timeout = 1000; + u64 reg, head; + + result = (struct npa_aq_res_s *)aq->res->base; + + /* Get current head pointer where to append this instruction */ + reg = rvu_read64(rvu, block->addr, NPA_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + + memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)), + (void *)inst, aq->inst->entry_sz); + memset(result, 0, sizeof(*result)); + /* sync into memory */ + wmb(); + + /* Ring the doorbell and wait for result */ + rvu_write64(rvu, block->addr, NPA_AF_AQ_DOOR, 1); + while (result->compcode == NPA_AQ_COMP_NOTDONE) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) + return -EBUSY; + } + + if (result->compcode != NPA_AQ_COMP_GOOD) + /* TODO: Replace this with some error code */ + return -EBUSY; + + return 0; +} + +static int rvu_npa_aq_enq_inst(struct rvu *rvu, struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, npalf, rc = 0; + struct npa_aq_inst_s inst; + struct rvu_block *block; + struct admin_queue *aq; + struct rvu_pfvf *pfvf; + void *ctx, *mask; + bool ena; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + if (!pfvf->aura_ctx || req->aura_id >= pfvf->aura_ctx->qsize) + return NPA_AF_ERR_AQ_ENQUEUE; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + aq = block->aq; + if (!aq) { + dev_warn(rvu->dev, "%s: NPA AQ not initialized\n", __func__); + return NPA_AF_ERR_AQ_ENQUEUE; + } + + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + memset(&inst, 0, sizeof(struct npa_aq_inst_s)); + inst.cindex = req->aura_id; + inst.lf = npalf; + inst.ctype = req->ctype; + inst.op = req->op; + /* Currently we are not supporting enqueuing multiple instructions, + * so always choose first entry in result memory. 
+ */ + inst.res_addr = (u64)aq->res->iova; + + /* Clean result + context memory */ + memset(aq->res->base, 0, aq->res->entry_sz); + /* Context needs to be written at RES_ADDR + 128 */ + ctx = aq->res->base + 128; + /* Mask needs to be written at RES_ADDR + 256 */ + mask = aq->res->base + 256; + + switch (req->op) { + case NPA_AQ_INSTOP_WRITE: + /* Copy context and write mask */ + if (req->ctype == NPA_AQ_CTYPE_AURA) { + memcpy(mask, &req->aura_mask, + sizeof(struct npa_aura_s)); + memcpy(ctx, &req->aura, sizeof(struct npa_aura_s)); + } else { + memcpy(mask, &req->pool_mask, + sizeof(struct npa_pool_s)); + memcpy(ctx, &req->pool, sizeof(struct npa_pool_s)); + } + break; + case NPA_AQ_INSTOP_INIT: + if (req->ctype == NPA_AQ_CTYPE_AURA) { + if (req->aura.pool_addr >= pfvf->pool_ctx->qsize) { + rc = NPA_AF_ERR_AQ_FULL; + break; + } + /* Set pool's context address */ + req->aura.pool_addr = pfvf->pool_ctx->iova + + (req->aura.pool_addr * pfvf->pool_ctx->entry_sz); + memcpy(ctx, &req->aura, sizeof(struct npa_aura_s)); + } else { /* POOL's context */ + memcpy(ctx, &req->pool, sizeof(struct npa_pool_s)); + } + break; + case NPA_AQ_INSTOP_NOP: + case NPA_AQ_INSTOP_READ: + case NPA_AQ_INSTOP_LOCK: + case NPA_AQ_INSTOP_UNLOCK: + break; + default: + rc = NPA_AF_ERR_AQ_FULL; + break; + } + + if (rc) + return rc; + + spin_lock(&aq->lock); + + /* Submit the instruction to AQ */ + rc = npa_aq_enqueue_wait(rvu, block, &inst); + if (rc) { + spin_unlock(&aq->lock); + return rc; + } + + /* Set aura bitmap if aura hw context is enabled */ + if (req->ctype == NPA_AQ_CTYPE_AURA) { + if (req->op == NPA_AQ_INSTOP_INIT && req->aura.ena) + __set_bit(req->aura_id, pfvf->aura_bmap); + if (req->op == NPA_AQ_INSTOP_WRITE) { + ena = (req->aura.ena & req->aura_mask.ena) | + (test_bit(req->aura_id, pfvf->aura_bmap) & + ~req->aura_mask.ena); + if (ena) + __set_bit(req->aura_id, pfvf->aura_bmap); + else + __clear_bit(req->aura_id, pfvf->aura_bmap); + } + } + + /* Set pool bitmap if pool hw context is enabled */ + if (req->ctype == NPA_AQ_CTYPE_POOL) { + if (req->op == NPA_AQ_INSTOP_INIT && req->pool.ena) + __set_bit(req->aura_id, pfvf->pool_bmap); + if (req->op == NPA_AQ_INSTOP_WRITE) { + ena = (req->pool.ena & req->pool_mask.ena) | + (test_bit(req->aura_id, pfvf->pool_bmap) & + ~req->pool_mask.ena); + if (ena) + __set_bit(req->aura_id, pfvf->pool_bmap); + else + __clear_bit(req->aura_id, pfvf->pool_bmap); + } + } + spin_unlock(&aq->lock); + + if (rsp) { + /* Copy read context into mailbox */ + if (req->op == NPA_AQ_INSTOP_READ) { + if (req->ctype == NPA_AQ_CTYPE_AURA) + memcpy(&rsp->aura, ctx, + sizeof(struct npa_aura_s)); + else + memcpy(&rsp->pool, ctx, + sizeof(struct npa_pool_s)); + } + } + + return 0; +} + +static int npa_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npa_aq_enq_req aq_req; + unsigned long *bmap; + int id, cnt = 0; + int err = 0, rc; + + if (!pfvf->pool_ctx || !pfvf->aura_ctx) + return NPA_AF_ERR_AQ_ENQUEUE; + + memset(&aq_req, 0, sizeof(struct npa_aq_enq_req)); + aq_req.hdr.pcifunc = req->hdr.pcifunc; + + if (req->ctype == NPA_AQ_CTYPE_POOL) { + aq_req.pool.ena = 0; + aq_req.pool_mask.ena = 1; + cnt = pfvf->pool_ctx->qsize; + bmap = pfvf->pool_bmap; + } else if (req->ctype == NPA_AQ_CTYPE_AURA) { + aq_req.aura.ena = 0; + aq_req.aura_mask.ena = 1; + cnt = pfvf->aura_ctx->qsize; + bmap = pfvf->aura_bmap; + } + + aq_req.ctype = req->ctype; + aq_req.op = NPA_AQ_INSTOP_WRITE; + + for (id = 0; id < cnt; id++) { + if 
(!test_bit(id, bmap)) + continue; + aq_req.aura_id = id; + rc = rvu_npa_aq_enq_inst(rvu, &aq_req, NULL); + if (rc) { + err = rc; + dev_err(rvu->dev, "Failed to disable %s:%d context\n", + (req->ctype == NPA_AQ_CTYPE_AURA) ? + "Aura" : "Pool", id); + } + } + + return err; +} + +int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu, + struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp) +{ + return rvu_npa_aq_enq_inst(rvu, req, rsp); +} + +int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp) +{ + return npa_lf_hwctx_disable(rvu, req); +} + +static void npa_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf) +{ + kfree(pfvf->aura_bmap); + pfvf->aura_bmap = NULL; + + qmem_free(rvu->dev, pfvf->aura_ctx); + pfvf->aura_ctx = NULL; + + kfree(pfvf->pool_bmap); + pfvf->pool_bmap = NULL; + + qmem_free(rvu->dev, pfvf->pool_ctx); + pfvf->pool_ctx = NULL; + + qmem_free(rvu->dev, pfvf->npa_qints_ctx); + pfvf->npa_qints_ctx = NULL; +} + +int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu, + struct npa_lf_alloc_req *req, + struct npa_lf_alloc_rsp *rsp) +{ + int npalf, qints, hwctx_size, err, rc = 0; + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + u64 cfg, ctx_cfg; + int blkaddr; + + if (req->aura_sz > NPA_AURA_SZ_MAX || + req->aura_sz == NPA_AURA_SZ_0 || !req->nr_pools) + return NPA_AF_ERR_PARAM; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Reset this NPA LF */ + err = rvu_lf_reset(rvu, block, npalf); + if (err) { + dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf); + return NPA_AF_ERR_LF_RESET; + } + + ctx_cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST1); + + /* Alloc memory for aura HW contexts */ + hwctx_size = 1UL << (ctx_cfg & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->aura_ctx, + NPA_AURA_COUNT(req->aura_sz), hwctx_size); + if (err) + goto free_mem; + + pfvf->aura_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long), + GFP_KERNEL); + if (!pfvf->aura_bmap) + goto free_mem; + + /* Alloc memory for pool HW contexts */ + hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->pool_ctx, req->nr_pools, hwctx_size); + if (err) + goto free_mem; + + pfvf->pool_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long), + GFP_KERNEL); + if (!pfvf->pool_bmap) + goto free_mem; + + /* Get no of queue interrupts supported */ + cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST); + qints = (cfg >> 28) & 0xFFF; + + /* Alloc memory for Qints HW contexts */ + hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->npa_qints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + cfg = rvu_read64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf)); + /* Clear way partition mask and set aura offset to '0' */ + cfg &= ~(BIT_ULL(34) - 1); + /* Set aura size & enable caching of contexts */ + cfg |= (req->aura_sz << 16) | BIT_ULL(34); + rvu_write64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf), cfg); + + /* Configure aura HW context's base */ + rvu_write64(rvu, blkaddr, NPA_AF_LFX_LOC_AURAS_BASE(npalf), + (u64)pfvf->aura_ctx->iova); + + /* Enable caching of qints hw context */ + rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_CFG(npalf), BIT_ULL(36)); + rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_BASE(npalf), + 
(u64)pfvf->npa_qints_ctx->iova); + + goto exit; + +free_mem: + npa_ctx_free(rvu, pfvf); + rc = -ENOMEM; + +exit: + /* set stack page info */ + cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST); + rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF; + rsp->stack_pg_bytes = cfg & 0xFF; + rsp->qints = (cfg >> 28) & 0xFFF; + return rc; +} + +int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + int npalf, err; + int blkaddr; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Reset this NPA LF */ + err = rvu_lf_reset(rvu, block, npalf); + if (err) { + dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf); + return NPA_AF_ERR_LF_RESET; + } + + npa_ctx_free(rvu, pfvf); + + return 0; +} + +static int npa_aq_init(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + int err; + + /* Set admin queue endianness */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_GEN_CFG); +#ifdef __BIG_ENDIAN + cfg |= BIT_ULL(1); + rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); +#else + cfg &= ~BIT_ULL(1); + rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); +#endif + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); + cfg &= ~0x03DULL; + rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); + + /* Result structure can be followed by Aura/Pool context at + * RES + 128bytes and a write mask at RES + 256 bytes, depending on + * operation type. Alloc sufficient result memory for all operations. 
+ */ + err = rvu_aq_alloc(rvu, &block->aq, + Q_COUNT(AQ_SIZE), sizeof(struct npa_aq_inst_s), + ALIGN(sizeof(struct npa_aq_res_s), 128) + 256); + if (err) + return err; + + rvu_write64(rvu, block->addr, NPA_AF_AQ_CFG, AQ_SIZE); + rvu_write64(rvu, block->addr, + NPA_AF_AQ_BASE, (u64)block->aq->inst->iova); + return 0; +} + +int rvu_npa_init(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr, err; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0); + if (blkaddr < 0) + return 0; + + block = &hw->block[blkaddr]; + + /* Initialize admin queue */ + err = npa_aq_init(rvu, &hw->block[blkaddr]); + if (err) + return err; + + return 0; +} + +void rvu_npa_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0); + if (blkaddr < 0) + return; + + block = &hw->block[blkaddr]; + rvu_aq_free(rvu, block->aq); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 92e05817ffe7..c331b237a26f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -71,4 +71,812 @@ enum rvu_pf_int_vec_e { RVU_PF_INT_VEC_CNT = 0x7, }; +/* NPA admin queue completion enumeration */ +enum npa_aq_comp { + NPA_AQ_COMP_NOTDONE = 0x0, + NPA_AQ_COMP_GOOD = 0x1, + NPA_AQ_COMP_SWERR = 0x2, + NPA_AQ_COMP_CTX_POISON = 0x3, + NPA_AQ_COMP_CTX_FAULT = 0x4, + NPA_AQ_COMP_LOCKERR = 0x5, +}; + +/* NPA admin queue context types */ +enum npa_aq_ctype { + NPA_AQ_CTYPE_AURA = 0x0, + NPA_AQ_CTYPE_POOL = 0x1, +}; + +/* NPA admin queue instruction opcodes */ +enum npa_aq_instop { + NPA_AQ_INSTOP_NOP = 0x0, + NPA_AQ_INSTOP_INIT = 0x1, + NPA_AQ_INSTOP_WRITE = 0x2, + NPA_AQ_INSTOP_READ = 0x3, + NPA_AQ_INSTOP_LOCK = 0x4, + NPA_AQ_INSTOP_UNLOCK = 0x5, +}; + +/* NPA admin queue instruction structure */ +struct npa_aq_inst_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 doneint : 1; /* W0 */ + u64 reserved_44_62 : 19; + u64 cindex : 20; + u64 reserved_17_23 : 7; + u64 lf : 9; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 lf : 9; + u64 reserved_17_23 : 7; + u64 cindex : 20; + u64 reserved_44_62 : 19; + u64 doneint : 1; +#endif + u64 res_addr; /* W1 */ +}; + +/* NPA admin queue result structure */ +struct npa_aq_res_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_17_63 : 47; /* W0 */ + u64 doneint : 1; + u64 compcode : 8; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 compcode : 8; + u64 doneint : 1; + u64 reserved_17_63 : 47; +#endif + u64 reserved_64_127; /* W1 */ +}; + +struct npa_aura_s { + u64 pool_addr; /* W0 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 avg_level : 8; + u64 reserved_118_119 : 2; + u64 shift : 6; + u64 aura_drop : 8; + u64 reserved_98_103 : 6; + u64 bp_ena : 2; + u64 aura_drop_ena : 1; + u64 pool_drop_ena : 1; + u64 reserved_93 : 1; + u64 avg_con : 9; + u64 pool_way_mask : 16; + u64 pool_caching : 1; + u64 reserved_65 : 2; + u64 ena : 1; +#else + u64 ena : 1; + u64 reserved_65 : 2; + u64 pool_caching : 1; + u64 pool_way_mask : 16; + u64 avg_con : 9; + u64 reserved_93 : 1; + u64 pool_drop_ena : 1; + u64 aura_drop_ena : 1; + u64 bp_ena : 2; + u64 reserved_98_103 : 6; + u64 aura_drop : 8; + u64 shift : 6; + u64 reserved_118_119 : 2; + u64 avg_level : 8; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 reserved_189_191 : 3; + u64 nix1_bpid : 9; + u64 reserved_177_179 : 3; 
+ u64 nix0_bpid : 9; + u64 reserved_164_167 : 4; + u64 count : 36; +#else + u64 count : 36; + u64 reserved_164_167 : 4; + u64 nix0_bpid : 9; + u64 reserved_177_179 : 3; + u64 nix1_bpid : 9; + u64 reserved_189_191 : 3; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 reserved_252_255 : 4; + u64 fc_hyst_bits : 4; + u64 fc_stype : 2; + u64 fc_up_crossing : 1; + u64 fc_ena : 1; + u64 reserved_240_243 : 4; + u64 bp : 8; + u64 reserved_228_231 : 4; + u64 limit : 36; +#else + u64 limit : 36; + u64 reserved_228_231 : 4; + u64 bp : 8; + u64 reserved_240_243 : 4; + u64 fc_ena : 1; + u64 fc_up_crossing : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 reserved_252_255 : 4; +#endif + u64 fc_addr; /* W4 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ + u64 reserved_379_383 : 5; + u64 err_qint_idx : 7; + u64 reserved_371 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_363 : 1; + u64 thresh_up : 1; + u64 thresh_int_ena : 1; + u64 thresh_int : 1; + u64 err_int_ena : 8; + u64 err_int : 8; + u64 update_time : 16; + u64 pool_drop : 8; +#else + u64 pool_drop : 8; + u64 update_time : 16; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_363 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_371 : 1; + u64 err_qint_idx : 7; + u64 reserved_379_383 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ + u64 reserved_420_447 : 28; + u64 thresh : 36; +#else + u64 thresh : 36; + u64 reserved_420_447 : 28; +#endif + u64 reserved_448_511; /* W7 */ +}; + +struct npa_pool_s { + u64 stack_base; /* W0 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 reserved_115_127 : 13; + u64 buf_size : 11; + u64 reserved_100_103 : 4; + u64 buf_offset : 12; + u64 stack_way_mask : 16; + u64 reserved_70_71 : 3; + u64 stack_caching : 1; + u64 reserved_66_67 : 2; + u64 nat_align : 1; + u64 ena : 1; +#else + u64 ena : 1; + u64 nat_align : 1; + u64 reserved_66_67 : 2; + u64 stack_caching : 1; + u64 reserved_70_71 : 3; + u64 stack_way_mask : 16; + u64 buf_offset : 12; + u64 reserved_100_103 : 4; + u64 buf_size : 11; + u64 reserved_115_127 : 13; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 stack_pages : 32; + u64 stack_max_pages : 32; +#else + u64 stack_max_pages : 32; + u64 stack_pages : 32; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 reserved_240_255 : 16; + u64 op_pc : 48; +#else + u64 op_pc : 48; + u64 reserved_240_255 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ + u64 reserved_316_319 : 4; + u64 update_time : 16; + u64 reserved_297_299 : 3; + u64 fc_up_crossing : 1; + u64 fc_hyst_bits : 4; + u64 fc_stype : 2; + u64 fc_ena : 1; + u64 avg_con : 9; + u64 avg_level : 8; + u64 reserved_270_271 : 2; + u64 shift : 6; + u64 reserved_260_263 : 4; + u64 stack_offset : 4; +#else + u64 stack_offset : 4; + u64 reserved_260_263 : 4; + u64 shift : 6; + u64 reserved_270_271 : 2; + u64 avg_level : 8; + u64 avg_con : 9; + u64 fc_ena : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 fc_up_crossing : 1; + u64 reserved_297_299 : 3; + u64 update_time : 16; + u64 reserved_316_319 : 4; +#endif + u64 fc_addr; /* W5 */ + u64 ptr_start; /* W6 */ + u64 ptr_end; /* W7 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 reserved_571_575 : 5; + u64 err_qint_idx : 7; + u64 reserved_563 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_555 : 1; + u64 thresh_up : 1; + u64 thresh_int_ena : 1; + u64 thresh_int : 1; + u64 err_int_ena : 8; + u64 err_int : 8; + u64 reserved_512_535 : 24; +#else + u64 reserved_512_535 : 24; + u64 err_int : 
8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_555 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_563 : 1; + u64 err_qint_idx : 7; + u64 reserved_571_575 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 reserved_612_639 : 28; + u64 thresh : 36; +#else + u64 thresh : 36; + u64 reserved_612_639 : 28; +#endif + u64 reserved_640_703; /* W10 */ + u64 reserved_704_767; /* W11 */ + u64 reserved_768_831; /* W12 */ + u64 reserved_832_895; /* W13 */ + u64 reserved_896_959; /* W14 */ + u64 reserved_960_1023; /* W15 */ +}; + +/* NIX admin queue completion status */ +enum nix_aq_comp { + NIX_AQ_COMP_NOTDONE = 0x0, + NIX_AQ_COMP_GOOD = 0x1, + NIX_AQ_COMP_SWERR = 0x2, + NIX_AQ_COMP_CTX_POISON = 0x3, + NIX_AQ_COMP_CTX_FAULT = 0x4, + NIX_AQ_COMP_LOCKERR = 0x5, + NIX_AQ_COMP_SQB_ALLOC_FAIL = 0x6, +}; + +/* NIX admin queue context types */ +enum nix_aq_ctype { + NIX_AQ_CTYPE_RQ = 0x0, + NIX_AQ_CTYPE_SQ = 0x1, + NIX_AQ_CTYPE_CQ = 0x2, + NIX_AQ_CTYPE_MCE = 0x3, + NIX_AQ_CTYPE_RSS = 0x4, + NIX_AQ_CTYPE_DYNO = 0x5, +}; + +/* NIX admin queue instruction opcodes */ +enum nix_aq_instop { + NIX_AQ_INSTOP_NOP = 0x0, + NIX_AQ_INSTOP_INIT = 0x1, + NIX_AQ_INSTOP_WRITE = 0x2, + NIX_AQ_INSTOP_READ = 0x3, + NIX_AQ_INSTOP_LOCK = 0x4, + NIX_AQ_INSTOP_UNLOCK = 0x5, +}; + +/* NIX admin queue instruction structure */ +struct nix_aq_inst_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 doneint : 1; /* W0 */ + u64 reserved_44_62 : 19; + u64 cindex : 20; + u64 reserved_15_23 : 9; + u64 lf : 7; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 lf : 7; + u64 reserved_15_23 : 9; + u64 cindex : 20; + u64 reserved_44_62 : 19; + u64 doneint : 1; +#endif + u64 res_addr; /* W1 */ +}; + +/* NIX admin queue result structure */ +struct nix_aq_res_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_17_63 : 47; /* W0 */ + u64 doneint : 1; + u64 compcode : 8; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 compcode : 8; + u64 doneint : 1; + u64 reserved_17_63 : 47; +#endif + u64 reserved_64_127; /* W1 */ +}; + +/* NIX Completion queue context structure */ +struct nix_cq_ctx_s { + u64 base; +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 wrptr : 20; + u64 avg_con : 9; + u64 cint_idx : 7; + u64 cq_err : 1; + u64 qint_idx : 7; + u64 rsvd_81_83 : 3; + u64 bpid : 9; + u64 rsvd_69_71 : 3; + u64 bp_ena : 1; + u64 rsvd_64_67 : 4; +#else + u64 rsvd_64_67 : 4; + u64 bp_ena : 1; + u64 rsvd_69_71 : 3; + u64 bpid : 9; + u64 rsvd_81_83 : 3; + u64 qint_idx : 7; + u64 cq_err : 1; + u64 cint_idx : 7; + u64 avg_con : 9; + u64 wrptr : 20; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 update_time : 16; + u64 avg_level : 8; + u64 head : 20; + u64 tail : 20; +#else + u64 tail : 20; + u64 head : 20; + u64 avg_level : 8; + u64 update_time : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 cq_err_int_ena : 8; + u64 cq_err_int : 8; + u64 qsize : 4; + u64 rsvd_233_235 : 3; + u64 caching : 1; + u64 substream : 20; + u64 rsvd_210_211 : 2; + u64 ena : 1; + u64 drop_ena : 1; + u64 drop : 8; + u64 dp : 8; +#else + u64 dp : 8; + u64 drop : 8; + u64 drop_ena : 1; + u64 ena : 1; + u64 rsvd_210_211 : 2; + u64 substream : 20; + u64 caching : 1; + u64 rsvd_233_235 : 3; + u64 qsize : 4; + u64 cq_err_int : 8; + u64 cq_err_int_ena : 8; +#endif +}; + +/* NIX Receive queue context structure */ +struct nix_rq_ctx_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + u64 wqe_aura : 20; + u64 substream : 20; + u64 cq : 20; + u64 
ena_wqwd : 1; + u64 ipsech_ena : 1; + u64 sso_ena : 1; + u64 ena : 1; +#else + u64 ena : 1; + u64 sso_ena : 1; + u64 ipsech_ena : 1; + u64 ena_wqwd : 1; + u64 cq : 20; + u64 substream : 20; + u64 wqe_aura : 20; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 rsvd_127_122 : 6; + u64 lpb_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 xqe_drop_ena : 1; + u64 wqe_caching : 1; + u64 pb_caching : 2; + u64 sso_tt : 2; + u64 sso_grp : 10; + u64 lpb_aura : 20; + u64 spb_aura : 20; +#else + u64 spb_aura : 20; + u64 lpb_aura : 20; + u64 sso_grp : 10; + u64 sso_tt : 2; + u64 pb_caching : 2; + u64 wqe_caching : 1; + u64 xqe_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 lpb_drop_ena : 1; + u64 rsvd_127_122 : 6; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 xqe_hdr_split : 1; + u64 xqe_imm_copy : 1; + u64 rsvd_189_184 : 6; + u64 xqe_imm_size : 6; + u64 later_skip : 6; + u64 rsvd_171 : 1; + u64 first_skip : 7; + u64 lpb_sizem1 : 12; + u64 spb_ena : 1; + u64 rsvd_150_148 : 3; + u64 wqe_skip : 2; + u64 spb_sizem1 : 6; + u64 rsvd_139_128 : 12; +#else + u64 rsvd_139_128 : 12; + u64 spb_sizem1 : 6; + u64 wqe_skip : 2; + u64 rsvd_150_148 : 3; + u64 spb_ena : 1; + u64 lpb_sizem1 : 12; + u64 first_skip : 7; + u64 rsvd_171 : 1; + u64 later_skip : 6; + u64 xqe_imm_size : 6; + u64 rsvd_189_184 : 6; + u64 xqe_imm_copy : 1; + u64 xqe_hdr_split : 1; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 spb_pool_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_aura_drop : 8; + u64 wqe_pool_pass : 8; + u64 wqe_pool_drop : 8; + u64 xqe_pass : 8; + u64 xqe_drop : 8; +#else + u64 xqe_drop : 8; + u64 xqe_pass : 8; + u64 wqe_pool_drop : 8; + u64 wqe_pool_pass : 8; + u64 spb_aura_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_pool_pass : 8; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ + u64 rsvd_319_315 : 5; + u64 qint_idx : 7; + u64 rq_int_ena : 8; + u64 rq_int : 8; + u64 rsvd_291_288 : 4; + u64 lpb_pool_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_aura_drop : 8; +#else + u64 lpb_aura_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_pool_pass : 8; + u64 rsvd_291_288 : 4; + u64 rq_int : 8; + u64 rq_int_ena : 8; + u64 qint_idx : 7; + u64 rsvd_319_315 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ + u64 rsvd_383_366 : 18; + u64 flow_tagw : 6; + u64 bad_utag : 8; + u64 good_utag : 8; + u64 ltag : 24; +#else + u64 ltag : 24; + u64 good_utag : 8; + u64 bad_utag : 8; + u64 flow_tagw : 6; + u64 rsvd_383_366 : 18; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ + u64 rsvd_447_432 : 16; + u64 octs : 48; +#else + u64 octs : 48; + u64 rsvd_447_432 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W7 */ + u64 rsvd_511_496 : 16; + u64 pkts : 48; +#else + u64 pkts : 48; + u64 rsvd_511_496 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 rsvd_575_560 : 16; + u64 drop_octs : 48; +#else + u64 drop_octs : 48; + u64 rsvd_575_560 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 rsvd_639_624 : 16; + u64 drop_pkts : 48; +#else + u64 drop_pkts : 48; + u64 rsvd_639_624 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 rsvd_703_688 : 16; + u64 re_pkts : 48; +#else + u64 re_pkts : 48; + u64 rsvd_703_688 : 16; +#endif + u64 rsvd_767_704; /* W11 */ + u64 rsvd_831_768; /* W12 */ + u64 rsvd_895_832; /* W13 */ + u64 rsvd_959_896; /* W14 */ + u64 rsvd_1023_960; /* W15 */ +}; + +/* NIX sqe sizes */ +enum nix_maxsqesz { + NIX_MAXSQESZ_W16 = 0x0, + NIX_MAXSQESZ_W8 = 0x1, +}; 
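The NPA and NIX context structures above all follow one layout convention: every 64-bit hardware word is declared twice, once per bitfield endianness, with the field order mirrored between the two branches. A minimal standalone sketch of that idiom follows; it is illustrative only, and demo_word_s and its fields are hypothetical names, not part of the driver. Little-endian ABIs allocate bitfields starting at the least significant bit and big-endian ABIs at the most significant bit, so mirroring the order keeps each field at the same bit position in the word the device sees:

#include <linux/types.h>	/* u64 */
#include <asm/byteorder.h>	/* defines __BIG_ENDIAN_BITFIELD or __LITTLE_ENDIAN_BITFIELD */

/* Hypothetical 64-bit hardware word with "ena" at bit 0 and "count"
 * at bits 4..39. Both declarations describe the same in-memory word;
 * only the compiler's bitfield allocation order differs.
 */
struct demo_word_s {
#if defined(__BIG_ENDIAN_BITFIELD)
	u64 reserved_40_63	: 24;
	u64 count		: 36;
	u64 reserved_1_3	: 3;
	u64 ena			: 1;
#else
	u64 ena			: 1;
	u64 reserved_1_3	: 3;
	u64 count		: 36;
	u64 reserved_40_63	: 24;
#endif
};

Because reserved fields pad every word to exactly 64 bits, no field straddles a word boundary, and the W0..W15 comments in the structures above line up with successive u64 words in memory.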
+ +/* NIX SQB caching type */ +enum nix_stype { + NIX_STYPE_STF = 0x0, + NIX_STYPE_STT = 0x1, + NIX_STYPE_STP = 0x2, +}; + +/* NIX Send queue context structure */ +struct nix_sq_ctx_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + u64 sqe_way_mask : 16; + u64 cq : 20; + u64 sdp_mcast : 1; + u64 substream : 20; + u64 qint_idx : 6; + u64 ena : 1; +#else + u64 ena : 1; + u64 qint_idx : 6; + u64 substream : 20; + u64 sdp_mcast : 1; + u64 cq : 20; + u64 sqe_way_mask : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 sqb_count : 16; + u64 default_chan : 12; + u64 smq_rr_quantum : 24; + u64 sso_ena : 1; + u64 xoff : 1; + u64 cq_ena : 1; + u64 smq : 9; +#else + u64 smq : 9; + u64 cq_ena : 1; + u64 xoff : 1; + u64 sso_ena : 1; + u64 smq_rr_quantum : 24; + u64 default_chan : 12; + u64 sqb_count : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 rsvd_191 : 1; + u64 sqe_stype : 2; + u64 sq_int_ena : 8; + u64 sq_int : 8; + u64 sqb_aura : 20; + u64 smq_rr_count : 25; +#else + u64 smq_rr_count : 25; + u64 sqb_aura : 20; + u64 sq_int : 8; + u64 sq_int_ena : 8; + u64 sqe_stype : 2; + u64 rsvd_191 : 1; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 rsvd_255_253 : 3; + u64 smq_next_sq_vld : 1; + u64 smq_pend : 1; + u64 smenq_next_sqb_vld : 1; + u64 head_offset : 6; + u64 smenq_offset : 6; + u64 tail_offset : 6; + u64 smq_lso_segnum : 8; + u64 smq_next_sq : 20; + u64 mnq_dis : 1; + u64 lmt_dis : 1; + u64 cq_limit : 8; + u64 max_sqe_size : 2; +#else + u64 max_sqe_size : 2; + u64 cq_limit : 8; + u64 lmt_dis : 1; + u64 mnq_dis : 1; + u64 smq_next_sq : 20; + u64 smq_lso_segnum : 8; + u64 tail_offset : 6; + u64 smenq_offset : 6; + u64 head_offset : 6; + u64 smenq_next_sqb_vld : 1; + u64 smq_pend : 1; + u64 smq_next_sq_vld : 1; + u64 rsvd_255_253 : 3; +#endif + u64 next_sqb : 64;/* W4 */ + u64 tail_sqb : 64;/* W5 */ + u64 smenq_sqb : 64;/* W6 */ + u64 smenq_next_sqb : 64;/* W7 */ + u64 head_sqb : 64;/* W8 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 rsvd_639_630 : 10; + u64 vfi_lso_vld : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_mps : 14; + u64 vfi_lso_sb : 8; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_total : 18; + u64 rsvd_583_576 : 8; +#else + u64 rsvd_583_576 : 8; + u64 vfi_lso_total : 18; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_sb : 8; + u64 vfi_lso_mps : 14; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vld : 1; + u64 rsvd_639_630 : 10; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 rsvd_703_658 : 46; + u64 scm_lso_rem : 18; +#else + u64 scm_lso_rem : 18; + u64 rsvd_703_658 : 46; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W11 */ + u64 rsvd_767_752 : 16; + u64 octs : 48; +#else + u64 octs : 48; + u64 rsvd_767_752 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W12 */ + u64 rsvd_831_816 : 16; + u64 pkts : 48; +#else + u64 pkts : 48; + u64 rsvd_831_816 : 16; +#endif + u64 rsvd_895_832 : 64;/* W13 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W14 */ + u64 rsvd_959_944 : 16; + u64 dropped_octs : 48; +#else + u64 dropped_octs : 48; + u64 rsvd_959_944 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W15 */ + u64 rsvd_1023_1008 : 16; + u64 dropped_pkts : 48; +#else + u64 dropped_pkts : 48; + u64 rsvd_1023_1008 : 16; +#endif +}; + +/* NIX Receive side scaling entry structure*/ +struct nix_rsse_s { +#if defined(__BIG_ENDIAN_BITFIELD) + uint32_t reserved_20_31 : 12; + uint32_t rq : 20; +#else + uint32_t rq : 20; + uint32_t reserved_20_31 : 12; + +#endif +}; + +/* NIX receive 
multicast/mirror entry structure */ +struct nix_rx_mce_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + uint64_t next : 16; + uint64_t pf_func : 16; + uint64_t rsvd_31_24 : 8; + uint64_t index : 20; + uint64_t eol : 1; + uint64_t rsvd_2 : 1; + uint64_t op : 2; +#else + uint64_t op : 2; + uint64_t rsvd_2 : 1; + uint64_t eol : 1; + uint64_t index : 20; + uint64_t rsvd_31_24 : 8; + uint64_t pf_func : 16; + uint64_t next : 16; +#endif +}; + +enum nix_lsoalg { + NIX_LSOALG_NOP, + NIX_LSOALG_ADD_SEGNUM, + NIX_LSOALG_ADD_PAYLEN, + NIX_LSOALG_ADD_OFFSET, + NIX_LSOALG_TCP_FLAGS, +}; + +enum nix_txlayer { + NIX_TXLAYER_OL3, + NIX_TXLAYER_OL4, + NIX_TXLAYER_IL3, + NIX_TXLAYER_IL4, +}; + +struct nix_lso_format { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd_19_63 : 45; + u64 alg : 3; + u64 rsvd_14_15 : 2; + u64 sizem1 : 2; + u64 rsvd_10_11 : 2; + u64 layer : 2; + u64 offset : 8; +#else + u64 offset : 8; + u64 layer : 2; + u64 rsvd_10_11 : 2; + u64 sizem1 : 2; + u64 rsvd_14_15 : 2; + u64 alg : 3; + u64 rsvd_19_63 : 45; +#endif +}; + #endif /* RVU_STRUCT_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a53736c26c0c..a5a0823e5ada 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -308,10 +308,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: - case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_DEALLOC_MEMIC: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -426,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_CREATE_QP: case MLX5_CMD_OP_FPGA_MODIFY_QP: @@ -435,6 +436,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_ALLOC_MEMIC: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -599,8 +601,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); - MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); - MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); @@ -617,6 +619,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 
a4179122a279..4b85abb5c9f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cons_index = 0; cq->arm_sn = 0; cq->eq = eq; + cq->uid = MLX5_GET(create_cq_in, in, uid); refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) @@ -144,6 +145,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, din, uid, cq->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + MLX5_SET(modify_cq_in, in, uid, cq->uid); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 0240aee9189e..d027ce00c8ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ - {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ @@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule, memcpy(__entry->destination, &rule->dest_attr, sizeof(__entry->destination)); - if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && - rule->dest_attr.counter) + if (rule->dest_attr.type & + MLX5_FLOW_DESTINATION_TYPE_COUNTER) __entry->counter_id = - rule->dest_attr.counter->id; + rule->dest_attr.counter_id; ), TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", __entry->rule, __entry->fte, __entry->index, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 24e3b564964f..023dc4bccd28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -235,3 +235,211 @@ out: kfree(out); return err; } + +static u32 fec_supported_speeds[] = { + 10000, + 40000, + 25000, + 50000, + 56000, + 100000 +}; + +#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds) + +/* get/set FEC admin field for a given speed */ +static int mlx5e_fec_admin_field(u32 *pplm, + u8 *fec_policy, + bool write, + u32 speed) +{ + switch (speed) { + case 10000: + case 40000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_10g_40g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_10g_40g, *fec_policy); + break; + case 25000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_25g); + else + MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_25g, *fec_policy); + break; + case 50000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_50g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_50g, *fec_policy); + break; + case 56000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_56g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_56g, *fec_policy); + break; + case 100000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_100g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_100g, *fec_policy); + break; + default: + return -EINVAL; + } + return 0; +} + +/* returns FEC capabilities for a given speed */ +static int mlx5e_get_fec_cap_field(u32 *pplm, + u8 *fec_cap, + u32 speed) +{ + switch (speed) { + case 10000: + case 40000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_10g_40g); + break; + case 25000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_25g); + break; + case 50000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_50g); + break; + case 56000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_56g); + break; + case 100000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_100g); + break; + default: + return -EINVAL; + } + return 0; +} + +int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) +{ + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 current_fec_speed; + int err; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + err = mlx5e_port_linkspeed(dev, &current_fec_speed); + if (err) + return err; + + return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed); +} + +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u8 *fec_configured_mode) +{ + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 link_speed; + int err; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); + + if (!fec_configured_mode) + return 0; + + err = mlx5e_port_linkspeed(dev, &link_speed); + if (err) + return err; + + return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed); +} + +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) +{ + bool fec_mode_not_supp_in_speed = false; + u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 current_fec_speed; + u8 fec_caps = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + err = mlx5e_port_linkspeed(dev, &current_fec_speed); + if (err) + return err; + + memset(in, 0, sz); + MLX5_SET(pplm_reg, in, local_port, 1); + for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) { + mlx5e_get_fec_cap_field(out, &fec_caps,
fec_supported_speeds[i]); + /* policy supported for link speed */ + if (!!(fec_caps & fec_policy)) { + mlx5e_fec_admin_field(in, &fec_policy, 1, + fec_supported_speeds[i]); + } else { + if (fec_supported_speeds[i] == current_fec_speed) + return -EOPNOTSUPP; + mlx5e_fec_admin_field(in, &no_fec_policy, 1, + fec_supported_speeds[i]); + fec_mode_not_supp_in_speed = true; + } + } + + if (fec_mode_not_supp_in_speed) + mlx5_core_dbg(dev, + "FEC policy 0x%x is not supported for some speeds", + fec_policy); + + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index f8cbd8194179..cd2160b8c9bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -45,4 +45,16 @@ int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); + +int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps); +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u8 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy); + +enum { + MLX5E_FEC_NOFEC, + MLX5E_FEC_FIRECODE, + MLX5E_FEC_RS_528_514, +}; + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index db3278cc052b..3078491cc0d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c61f6302cde2..3e770abfd802 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -547,6 +547,70 @@ static void ptys2ethtool_adver_link(unsigned long *advertising_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static const u32 pplm_fec_2_ethtool[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, + [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, + [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, +}; + +static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) +{ + int mode = 0; + + if (!fec_mode) + return ETHTOOL_FEC_AUTO; + + mode = find_first_bit(&fec_mode, size); + + if (mode < ARRAY_SIZE(pplm_fec_2_ethtool)) + return pplm_fec_2_ethtool[mode]; + + return 0; +} + +/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */ +static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code) +{ + u32 offset; + + offset = find_first_bit(&ethtool_fec_code, sizeof(u32)); + offset -= ETHTOOL_FEC_OFF_BIT; + offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT; + + return offset; +} + +static int get_fec_supported_advertised(struct mlx5_core_dev *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + u_long fec_caps = 0; + u32 active_fec = 0; + u32 offset; + u32 bitn; + int err; + + err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps); + if (err) + return (err == -EOPNOTSUPP) ?
0 : err; + + err = mlx5e_get_fec_mode(dev, &active_fec, NULL); + if (err) + return err; + + for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) { + u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn]; + + offset = ethtool_fec2ethtool_caps(ethtool_bitmask); + __set_bit(offset, link_ksettings->link_modes.supported); + } + + active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE); + offset = ethtool_fec2ethtool_caps(active_fec); + __set_bit(offset, link_ksettings->link_modes.advertising); + + return 0; +} + static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, u32 eth_proto_cap, u8 connector_type) @@ -742,7 +806,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", __func__, err); - goto err_query_ptys; + goto err_query_regs; } eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); @@ -778,11 +842,17 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, AUTONEG_ENABLE; ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); + + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) + netdev_dbg(netdev, "%s: FEC caps query failed: %d\n", + __func__, err); + if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Autoneg); -err_query_ptys: +err_query_regs: return err; } @@ -1277,6 +1347,58 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 fec_configured = 0; + u32 fec_active = 0; + int err; + + err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured); + + if (err) + return err; + + fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active, + sizeof(u32) * BITS_PER_BYTE); + + if (!fecparam->active_fec) + return -EOPNOTSUPP; + + fecparam->fec = pplm2ethtool_fec((u_long)fec_configured, + sizeof(u8) * BITS_PER_BYTE); + + return 0; +} + +static int mlx5e_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 fec_policy = 0; + int mode; + int err; + + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { + if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) + continue; + fec_policy |= (1 << mode); + break; + } + + err = mlx5e_set_fec_mode(mdev, fec_policy); + + if (err) + return err; + + mlx5_toggle_port_link(mdev); + + return 0; +} + static u32 mlx5e_get_msglevel(struct net_device *dev) { return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; @@ -1699,4 +1821,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .self_test = mlx5e_self_test, .get_msglevel = mlx5e_get_msglevel, .set_msglevel = mlx5e_set_msglevel, + .get_fecparam = mlx5e_get_fecparam, + .set_fecparam = mlx5e_set_fecparam, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c9848e333450..1243edbedc9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3392,9 +3392,6 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return 
-EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 64c2b9ea8b1e..c3c657548824 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -853,9 +853,6 @@ static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); @@ -869,9 +866,6 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f19067c94272..2f7fb8de6967 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -433,10 +433,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 frag_pi) + u16 pi, u16 nnops) { struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi]; - u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; edge_wi = wi + nnops; @@ -455,15 +454,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); - u16 pi, frag_pi; + u16 pi, contig_wqebbs_room; int err; int i; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - - if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) { - mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { + mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1c006869a642..1e55b9c27ffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -614,46 +614,82 @@ static const struct counter_desc pport_phy_statistical_stats_desc[] = { { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, }; -#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc) +static const struct counter_desc +pport_phy_statistical_err_lanes_stats_desc[] = { + { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) }, + { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) }, + { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) }, + { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, +}; + +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) static int 
mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + int num_stats; + /* "1" for link_down_events special counter */ - return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ? - NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1; + num_stats = 1; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? + NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + + return num_stats; } static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { + struct mlx5_core_dev *mdev = priv->mdev; int i; strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); - if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) return idx; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_err_lanes_stats_desc[i].format); + return idx; } static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { + struct mlx5_core_dev *mdev = priv->mdev; int i; /* link_down_events_phy has special handling since it is not stored in __be64 format */ data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); - if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) return idx; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, + i); return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6de21d9f4fad..608025ca5c04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr { u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; + struct mlx5_fc *counter; }; #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) @@ -73,6 +74,7 @@ enum { MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), }; #define MLX5E_TC_MAX_SPLITS 1 @@ -81,7 +83,7 @@ struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u8 flags; + u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; struct list_head encap; /* flows sharing the same encap ID */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ @@ -100,11 +102,6 @@ struct mlx5e_tc_flow_parse_attr { int mirred_ifindex; }; -enum { - MLX5_HEADER_TYPE_VXLAN = 0x0, - MLX5_HEADER_TYPE_NVGRE = 0x1, -}; - #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) @@ -677,7 +674,7 @@ static void 
mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, } } -static struct mlx5_flow_handle * +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, @@ -688,19 +685,17 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .has_flow_tag = true, .flow_tag = attr->flow_tag, - .encap_id = 0, + .reformat_id = 0, + .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_handle *rule; bool table_created = false; int err, dest_ix = 0; if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { - rule = ERR_PTR(err); goto err_add_hairpin_flow; } if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { @@ -720,22 +715,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) { - rule = ERR_CAST(counter); + err = PTR_ERR(counter); goto err_fc_create; } dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter = counter; + dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; + attr->counter = counter; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_create_mod_hdr_id; - } } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { @@ -761,7 +755,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - rule = ERR_CAST(priv->fs.tc.t); + err = PTR_ERR(priv->fs.tc.t); goto err_create_ft; } @@ -771,13 +765,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->match_level != MLX5_MATCH_NONE) parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, dest, dest_ix); + flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, dest, dest_ix); - if (IS_ERR(rule)) + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); goto err_add_rule; + } - return rule; + return 0; err_add_rule: if (table_created) { @@ -793,7 +789,7 @@ err_fc_create: if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) mlx5e_hairpin_flow_del(priv, flow); err_add_hairpin_flow: - return rule; + return err; } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@ -802,7 +798,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_fc *counter = NULL; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = attr->counter; mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); @@ -829,28 +825,115 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack); static struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_flow_handle *rule; + + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(rule)) + return rule; + + if (attr->mirror_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); + if (IS_ERR(flow->rule[1])) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return 
flow->rule[1]; + } + } + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + return rule; +} + +static void +mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + + if (attr->mirror_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); +} + +static struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *slow_attr) +{ + struct mlx5_flow_handle *rule; + + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->mirror_count = 0; + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + if (!IS_ERR(rule)) + flow->flags |= MLX5E_TC_FLOW_SLOW; + + return rule; +} + +static void +mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *slow_attr) +{ + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + flow->flags &= ~MLX5E_TC_FLOW_SLOW; +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; - struct mlx5_flow_handle *rule = NULL; + struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - int err; + int err = 0, encap_err = 0; + + /* if prios are not supported, keep the old behaviour of using the same prio + * for all offloaded rules.
+ */ + if (!mlx5_eswitch_prios_supported(esw)) + attr->prio = 1; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->prio > max_prio) { + NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { out_dev = __dev_get_by_index(dev_net(priv->netdev), attr->parse_attr->mirred_ifindex); - err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, &encap_dev, flow, extack); - if (err) { - rule = ERR_PTR(err); - if (err != -EAGAIN) - goto err_attach_encap; + encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow, + extack); + if (encap_err && encap_err != -EAGAIN) { + err = encap_err; + goto err_attach_encap; } out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -859,49 +942,58 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_add_vlan; - } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_mod_hdr; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_create_counter; } + + attr->counter = counter; } - /* we get here if (1) there's no error (rule being null) or when + /* we get here if (1) there's no error or when * (2) there's an encap action and we're on -EAGAIN (no valid neigh) */ - if (rule != ERR_PTR(-EAGAIN)) { - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - - if (attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(flow->rule[1])) - goto err_fwd_rule; - } + if (encap_err == -EAGAIN) { + /* continue with goto slow path rule instead */ + struct mlx5_esw_flow_attr slow_attr; + + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr); + } else { + flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); } - return rule; -err_fwd_rule: - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - rule = flow->rule[1]; + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_add_rule; + } + + return 0; + err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) mlx5e_detach_encap(priv, flow); err_attach_encap: - return rule; +err_max_prio_chain: + return err; } static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, @@ -909,36 +1001,43 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_esw_flow_attr slow_attr; if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - 
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (flow->flags & MLX5E_TC_FLOW_SLOW) + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { mlx5e_detach_encap(priv, flow); kvfree(attr->parse_attr); } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(esw->dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - e->encap_size, e->encap_header, - &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) { mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", err); @@ -950,26 +1049,20 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, &e->flows, encap) { esw_attr = flow->esw_attr; esw_attr->encap_id = e->encap_id; - flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); + spec = &esw_attr->parse_attr->spec; + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); continue; } - if (esw_attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr); - err = PTR_ERR(flow->rule[1]); - mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n", - err); - continue; - } - } - - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ + flow->rule[0] = rule; } } @@ -977,25 +1070,44 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; + int err; list_for_each_entry(flow, &e->flows, encap) { - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + spec = &flow->esw_attr->parse_attr->spec; + + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); + flow->flags |= 
MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ + flow->rule[0] = rule; } if (e->flags & MLX5_ENCAP_ENTRY_VALID) { e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); } } +static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + return flow->esw_attr->counter; + else + return flow->nic_attr->counter; +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; @@ -1021,7 +1133,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) continue; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { neigh_used = true; @@ -1061,7 +1173,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); @@ -2391,7 +2503,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; fl4.fl4_dport = tun_key->tp_dst; break; @@ -2435,7 +2547,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, tos, ttl, fl4.daddr, @@ -2455,8 +2567,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2500,7 +2614,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl6.flowi6_proto = IPPROTO_UDP; fl6.fl6_dport = tun_key->tp_dst; break; @@ -2544,7 +2658,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, tos, ttl, &fl6.daddr, @@ -2565,8 +2679,10 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2617,7 +2733,7 @@ vxlan_encap_offload_err: if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_HEADER_TYPE_VXLAN; + tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; } else { 
NL_SET_ERR_MSG_MOD(extack, "port isn't an offloaded vxlan udp dport"); @@ -2728,6 +2844,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; @@ -2797,7 +2914,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, parse_attr->mirred_ifindex = out_dev->ifindex; parse_attr->tun_info = *info; attr->parse_attr = parse_attr; - action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; /* attr->out_rep is resolved when we handle encap */ @@ -2836,6 +2953,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_gact_goto_chain(a)) { + u32 dest_chain = tcf_gact_goto_chain_index(a); + u32 max_chain = mlx5_eswitch_get_chain_range(esw); + + if (dest_chain <= attr->chain) { + NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); + return -EOPNOTSUPP; + } + if (dest_chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = dest_chain; + + continue; + } + return -EINVAL; } @@ -2853,9 +2989,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static void get_flags(int flags, u8 *flow_flags) +static void get_flags(int flags, u16 *flow_flags) { - u8 __flow_flags = 0; + u16 __flow_flags = 0; if (flags & MLX5E_TC_INGRESS) __flow_flags |= MLX5E_TC_FLOW_INGRESS; @@ -2884,34 +3020,15 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) return &priv->fs.tc.ht; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) +static int +mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, + struct tc_cls_flower_offload *f, u16 flow_flags, + struct mlx5e_tc_flow_parse_attr **__parse_attr, + struct mlx5e_tc_flow **__flow) { - struct netlink_ext_ack *extack = f->common.extack; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; - int attr_size, err = 0; - u8 flow_flags = 0; - - get_flags(flags, &flow_flags); - - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (flow) { - NL_SET_ERR_MSG_MOD(extack, - "flow cookie already exists, ignoring"); - netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); - return 0; - } - - if (esw && esw->mode == SRIOV_OFFLOADS) { - flow_flags |= MLX5E_TC_FLOW_ESWITCH; - attr_size = sizeof(struct mlx5_esw_flow_attr); - } else { - flow_flags |= MLX5E_TC_FLOW_NIC; - attr_size = sizeof(struct mlx5_nic_flow_attr); - } + int err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@ -2925,49 +3042,161 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, flow->priv = priv; err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err < 0) + if (err) goto err_free; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, - extack); - if (err < 0) - goto err_free; - 
flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, - extack); - } else { - err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, - extack); - if (err < 0) - goto err_free; - flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, - extack); - } + *__flow = flow; + *__parse_attr = parse_attr; - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - if (err != -EAGAIN) - goto err_free; - } + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; - if (err != -EAGAIN) - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flags |= MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + flow->esw_attr->chain = f->common.chain_index; + flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + if (!(flow->esw_attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) kvfree(parse_attr); - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); - if (err) { - mlx5e_tc_del_flow(priv, flow); - kfree(flow); - } + *__flow = flow; + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); +out: return err; +} + +static int +mlx5e_add_nic_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + /* multi-chain not supported for NIC rules */ + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + + flow_flags |= MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + kvfree(parse_attr); + *__flow = flow; + + return 0; err_free: + kfree(flow); kvfree(parse_attr); +out: + return err; +} + +static int +mlx5e_tc_add_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + int flags, + struct mlx5e_tc_flow **flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u16 flow_flags; + int err; + + get_flags(flags, &flow_flags); + + if (!tc_can_offload_extack(priv->netdev, f->common.extack)) + return -EOPNOTSUPP; + + if (esw && esw->mode == SRIOV_OFFLOADS) + err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + else + err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + + return err; +} + +int mlx5e_configure_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + int err = 0; 
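+ /* the flower cookie is the rhashtable key here; a request for a cookie that is already offloaded is ignored below rather than replacing the existing flow */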
+ + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + NL_SET_ERR_MSG_MOD(extack, + "flow cookie already exists, ignoring"); + netdev_warn_once(priv->netdev, + "flow cookie %lx already exists, ignoring\n", + f->cookie); + goto out; + } + + err = mlx5e_tc_add_flow(priv, f, flags, &flow); + if (err) + goto out; + + err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) + goto err_free; + + return 0; + +err_free: + mlx5e_tc_del_flow(priv, flow); kfree(flow); +out: return err; } @@ -3018,7 +3247,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) return 0; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ae73ea992845..6dacaeba2fbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -290,10 +290,9 @@ dma_unmap_wqe_err: static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 frag_pi) + u16 pi, u16 nnops) { struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; edge_wi = wi + nnops; @@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; + u16 headlen, ihs, contig_wqebbs_room; u16 ds_cnt, ds_cnt_inl = 0; - u16 headlen, ihs, frag_pi; u8 num_wqebbs, opcode; u32 num_bytes; int num_dma; @@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { - mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); } @@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 headlen, ihs, pi, frag_pi; + u16 headlen, ihs, pi, contig_wqebbs_room; u16 ds_cnt, ds_cnt_inl = 0; u8 num_wqebbs, opcode; u32 num_bytes; @@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); } - mlx5i_sq_fetch_wqe(sq, &wqe, &pi); + mlx5i_sq_fetch_wqe(sq, &wqe, pi); /* fill wqe */ wi = &sq->db.wqe_info[pi]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 48864f4988a4..c1e1a16a9b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -273,7 +273,7 @@ static void eq_pf_process(struct mlx5_eq *eq) case MLX5_PFAULT_SUBTYPE_WQE: /* WQE based event */ 
pfault->type = - be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; pfault->token = be32_to_cpu(pf_eqe->wqe.token); pfault->wqe.wq_num = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ea7dedc2d5ad..d004957328f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create FDB log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + root_ns = mlx5_get_fdb_sub_ns(dev, 0); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); return -EOPNOTSUPP; @@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index dfc642de4e6d..aaafc9f17115 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -59,6 +59,10 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) +#define FDB_MAX_CHAIN 3 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 16 + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -120,6 +124,13 @@ struct mlx5_vport { u16 enabled_events; }; +enum offloads_fdb_flags { + ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), +}; + +extern const unsigned int ESW_POOLS[4]; + +#define PRIO_LEVELS 2 struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -130,16 +141,24 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { - struct mlx5_flow_table *fast_fdb; - struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; + + struct { + struct mlx5_flow_table *fdb; + u32 num_rules; + } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS]; + /* Protects fdb_prio table */ + struct mutex fdb_prio_lock; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; } offloads; }; + u32 flags; }; struct mlx5_esw_offload { @@ -181,6 +200,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; + int nvports; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -228,6 +248,19 @@ void 
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); + +u16 +mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); + +u32 +mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, @@ -266,6 +299,10 @@ struct mlx5_esw_flow_attr { u32 encap_id; u32 mod_hdr_id; u8 match_level; + struct mlx5_fc *counter; + u32 chain; + u16 prio; + u32 dest_chain; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -318,6 +355,11 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} + +#define FDB_MAX_CHAIN 1 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 1 + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a35a2310f871..9eac137790f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,33 +37,59 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "en.h" +#include "fs_core.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +#define fdb_prio_table(esw, chain, prio, level) \ + (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); + +bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) +{ + return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); +} + +u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_CHAIN; + + return 0; +} + +u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_PRIO; + + return 1; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_table *ft = NULL; - struct mlx5_fc *counter = NULL; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + bool mirror = !!(attr->mirror_count); struct mlx5_flow_handle *rule; + struct mlx5_flow_table *fdb; int j, i = 0; void *misc; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - if (attr->mirror_count) - ft = esw->fdb_table.offloads.fwd_fdb; - else - ft = esw->fdb_table.offloads.fast_fdb; - flow_act.action = attr->action; /* if per flow vlan pop/push is emulated, don't set that into the firmware */ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) @@ -81,23 +107,33 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - for (j = attr->mirror_count; j < attr->out_count; j++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + if (attr->dest_chain) { + struct mlx5_flow_table *ft; + + ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_create_goto_table; + } + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; i++; + } else { + for (j = attr->mirror_count; j < attr->out_count; j++) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); + dest[i].vport.vhca_id_valid = + !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + i++; + } } } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) { - rule = ERR_CAST(counter); - goto err_counter_alloc; - } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[i].counter = counter; + dest[i].counter_id = mlx5_fc_id(attr->counter); i++; } @@ -127,10 +163,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - flow_act.encap_id = attr->encap_id; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + flow_act.reformat_id = attr->encap_id; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (IS_ERR(fdb)) { + rule = ERR_CAST(fdb); + goto err_esw_get; + } + + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -139,8 +181,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); -err_counter_alloc: + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); +err_esw_get: + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); +err_create_goto_table: return rule; } @@ -150,11 +195,25 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; void *misc; int i; + fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + if (IS_ERR(fast_fdb)) { + rule = ERR_CAST(fast_fdb); + goto err_get_fast; + } + + fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + if (IS_ERR(fwd_fdb)) { + rule = ERR_CAST(fwd_fdb); + goto err_get_fwd; + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < attr->mirror_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -164,7 +223,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = esw->fdb_table.offloads.fwd_fdb, + dest[i].ft = fwd_fdb, i++; misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -187,12 +246,41 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch 
*esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; - rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i); + rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); - if (!IS_ERR(rule)) - esw->offloads.num_flows++; + if (IS_ERR(rule)) + goto add_err; + + esw->offloads.num_flows++; return rule; +add_err: + esw_put_prio_table(esw, attr->chain, attr->prio, 1); +err_get_fwd: + esw_put_prio_table(esw, attr->chain, attr->prio, 0); +err_get_fast: + return rule; +} + +static void +__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr, + bool fwd_rule) +{ + bool mirror = (attr->mirror_count > 0); + + mlx5_del_flow_rules(rule); + esw->offloads.num_flows--; + + if (fwd_rule) { + esw_put_prio_table(esw, attr->chain, attr->prio, 1); + esw_put_prio_table(esw, attr->chain, attr->prio, 0); + } else { + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); + } } void @@ -200,12 +288,15 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr) { - struct mlx5_fc *counter = NULL; + __mlx5_eswitch_del_rule(esw, rule, attr, false); +} - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, true); } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -294,7 +385,8 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + !attr->dest_chain); err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) @@ -501,74 +593,170 @@ out: #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +/* Firmware currently has 4 pools of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE); this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via put/get_sz_to_pool. + * Firmware doesn't report any of this for now.
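 * [Editor's note, not part of the patch: with ESW_SIZE = 16M and the pool
 * sizes below of {4M, 1M, 64K, 4K}, the per-pool table budgets computed
 * later as ESW_SIZE / ESW_POOLS[i] come out to 4, 16, 256 and 4096 tables
 * respectively; a pool whose table size exceeds the firmware's
 * log_max_ft_size limit gets a budget of 0.]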
+ * ESW_POOLS is expected to be sorted from large to small */ +#define ESW_SIZE (16 * 1024 * 1024) +const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, + 64 * 1024, 4 * 1024 }; + +static int +get_sz_from_pool(struct mlx5_eswitch *esw) +{ + int sz = 0, i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (esw->fdb_table.offloads.fdb_left[i]) { + --esw->fdb_table.offloads.fdb_left[i]; + sz = ESW_POOLS[i]; + break; + } + } + + return sz; +} + +static void +put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (sz >= ESW_POOLS[i]) { + ++esw->fdb_table.offloads.fdb_left[i]; + break; + } + } +} + +static struct mlx5_flow_table * +create_next_size_table(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *ns, + u16 table_prio, + int level, + u32 flags) +{ + struct mlx5_flow_table *fdb; + int sz; + + sz = get_sz_from_pool(esw); + if (!sz) + return ERR_PTR(-ENOSPC); + + fdb = mlx5_create_auto_grouped_flow_table(ns, + table_prio, + sz, + ESW_OFFLOADS_NUM_GROUPS, + level, + flags); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), table_prio, level, sz); + put_sz_to_pool(esw, sz); + } + + return fdb; +} + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) { struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int esw_size, err = 0; + struct mlx5_flow_namespace *ns; + int table_prio, l = 0; u32 flags = 0; - u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - err = -EOPNOTSUPP; - goto out_namespace; - } + if (chain == FDB_SLOW_PATH_CHAIN) + return esw->fdb_table.offloads.slow_fdb; - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, - 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = fdb_prio_table(esw, chain, prio, level).fdb; + if (fdb) { + /* take ref on earlier levels as well */ + while (level >= 0) + fdb_prio_table(esw, chain, prio, level--).num_rules++; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; + } - if (mlx5_esw_has_fwd_fdb(dev)) - esw_size >>= 1; + ns = mlx5_get_fdb_sub_ns(dev, chain); + if (!ns) { + esw_warn(dev, "Failed to get FDB sub namespace\n"); + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return ERR_PTR(-EOPNOTSUPP); + } if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 0, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto out_namespace; - } - esw->fdb_table.offloads.fast_fdb = fdb; + table_prio = (chain * FDB_MAX_PRIO) + prio - 1; - if (!mlx5_esw_has_fwd_fdb(dev)) - goto out_namespace; + /* create earlier levels for correct fs_core lookup when + *
connecting tables + */ + for (l = 0; l <= level; l++) { + if (fdb_prio_table(esw, chain, prio, l).fdb) { + fdb_prio_table(esw, chain, prio, l).num_rules++; + continue; + } - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 1, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create fwd table err %d\n", err); - goto out_ft; + fdb = create_next_size_table(esw, ns, table_prio, l, flags); + if (IS_ERR(fdb)) { + l--; + goto err_create_fdb; + } + + fdb_prio_table(esw, chain, prio, l).fdb = fdb; + fdb_prio_table(esw, chain, prio, l).num_rules = 1; } - esw->fdb_table.offloads.fwd_fdb = fdb; - return err; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; -out_ft: - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); -out_namespace: - return err; +err_create_fdb: + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + if (l >= 0) + esw_put_prio_table(esw, chain, prio, l); + + return fdb; +} + +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) +{ + int l; + + if (chain == FDB_SLOW_PATH_CHAIN) + return; + + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); + + for (l = level; l >= 0; l--) { + if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) + continue; + + put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); + mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); + fdb_prio_table(esw, chain, prio, l).fdb = NULL; + } + + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); } -static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) { - if (mlx5_esw_has_fwd_fdb(esw->dev)) - mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb); - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); + /* If lazy creation isn't supported, deref the fast path tables */ + if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { + esw_put_prio_table(esw, 0, 1, 1); + esw_put_prio_table(esw, 0, 1, 0); + } } #define MAX_PF_SQ 256 @@ -579,12 +767,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0; + int table_size, ix, err = 0, i; struct mlx5_flow_group *g; + u32 flags = 0, fdb_max; void *match_criteria; - u32 *flow_group_in; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); @@ -599,12 +788,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - err = esw_create_offloads_fast_fdb_table(esw); - if (err) - goto fast_fdb_err; + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, + fdb_max); + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) + esw->fdb_table.offloads.fdb_left[i] = + ESW_POOLS[i] <= fdb_max ? 
ESW_SIZE / ESW_POOLS[i] : 0; table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + /* create the slow path fdb with encap set, so further table instances + * can be created at run time while VFs are probed if the FW allows that. + */ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + ft_attr.flags = flags; ft_attr.max_fte = table_size; ft_attr.prio = FDB_SLOW_PATH; @@ -616,6 +822,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; + /* If lazy creation isn't supported, open the fast path tables now */ + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); + esw_get_prio_table(esw, 0, 1, 0); + esw_get_prio_table(esw, 0, 1, 1); + } else { + esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + } + /* create send-to-vport group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -663,6 +881,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto miss_rule_err; + esw->nvports = nvports; kvfree(flow_group_in); return 0; @@ -671,10 +890,9 @@ miss_rule_err: miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: + esw_destroy_offloads_fast_fdb_tables(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: - esw_destroy_offloads_fast_fdb_table(esw); -fast_fdb_err: ns_err: kvfree(flow_group_in); return err; @@ -682,7 +900,7 @@ ns_err: static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { - if (!esw->fdb_table.offloads.fast_fdb) + if (!esw->fdb_table.offloads.slow_fdb) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); @@ -692,7 +910,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fast_fdb_tables(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@ -949,6 +1167,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) return err; @@ -1256,7 +1476,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && - (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) return -EOPNOTSUPP; @@ -1277,16 +1497,19 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return -EOPNOTSUPP; } - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; - err = esw_create_offloads_fast_fdb_table(esw); + + err = esw_create_offloads_fdb_tables(esw, esw->nvports); + if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed re-creating fast FDB table"); esw->offloads.encap = !encap; - (void)esw_create_offloads_fast_fdb_table(esw); + 
(void)esw_create_offloads_fdb_tables(esw, esw->nvports); } + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 5645a4facad2..515e3d6de051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -245,7 +245,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, return ERR_PTR(res); } - /* Context will be freed by wait func after completion */ + /* Context should be freed by the caller after completion. */ return context; } @@ -418,10 +418,8 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); cmd.flags = htonl(flags); context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); - if (IS_ERR(context)) { - err = PTR_ERR(context); - goto out; - } + if (IS_ERR(context)) + return PTR_ERR(context); err = mlx5_fpga_ipsec_cmd_wait(context); if (err) @@ -435,6 +433,7 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) } out: + kfree(context); return err; } @@ -650,7 +649,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - flow_act->has_flow_tag) + (flow_act->flags & FLOW_ACT_HAS_TAG)) return false; return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 8e01f818021b..08a891f9aade 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *next_ft, unsigned int *table_id, u32 flags) { - int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); + int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, } MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, - en_encap_decap); - MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en, - en_encap_decap); + en_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + en_encap); switch (op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -343,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -417,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, continue; MLX5_SET(flow_counter_list, in_dests, flow_counter_id, - dst->dest_attr.counter->id); + dst->dest_attr.counter_id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } @@ -594,62 +596,78 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *bytes = MLX5_GET64(traffic_counter, stats, octets); } -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t 
size, - void *encap_header, - u32 *encap_id) +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id) { - int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); - u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; - void *encap_header_in; - void *header; + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + void *packet_reformat_context_in; + int max_encap_size; + void *reformat; int inlen; int err; u32 *in; + if (namespace == MLX5_FLOW_NAMESPACE_FDB) + max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + else + max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); + if (size > max_encap_size) { mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", size, max_encap_size); return -EINVAL; } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; - encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); - header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); - inlen = header - (void *)in + size; + packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in, + in, packet_reformat_context); + reformat = MLX5_ADDR_OF(packet_reformat_context_in, + packet_reformat_context_in, + reformat_data); + inlen = reformat - (void *)in + size; memset(in, 0, inlen); - MLX5_SET(alloc_encap_header_in, in, opcode, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER); - MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); - MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); - memcpy(header, encap_header, size); + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_data_size, size); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_type, reformat_type); + memcpy(reformat, reformat_data, size); memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id) { - u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; memset(in, 0, sizeof(in)); - MLX5_SET(dealloc_encap_header_in, in, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); - MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + packet_reformat_id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, u8 namespace, u8 num_actions, @@ -667,9 +685,14 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, table_type = FS_FT_FDB; break; case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_BYPASS: 
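/* [Editor's note, not part of the patch] The BYPASS case added above
 * intentionally falls through to the NIC RX branch, while the new EGRESS
 * case below maps to NIC TX: each namespace resolves to the flow-table
 * type whose max_modify_header_actions capability bounds num_actions.
 */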
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); table_type = FS_FT_NIC_RX; break; + case MLX5_FLOW_NAMESPACE_EGRESS: + max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_TX; + break; default: return -EOPNOTSUPP; } @@ -702,6 +725,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, kfree(in); return err; } +EXPORT_SYMBOL(mlx5_modify_header_alloc); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) { @@ -716,6 +740,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -760,8 +785,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_FDB: case FS_FT_SNIFFER_RX: case FS_FT_SNIFFER_TX: - return mlx5_fs_cmd_get_fw_cmds(); case FS_FT_NIC_TX: + return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 37d114c668b7..67ba4c975d81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,6 +40,7 @@ #include "diag/fs_tracepoint.h" #include "accel/ipsec.h" #include "fpga/ipsec.h" +#include "eswitch.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -76,6 +77,14 @@ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) +#define FS_CHAINING_CAPS_EGRESS \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit.flow_table_modify)) + #define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 @@ -151,6 +160,17 @@ static struct init_tree_node { } }; +static struct init_tree_node egress_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 1, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + enum fs_i_lock_class { FS_LOCK_GRANDPARENT, FS_LOCK_PARENT, @@ -694,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root) + if (!root || root->type == FS_TYPE_PRIO_CHAINS) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -1388,7 +1408,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_DECAP | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | @@ -1408,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if (flow_act->has_flow_tag && + if ((flow_act->flags & FLOW_ACT_HAS_TAG) && fte->action.flow_tag != flow_act->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u 
already exists with different flow tag %u\n", @@ -1455,29 +1475,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, return handle; } -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) +static bool counter_is_valid(u32 action) { - struct mlx5_flow_rule *dst; - struct fs_fte *fte; - - fs_get_obj(fte, handle->rule[0]->node.parent); - - fs_for_each_dst(dst, fte) { - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) - return dst->dest_attr.counter; - } - - return NULL; -} - -static bool counter_is_valid(struct mlx5_fc *counter, u32 action) -{ - if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) - return !counter; - - if (!counter) - return false; - return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); } @@ -1487,7 +1486,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, struct mlx5_flow_table *ft) { if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) - return counter_is_valid(dest->counter, action); + return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; @@ -1629,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, search_again_locked: version = matched_fgs_get_version(match_head); + if (flow_act->flags & FLOW_ACT_NO_APPEND) + goto skip_search; /* Try to find a fg that already contains a matching fte */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1645,6 +1646,11 @@ search_again_locked: return rule; } +skip_search: + /* No group with matching fte found, or we skipped the search. + * Try to add a new fte to any matching fg. + */ + /* Check the ft version, for case that new flow group * was added while the fgs weren't locked */ @@ -1975,12 +1981,24 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) fg->id); } +struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, + int n) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || !steering->fdb_sub_ns) + return NULL; + + return steering->fdb_sub_ns[n]; +} +EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); + struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { struct mlx5_flow_steering *steering = dev->priv.steering; struct mlx5_flow_root_namespace *root_ns; - int prio; + int prio = 0; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; @@ -1988,40 +2006,29 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { - case MLX5_FLOW_NAMESPACE_BYPASS: - case MLX5_FLOW_NAMESPACE_LAG: - case MLX5_FLOW_NAMESPACE_OFFLOADS: - case MLX5_FLOW_NAMESPACE_ETHTOOL: - case MLX5_FLOW_NAMESPACE_KERNEL: - case MLX5_FLOW_NAMESPACE_LEFTOVERS: - case MLX5_FLOW_NAMESPACE_ANCHOR: - prio = type; - break; case MLX5_FLOW_NAMESPACE_FDB: if (steering->fdb_root_ns) return &steering->fdb_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_RX: if (steering->sniffer_rx_root_ns) return &steering->sniffer_rx_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_TX: if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; - else - return NULL; - case MLX5_FLOW_NAMESPACE_EGRESS: - if (steering->egress_root_ns) - return &steering->egress_root_ns->ns; - else - return NULL; - default: return NULL; + default: + break; + } + + if (type == MLX5_FLOW_NAMESPACE_EGRESS) { + root_ns = steering->egress_root_ns; + } else { /* Must be NIC RX */ + root_ns = 
steering->root_ns; + prio = type; } - root_ns = steering->root_ns; if (!root_ns) return NULL; @@ -2064,8 +2071,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d } } -static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned int prio, int num_levels) +static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels, + enum fs_node_type type) { struct fs_prio *fs_prio; @@ -2073,7 +2082,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, if (!fs_prio) return ERR_PTR(-ENOMEM); - fs_prio->node.type = FS_TYPE_PRIO; + fs_prio->node.type = type; tree_init_node(&fs_prio->node, NULL, del_sw_prio); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; @@ -2083,6 +2092,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return fs_prio; } +static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS); +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO); +} + static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace *ns) { @@ -2387,6 +2409,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_egress_acls_root_ns(dev); cleanup_ingress_acls_root_ns(dev); cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); @@ -2432,27 +2457,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + struct mlx5_flow_namespace *ns; + struct fs_prio *maj_prio; + struct fs_prio *min_prio; + int levels; + int chain; + int prio; + int err; steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); if (!steering->fdb_root_ns) return -ENOMEM; - prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2); - if (IS_ERR(prio)) + steering->fdb_sub_ns = kzalloc(sizeof(*steering->fdb_sub_ns) * + (FDB_MAX_CHAIN + 1), GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0, + levels); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } + + for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { + ns = fs_create_namespace(maj_prio); + if (IS_ERR(ns)) { + err = PTR_ERR(ns); + goto out_err; + } + + for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { + min_prio = fs_create_prio(ns, prio, 2); + if (IS_ERR(min_prio)) { + err = PTR_ERR(min_prio); + goto out_err; + } + } + + steering->fdb_sub_ns[chain] = ns; + } - prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); - if (IS_ERR(prio)) + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } set_prio_attrs(steering->fdb_root_ns); return 0; out_err: cleanup_root_ns(steering->fdb_root_ns); + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; steering->fdb_root_ns = NULL; - return PTR_ERR(prio); + return err; } static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) @@ -2537,16
+2599,23 @@ cleanup_root_ns: static int init_egress_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + int err; steering->egress_root_ns = create_root_ns(steering, FS_FT_NIC_TX); if (!steering->egress_root_ns) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + err = init_root_tree(steering, &egress_root_fs, + &steering->egress_root_ns->ns.node); + if (err) + goto cleanup; + set_prio_attrs(steering->egress_root_ns); + return 0; +cleanup: + cleanup_root_ns(steering->egress_root_ns); + steering->egress_root_ns = NULL; + return err; } int mlx5_init_fs(struct mlx5_core_dev *dev) @@ -2614,7 +2683,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (MLX5_IPSEC_DEV(dev)) { + if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a06f83c0c2b6..b51ad217da32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -38,9 +38,21 @@ #include <linux/rhashtable.h> #include <linux/llist.h> +/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, + * and those are in parallel to one another when going over them to connect + * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one + * parallel namespace will not automatically connect to the first flow table + * found in any prio in any next namespace, but skip the entire containing + * TYPE_PRIO_CHAINS prio. + * + * This is used to implement tc chains, each chain of prios is a different + * namespace inside a containing TYPE_PRIO_CHAINS prio. + */ + enum fs_node_type { FS_TYPE_NAMESPACE, FS_TYPE_PRIO, + FS_TYPE_PRIO_CHAINS, FS_TYPE_FLOW_TABLE, FS_TYPE_FLOW_GROUP, FS_TYPE_FLOW_ENTRY, @@ -73,6 +85,7 @@ struct mlx5_flow_steering { struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_namespace **fdb_sub_ns; struct mlx5_flow_root_namespace **esw_egress_root_ns; struct mlx5_flow_root_namespace **esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 09206c4acd9a..32accd6b041b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -99,6 +99,18 @@ static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, list_add_tail(&counter->list, next); } +static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + list_del(&counter->list); + + spin_lock(&fc_stats->counters_idr_lock); + WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); + spin_unlock(&fc_stats->counters_idr_lock); +} + /* The function returns the last counter that was queried so the caller * function can continue calling it till all counters are queried. 
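 * [Editor's note, not part of the patch: in the mlx5_fc_stats_work() hunk
 * just below, dellist is now drained before addlist. Since a counter is
 * always pushed to addlist at create time before it can ever be pushed to
 * dellist at destroy time, snapshotting dellist first guarantees that any
 * removal processed in a given run had its insertion processed in the same
 * run or an earlier one, which is what the new comment in that hunk
 * asserts.]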
*/ @@ -179,20 +191,23 @@ static void mlx5_fc_stats_work(struct work_struct *work) struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, priv.fc_stats.work.work); struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - struct llist_node *tmplist = llist_del_all(&fc_stats->addlist); + /* Take dellist first to ensure that counters cannot be deleted before + * they are inserted. + */ + struct llist_node *dellist = llist_del_all(&fc_stats->dellist); + struct llist_node *addlist = llist_del_all(&fc_stats->addlist); struct mlx5_fc *counter = NULL, *last = NULL, *tmp; unsigned long now = jiffies; - if (tmplist || !list_empty(&fc_stats->counters)) + if (addlist || !list_empty(&fc_stats->counters)) queue_delayed_work(fc_stats->wq, &fc_stats->work, fc_stats->sampling_interval); - llist_for_each_entry(counter, tmplist, addlist) + llist_for_each_entry(counter, addlist, addlist) mlx5_fc_stats_insert(dev, counter); - tmplist = llist_del_all(&fc_stats->dellist); - llist_for_each_entry_safe(counter, tmp, tmplist, dellist) { - list_del(&counter->list); + llist_for_each_entry_safe(counter, tmp, dellist, dellist) { + mlx5_fc_stats_remove(dev, counter); mlx5_free_fc(dev, counter); } @@ -258,6 +273,12 @@ err_out: } EXPORT_SYMBOL(mlx5_fc_create); +u32 mlx5_fc_id(struct mlx5_fc *counter) +{ + return counter->id; +} +EXPORT_SYMBOL(mlx5_fc_id); + void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; @@ -266,10 +287,6 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) return; if (counter->aging) { - spin_lock(&fc_stats->counters_idr_lock); - WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); - spin_unlock(&fc_stats->counters_idr_lock); - llist_add(&counter->dellist, &fc_stats->dellist); mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 5ef3ef0072b4..9165ca567047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -110,12 +110,11 @@ struct mlx5i_tx_wqe { static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, struct mlx5i_tx_wqe **wqe, - u16 *pi) + u16 pi) { struct mlx5_wq_cyc *wq = &sq->wq; - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); memset(*wqe, 0, sizeof(**wqe)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cc298527baf1..0594d0961cb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -39,6 +39,7 @@ #include <linux/if_link.h> #include <linux/firmware.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" @@ -171,17 +172,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); - -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 
modify_header_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 4ca07bfb6b14..91b8139a388d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, } qp->qpn = MLX5_GET(create_dct_out, out, dctn); + qp->uid = MLX5_GET(create_dct_in, in, uid); err = create_resource_common(dev, qp, MLX5_RES_DCT); if (err) goto err_cmd; @@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, err_cmd: MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, din, uid, qp->uid); mlx5_cmd_exec(dev, (void *)&in, sizeof(din), (void *)&out, sizeof(dout)); return err; @@ -240,6 +242,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, if (err) return err; + qp->uid = MLX5_GET(create_qp_in, in, uid); qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); @@ -261,6 +264,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, uid, qp->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -275,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + MLX5_SET(drain_dct_in, in, uid, qp->uid); return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); } @@ -301,6 +306,7 @@ destroy: destroy_resource_common(dev, &dct->mqp); MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); return err; @@ -320,6 +326,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -373,7 +380,7 @@ static void mbox_free(struct mbox_info *mbox) static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, u32 opt_param_mask, void *qpc, - struct mbox_info *mbox) + struct mbox_info *mbox, u16 uid) { mbox->out = NULL; mbox->in = NULL; @@ -381,26 +388,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, #define MBOX_ALLOC(mbox, typ) \ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) -#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ - MLX5_SET(typ##_in, in, opcode, _opcode); \ - MLX5_SET(typ##_in, in, qpn, _qpn) - -#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ - MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ - MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ - memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \ + do { \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn); \ + MLX5_SET(typ##_in, in, uid, _uid); \ + } while (0) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \ + do { \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \ + MLX5_SET(typ##_in, in, 
opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \ + MLX5_ST_SZ_BYTES(qpc)); \ + } while (0) switch (opcode) { /* 2RST & 2ERR */ case MLX5_CMD_OP_2RST_QP: if (MBOX_ALLOC(mbox, qp_2rst)) return -ENOMEM; - MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid); break; case MLX5_CMD_OP_2ERR_QP: if (MBOX_ALLOC(mbox, qp_2err)) return -ENOMEM; - MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid); break; /* MODIFY with QPC */ @@ -408,37 +421,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, if (MBOX_ALLOC(mbox, rst2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2RTR_QP: if (MBOX_ALLOC(mbox, init2rtr_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTR2RTS_QP: if (MBOX_ALLOC(mbox, rtr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTS2RTS_QP: if (MBOX_ALLOC(mbox, rts2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_SQERR2RTS_QP: if (MBOX_ALLOC(mbox, sqerr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2INIT_QP: if (MBOX_ALLOC(mbox, init2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; default: mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", @@ -456,7 +469,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, int err; err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, - opt_param_mask, qpc, &mbox); + opt_param_mask, qpc, &mbox, qp->uid); if (err) return err; @@ -531,6 +544,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); +static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + MLX5_SET(destroy_rq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq) { @@ -541,6 +565,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + rq->uid = MLX5_GET(create_rq_in, in, uid); rq->qpn = rqn; err = create_resource_common(dev, rq, MLX5_RES_RQ); if (err) @@ -549,7 +574,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_rq: - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); return err; } @@ -559,10 +584,21 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); +static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid) +{ + u32 
in[MLX5_ST_SZ_DW(destroy_sq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + MLX5_SET(destroy_sq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq) { @@ -573,6 +609,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + sq->uid = MLX5_GET(create_sq_in, in, uid); sq->qpn = sqn; err = create_resource_common(dev, sq, MLX5_RES_SQ); if (err) @@ -581,7 +618,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_sq: - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); return err; } @@ -591,7 +628,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq) { destroy_resource_common(dev, sq); - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 7e20666ce5ae..6a6fc9be01e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (!create_in) return -ENOMEM; + MLX5_SET(create_srq_in, create_in, uid, in->uid); srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); @@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } return err; } @@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_srq_in, srq_in, opcode, MLX5_CMD_OP_DESTROY_SRQ); MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, if (!create_in) return -ENOMEM; + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); @@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, goto out; srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; out: kvfree(create_in); return err; @@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, 
sizeof(xrcsrq_out)); @@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); @@ -365,10 +374,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, wq = MLX5_ADDR_OF(rmpc, rmpc, wq); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + if (!err) + srq->uid = in->uid; kvfree(create_in); return err; @@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - return mlx5_core_destroy_rmp(dev, srq->srqn); + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int arm_rmp_cmd(struct mlx5_core_dev *dev, @@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); MLX5_SET(wq, wq, lwm, lwm); MLX5_SET(rmp_bitmask, bitmask, lwm, 1); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); @@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(xrqc, xrqc, user_index, in->user_index); MLX5_SET(xrqc, xrqc, cqn, in->cqn); MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } return err; } @@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 68e7f8df2a6d..2dcbf1ebfd6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) return (u32)wq->fbc.sz_m1 + 1; } -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq) -{ - return wq->fbc.frag_sz_m1 + 1; -} - u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) { return wq->fbc.sz_m1 + 1; @@ -54,54 +49,37 @@ u32 mlx5_wq_ll_get_size(struct 
mlx5_wq_ll *wq) return (u32)wq->fbc.sz_m1 + 1; } -static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) -{ - return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride; -} - -static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) -{ - return mlx5_wq_cyc_get_byte_size(&wq->rq) + - mlx5_wq_cyc_get_byte_size(&wq->sq); -} - -static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) +static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) { - return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride; -} - -static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq) -{ - return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride; + return ((u32)1 << log_sz) << log_stride; } int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; int err; - mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), - MLX5_GET(wq, wqc, log_wq_sz), - fbc); - wq->sz = wq->fbc.sz_m1 + 1; - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - fbc->frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + wq->sz = mlx5_wq_cyc_get_size(wq); wq_ctrl->mdev = mdev; @@ -113,46 +91,19 @@ err_db_free: return err; } -static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf, - struct mlx5_wq_qp *qp) -{ - struct mlx5_frag_buf_ctrl *sq_fbc; - struct mlx5_frag_buf *rqb, *sqb; - - rqb = &qp->rq.fbc.frag_buf; - *rqb = *buf; - rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq); - rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE); - - sq_fbc = &qp->sq.fbc; - sqb = &sq_fbc->frag_buf; - *sqb = *buf; - sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq); - sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE); - sqb->frags += rqb->npages; /* first part is for the rq */ - if (sq_fbc->strides_offset) - sqb->frags--; -} - int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u16 sq_strides_offset; - u32 rq_pg_remainder; - int err; + u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size); + u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB); + u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size); - mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4, - MLX5_GET(qpc, qpc, log_rq_size), - &wq->rq.fbc); + u32 rq_byte_size; + int err; - rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE; - sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB; - mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB), - MLX5_GET(qpc, qpc, log_sq_size), - sq_strides_offset, - &wq->sq.fbc); err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -160,14 +111,32 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq), + err = mlx5_frag_buf_alloc_node(mdev, + wq_get_byte_sz(log_rq_sz, log_rq_stride) + + wq_get_byte_sz(log_sq_sz, 
log_sq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq); + mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc); + + rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride); + + if (rq_byte_size < PAGE_SIZE) { + /* SQ starts within the same page of the RQ */ + u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB; + + mlx5_init_fbc_offset(wq_ctrl->buf.frags, + log_sq_stride, log_sq_sz, sq_strides_offset, + &wq->sq.fbc); + } else { + u16 rq_npages = rq_byte_size >> PAGE_SHIFT; + + mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages, + log_sq_stride, log_sq_sz, &wq->sq.fbc); + } wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; @@ -186,17 +155,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; - mlx5_core_init_cq_frag_buf(&wq->fbc, cqc); - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { @@ -205,8 +176,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->fbc.frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc); wq_ctrl->mdev = mdev; @@ -222,30 +192,29 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; struct mlx5_wqe_srq_next_seg *next_seg; int err; int i; - mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), - MLX5_GET(wq, wqc, log_wq_sz), - fbc); - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - wq->fbc.frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); for (i = 0; i < fbc->sz_m1; i++) { next_seg = mlx5_wq_ll_get_wqe(wq, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 3a1a170bb2d7..b1293d153a58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, 
struct mlx5_wq_qp *wq, @@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) return ctr & wq->fbc.sz_m1; } -static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr & wq->fbc.frag_sz_m1; -} - static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) { return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); @@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) return mlx5_frag_buf_get_wqe(&wq->fbc, ix); } +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; +} + static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) { int equal = (cc1 == cc2); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 68fa44a41485..1f77e97e2d7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl_flex_keys.o \ spectrum1_mr_tcam.o spectrum2_mr_tcam.o \ spectrum_mr_tcam.o spectrum_mr.o \ - spectrum_qdisc.o spectrum_span.o + spectrum_qdisc.o spectrum_span.o \ + spectrum_nve.o spectrum_nve_vxlan.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 81533d7f395c..937d0ace699a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1055,6 +1055,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_driver_init: mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: if (!reload) devlink_unregister(devlink); @@ -1088,6 +1089,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); + mlxsw_hwmon_fini(mlxsw_core->hwmon); if (!reload) devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 655ddd204ab2..c35be477856f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -359,6 +359,10 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; } +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ +} + #endif struct mlxsw_thermal; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index f6cf2896d337..e04e8162aa14 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -303,8 +303,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, struct device *hwmon_dev; int err; - mlxsw_hwmon = devm_kzalloc(mlxsw_bus_info->dev, sizeof(*mlxsw_hwmon), - GFP_KERNEL); + mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL); if (!mlxsw_hwmon) return -ENOMEM; mlxsw_hwmon->core = mlxsw_core; @@ -321,10 +320,9 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; - hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, - 
"mlxsw", - mlxsw_hwmon, - mlxsw_hwmon->groups); + hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", mlxsw_hwmon, + mlxsw_hwmon->groups); if (IS_ERR(hwmon_dev)) { err = PTR_ERR(hwmon_dev); goto err_hwmon_register; @@ -337,5 +335,12 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, err_hwmon_register: err_fans_init: err_temp_init: + kfree(mlxsw_hwmon); return err; } + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + hwmon_device_unregister(mlxsw_hwmon->hwmon_dev); + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ed7e4c4e7403..8a4983adae94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2994,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_qdiscs_init; } + err = mlxsw_sp_port_nve_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n", + mlxsw_sp_port->local_port); + goto err_port_nve_init; + } + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", @@ -3022,6 +3029,8 @@ err_register_netdev: mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); err_port_vlan_get: + mlxsw_sp_port_nve_fini(mlxsw_sp_port); +err_port_nve_init: mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); err_port_qdiscs_init: mlxsw_sp_port_fids_fini(mlxsw_sp_port); @@ -3061,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_nve_fini(mlxsw_sp_port); mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -3795,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_afa_init; } + err = mlxsw_sp_nve_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n"); + goto err_nve_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3841,6 +3857,8 @@ err_acl_init: err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_nve_fini(mlxsw_sp); +err_nve_init: mlxsw_sp_afa_fini(mlxsw_sp); err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3873,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3887,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3900,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_nve_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -4566,6 +4587,41 @@ 
static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); } +static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev) +{ + unsigned int num_vxlans = 0; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + num_vxlans++; + } + + return num_vxlans > 1; +} + +static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + if (br_multicast_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (br_vlan_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); + return false; + } + + return true; +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, struct net_device *dev, unsigned long event, void *ptr) @@ -4595,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4752,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4898,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } +static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *cu_info; + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + extack = netdev_notifier_info_to_extack(info); + + switch (event) { + case NETDEV_CHANGEUPPER: + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); + upper_dev = cu_info->upper_dev; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; + if (cu_info->linking) { + if (!netif_running(dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, + dev, extack); + } else { + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + } + break; + case NETDEV_PRE_UP: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, + extack); + case NETDEV_DOWN: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if 
(!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + break; + } + + return 0; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4914,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, } mlxsw_sp_span_respin(mlxsw_sp); + if (netif_is_vxlan(dev)) + err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 1f68ac2a20f4..0875a79cbe7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -16,6 +16,7 @@ #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> +#include <net/vxlan.h> #include "port.h" #include "core.h" @@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id { struct mlxsw_sp_port; struct mlxsw_sp_rif; struct mlxsw_sp_span_entry; +enum mlxsw_sp_l3proto; +union mlxsw_sp_l3addr; struct mlxsw_sp_upper { struct net_device *dev; @@ -113,9 +116,11 @@ struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; struct mlxsw_sp_kvdl; +struct mlxsw_sp_nve; struct mlxsw_sp_kvdl_ops; struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_tcam_ops; +struct mlxsw_sp_nve_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -132,6 +137,7 @@ struct mlxsw_sp { struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct mlxsw_sp_kvdl *kvdl; + struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; @@ -146,6 +152,7 @@ struct mlxsw_sp { const struct mlxsw_afk_ops *afk_ops; const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; + const struct mlxsw_sp_nve_ops **nve_ops_arr; }; static inline struct mlxsw_sp_upper * @@ -235,6 +242,25 @@ struct mlxsw_sp_port { struct mlxsw_sp_acl_block *eg_acl_block; }; +static inline struct net_device * +mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + return dev; + } + + return NULL; +} + +static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev); +} + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *br_dev); bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -431,6 +464,15 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + 
enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index); +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip); +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { @@ -679,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni); +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni); +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid); +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni); +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid); int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, bool member); @@ -697,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index); struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp); @@ -742,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; /* spectrum2_mr_tcam.c */ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops; +/* spectrum_nve.c */ +enum mlxsw_sp_nve_type { + MLXSW_SP_NVE_TYPE_VXLAN, +}; + +struct mlxsw_sp_nve_params { + enum mlxsw_sp_nve_type type; + __be32 vni; + const struct net_device *dev; +}; + +extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[]; +extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[]; + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp); +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr); +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack); +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 715d24ff937e..a3db033d7399 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -6,6 +6,7 @@ #include <linux/if_vlan.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> 
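/* Editorial aside, not part of the patch: the spectrum_fid.c hunks below
 * start keying FIDs by VXLAN Network Identifier with a kernel rhashtable.
 * A minimal, self-contained sketch of the same pattern follows; the item
 * type and helper names are hypothetical, only the rhashtable_* calls and
 * the rhashtable_params layout match the real API used by this file.
 */

#include <linux/rhashtable.h>
#include <linux/stddef.h>	/* offsetof(), sizeof_field() */
#include <linux/types.h>

struct item {
	u32 key;			/* lookup key, like fid->vni below */
	struct rhash_head node;		/* hash table linkage */
};

static const struct rhashtable_params item_ht_params = {
	.key_len = sizeof_field(struct item, key),
	.key_offset = offsetof(struct item, key),
	.head_offset = offsetof(struct item, node),
};

/* Returns 0 on success and -EEXIST if the key is already present, which
 * is the property mlxsw_sp_fid_vni_set() relies on to reject duplicate
 * VNIs.
 */
static int item_add(struct rhashtable *ht, struct item *item)
{
	return rhashtable_lookup_insert_fast(ht, &item->node,
					     item_ht_params);
}

static struct item *item_find(struct rhashtable *ht, u32 key)
{
	return rhashtable_lookup_fast(ht, &key, item_ht_params);
}

/* The table itself is created once with rhashtable_init(&ht,
 * &item_ht_params) and torn down with rhashtable_destroy(&ht), mirroring
 * what this patch adds to mlxsw_sp_fids_init()/mlxsw_sp_fids_fini().
 */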
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 
 #include "spectrum.h"
@@ -14,6 +15,7 @@ struct mlxsw_sp_fid_family;
 
 struct mlxsw_sp_fid_core {
+	struct rhashtable vni_ht;
 	struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
 	unsigned int *port_fid_mappings;
 };
@@ -24,6 +26,12 @@ struct mlxsw_sp_fid {
 	unsigned int ref_count;
 	u16 fid_index;
 	struct mlxsw_sp_fid_family *fid_family;
+
+	struct rhash_head vni_ht_node;
+	__be32 vni;
+	u32 nve_flood_index;
+	u8 vni_valid:1,
+	   nve_flood_index_valid:1;
 };
 
 struct mlxsw_sp_fid_8021q {
@@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d {
 	int br_ifindex;
 };
 
+static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
+	.key_len = sizeof_field(struct mlxsw_sp_fid, vni),
+	.key_offset = offsetof(struct mlxsw_sp_fid, vni),
+	.head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node),
+};
+
 struct mlxsw_sp_flood_table {
 	enum mlxsw_sp_flood_type packet_type;
 	enum mlxsw_reg_sfgc_bridge_type bridge_type;
@@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops {
 			    struct mlxsw_sp_port *port, u16 vid);
 	void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
 			       struct mlxsw_sp_port *port, u16 vid);
+	int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+	void (*vni_clear)(struct mlxsw_sp_fid *fid);
+	int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
+				   u32 nve_flood_index);
+	void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
 };
 
 struct mlxsw_sp_fid_family {
@@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
 	[MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
 };
 
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+						__be32 vni)
+{
+	struct mlxsw_sp_fid *fid;
+
+	fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni,
+				     mlxsw_sp_fid_vni_ht_params);
+	if (fid)
+		fid->ref_count++;
+
+	return fid;
+}
+
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni)
+{
+	if (!fid->vni_valid)
+		return -EINVAL;
+
+	*vni = fid->vni;
+
+	return 0;
+}
+
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+				     u32 nve_flood_index)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	int err;
+
+	if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+		return -EINVAL;
+
+	err = ops->nve_flood_index_set(fid, nve_flood_index);
+	if (err)
+		return err;
+
+	fid->nve_flood_index = nve_flood_index;
+	fid->nve_flood_index_valid = true;
+
+	return 0;
+}
+
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+	if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+		return;
+
+	fid->nve_flood_index_valid = false;
+	ops->nve_flood_index_clear(fid);
+}
+
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
+{
+	return fid->nve_flood_index_valid;
+}
+
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+	int err;
+
+	if (WARN_ON(!ops->vni_set || fid->vni_valid))
+		return -EINVAL;
+
+	fid->vni = vni;
+	err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
+					    &fid->vni_ht_node,
+					    mlxsw_sp_fid_vni_ht_params);
+	if (err)
+		return err;
+
+	err = ops->vni_set(fid, vni);
+	if (err)
+		goto err_vni_set;
+
+	fid->vni_valid = true;
+
+	return 0;
+
+err_vni_set:
+
rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + return err; +} + +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + + if (WARN_ON(!ops->vni_clear || !fid->vni_valid)) + return; + + fid->vni_valid = false; + ops->vni_clear(fid); + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); +} + +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->vni_valid; +} + static const struct mlxsw_sp_flood_table * mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type) @@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } +static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + __be32 vni, bool vni_valid, u32 nve_flood_index, + bool nve_flood_index_valid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index, + 0); + mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid); + mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni)); + mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid); + mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, u16 vid, bool valid) { @@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) { + if (fid->vni_valid) + mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); } @@ -531,6 +678,41 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, mlxsw_sp_port->local_port, vid, false); } +static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni, + true, fid->nve_flood_index, + fid->nve_flood_index_valid); +} + +static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false, + fid->nve_flood_index, fid->nve_flood_index_valid); +} + +static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, + fid->vni, fid->vni_valid, nve_flood_index, + true); +} + +static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni, + fid->vni_valid, 0, false); +} + static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .setup = mlxsw_sp_fid_8021d_setup, .configure = mlxsw_sp_fid_8021d_configure, @@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .flood_index = mlxsw_sp_fid_8021d_flood_index, .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, + .vni_set = 
mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, }; static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { @@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, }; -static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_fid_type type, - const void *arg) +static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) { struct mlxsw_sp_fid_family *fid_family; struct mlxsw_sp_fid *fid; - u16 fid_index; - int err; fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; list_for_each_entry(fid, &fid_family->fids_list, list) { @@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, return fid; } + return NULL; +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + u16 fid_index; + int err; + + fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg); + if (fid) + return fid; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; fid = kzalloc(fid_family->fid_size, GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); } +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, + &br_ifindex); +} + struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; mlxsw_sp->fid_core = fid_core; + err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params); + if (err) + goto err_rhashtable_init; + fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), GFP_KERNEL); if (!fid_core->port_fid_mappings) { @@ -944,6 +1156,8 @@ err_fid_ops_register: } kfree(fid_core->port_fid_mappings); err_alloc_port_fid_mappings: + rhashtable_destroy(&fid_core->vni_ht); +err_rhashtable_init: kfree(fid_core); return err; } @@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_core->fid_family_arr[i]); kfree(fid_core->port_fid_mappings); + rhashtable_destroy(&fid_core->vni_ht); kfree(fid_core); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c new file mode 100644 index 000000000000..ad06d9969bc1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -0,0 +1,982 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> + +#include "reg.h" +#include "spectrum.h" +#include "spectrum_nve.h" + +const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops, +}; + +const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops, +}; + +struct mlxsw_sp_nve_mc_entry; +struct mlxsw_sp_nve_mc_record; +struct mlxsw_sp_nve_mc_list; + +struct mlxsw_sp_nve_mc_record_ops { + enum mlxsw_reg_tnumt_record_type type; + int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); + void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry); + void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index); + bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); +}; + +struct mlxsw_sp_nve_mc_list_key { + u16 fid_index; +}; + +struct mlxsw_sp_nve_mc_ipv6_entry { + struct in6_addr addr6; + u32 addr6_kvdl_index; +}; + +struct mlxsw_sp_nve_mc_entry { + union { + __be32 addr4; + struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry; + }; + u8 valid:1; +}; + +struct mlxsw_sp_nve_mc_record { + struct list_head list; + enum mlxsw_sp_l3proto proto; + unsigned int num_entries; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_nve_mc_list *mc_list; + const struct mlxsw_sp_nve_mc_record_ops *ops; + u32 kvdl_index; + struct mlxsw_sp_nve_mc_entry entries[0]; +}; + +struct mlxsw_sp_nve_mc_list { + struct list_head records_list; + struct rhash_head ht_node; + struct mlxsw_sp_nve_mc_list_key key; +}; + +static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = { + .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key), + .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key), + .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node), +}; + +static int +mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + mc_entry->addr4 = addr->addr4; + + return 0; +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip = be32_to_cpu(mc_entry->addr4); + + mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip); +} + +static bool +mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return mc_entry->addr4 == addr->addr4; +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv4_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set, + .entry_compare = 
&mlxsw_sp_nve_mc_record_ipv4_entry_compare, +}; + +static int +mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + WARN_ON(1); + + return -EINVAL; +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index; + + mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr); +} + +static bool +mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6); +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv6_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare, +}; + +static const struct mlxsw_sp_nve_mc_record_ops * +mlxsw_sp_nve_mc_record_ops_arr[] = { + [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops, + [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops, +}; + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + return rhashtable_lookup_fast(&nve->mc_list_ht, key, + mlxsw_sp_nve_mc_list_ht_params); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL); + if (!mc_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mc_list->records_list); + mc_list->key = *key; + + err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_insert; + + return mc_list; + +err_rhashtable_insert: + kfree(mc_list); + return ERR_PTR(err); +} + +static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + WARN_ON(!list_empty(&mc_list->records_list)); + kfree(mc_list); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve_mc_list *mc_list; + + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key); + if (mc_list) + return mc_list; + + return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key); +} + +static void +mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + if (!list_empty(&mc_list->records_list)) + return; + mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) 
+{ + unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto]; + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * + sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); + if (!mc_record) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + &mc_record->kvdl_index); + if (err) + goto err_kvdl_alloc; + + mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto]; + mc_record->mlxsw_sp = mlxsw_sp; + mc_record->mc_list = mc_list; + mc_record->proto = proto; + list_add_tail(&mc_record->list, &mc_list->records_list); + + return mc_record; + +err_kvdl_alloc: + kfree(mc_record); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + + list_del(&mc_record->list); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + mc_record->kvdl_index); + WARN_ON(mc_record->num_entries); + kfree(mc_record); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) { + unsigned int num_entries = mc_record->num_entries; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + if (mc_record->proto == proto && + num_entries < nve->num_max_mc_entries[proto]) + return mc_record; + } + + return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto); +} + +static void +mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record) +{ + if (mc_record->num_entries != 0) + return; + + mlxsw_sp_nve_mc_record_destroy(mc_record); +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + if (mc_record->entries[i].valid) + continue; + return &mc_record->entries[i]; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record) +{ + enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type; + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + char tnumt_pl[MLXSW_REG_TNUMT_LEN]; + unsigned int num_max_entries; + unsigned int num_entries = 0; + u32 next_kvdl_index = 0; + bool next_valid = false; + int i; + + if (!list_is_last(&mc_record->list, &mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + next_kvdl_index = next_record->kvdl_index; + next_valid = true; + } + + mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE, + mc_record->kvdl_index, next_valid, + next_kvdl_index, mc_record->num_entries); + + num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl, + num_entries++); + } + + WARN_ON(num_entries != mc_record->num_entries); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl); +} + +static bool +mlxsw_sp_nve_mc_record_is_first(struct 
mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp_nve_mc_record *first_record; + + first_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mc_record == first_record; +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + if (mc_record->ops->entry_compare(mc_record, mc_entry, addr)) + return mc_entry; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_entry *mc_entry = NULL; + int err; + + mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record); + if (WARN_ON(!mc_entry)) + return -EINVAL; + + err = mc_record->ops->entry_add(mc_record, mc_entry, addr); + if (err) + return err; + mc_record->num_entries++; + mc_entry->valid = true; + + err = mlxsw_sp_nve_mc_record_refresh(mc_record); + if (err) + goto err_record_refresh; + + /* If this is a new record and not the first one, then we need to + * update the next pointer of the previous entry + */ + if (mc_record->num_entries != 1 || + mlxsw_sp_nve_mc_record_is_first(mc_record)) + return 0; + + err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list)); + if (err) + goto err_prev_record_refresh; + + return 0; + +err_prev_record_refresh: +err_record_refresh: + mc_entry->valid = false; + mc_record->num_entries--; + mc_record->ops->entry_del(mc_record, mc_entry); + return err; +} + +static void +mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + + mc_entry->valid = false; + mc_record->num_entries--; + + /* When the record continues to exist we only need to invalidate + * the requested entry + */ + if (mc_record->num_entries != 0) { + mlxsw_sp_nve_mc_record_refresh(mc_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the record needs to be deleted, but it is not the first, + * then we need to make sure that the previous record no longer + * points to it. 
Remove deleted record from the list to reflect + * that and then re-add it at the end, so that it could be + * properly removed by the record destruction code + */ + if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) { + struct mlxsw_sp_nve_mc_record *prev_record; + + prev_record = list_prev_entry(mc_record, list); + list_del(&mc_record->list); + mlxsw_sp_nve_mc_record_refresh(prev_record); + list_add_tail(&mc_record->list, &mc_list->records_list); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the first record needs to be deleted, but the list is not + * singular, then the second record needs to be written in the + * first record's address, as this address is stored as a property + * of the FID + */ + if (mlxsw_sp_nve_mc_record_is_first(mc_record) && + !list_is_singular(&mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + swap(mc_record->kvdl_index, next_record->kvdl_index); + mlxsw_sp_nve_mc_record_refresh(next_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* This is the last case where the last remaining record needs to + * be deleted. Simply delete the entry + */ + mc_record->ops->entry_del(mc_record, mc_entry); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + struct mlxsw_sp_nve_mc_entry **mc_entry) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry(mc_record, &mc_list->records_list, list) { + if (mc_record->proto != proto) + continue; + + *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr); + if (*mc_entry) + return mc_record; + } + + return NULL; +} + +static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto); + if (IS_ERR(mc_record)) + return PTR_ERR(mc_record); + + err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr); + if (err) + goto err_ip_add; + + return 0; + +err_ip_add: + mlxsw_sp_nve_mc_record_put(mc_record); + return err; +} + +static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, + &mc_entry); + if (WARN_ON(!mc_record)) + return; + + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static int +mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record in the list is a property of + * the FID and we never change it. 
It only needs to be set when + * a new list is created + */ + if (mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return 0; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index); +} + +static void +mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record needs to be invalidated only when + * the last record is about to be removed + */ + if (!list_is_singular(&mc_list->records_list)) + return; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + if (mc_record->num_entries != 1) + return; + + return mlxsw_sp_fid_nve_flood_index_clear(fid); +} + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key); + if (IS_ERR(mc_list)) + return PTR_ERR(mc_list); + + err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr); + if (err) + goto err_add_ip; + + err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list); + if (err) + goto err_fid_flood_index_set; + + return 0; + +err_fid_flood_index_set: + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); +err_add_ip: + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); + return err; +} + +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list); + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +static void +mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i]; + + if (!mc_entry->valid) + continue; + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + } + + WARN_ON(mc_record->num_entries); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_nve_mc_record *mc_record, *tmp; + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + if (!mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return; + + mlxsw_sp_fid_nve_flood_index_clear(fid); + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list) + mlxsw_sp_nve_mc_record_delete(mc_record); + + WARN_ON(!list_empty(&mc_list->records_list)); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0); + + return 
mlxsw_sp->nve->tunnel_index; +} + +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_config *config = &nve->config; + + if (nve->num_nve_tunnels && + config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 && + config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id) + return true; + + return false; +} + +static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + int err; + + if (nve->num_nve_tunnels++ != 0) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &nve->tunnel_index); + if (err) + goto err_kvdl_alloc; + + ops = nve->nve_ops_arr[config->type]; + err = ops->init(nve, config); + if (err) + goto err_ops_init; + + return 0; + +err_ops_init: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); +err_kvdl_alloc: + nve->num_nve_tunnels--; + return err; +} + +static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + + ops = nve->nve_ops_arr[nve->config.type]; + + if (mlxsw_sp->nve->num_nve_tunnels == 1) { + ops->fini(nve); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); + } + nve->num_nve_tunnels--; +} + +static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + struct mlxsw_sp_nve_config config; + int err; + + ops = nve->nve_ops_arr[params->type]; + + if (!ops->can_offload(nve, params->dev, extack)) + return -EOPNOTSUPP; + + memset(&config, 0, sizeof(config)); + ops->nve_config(nve, params->dev, &config); + if (nve->num_nve_tunnels && + memcmp(&config, &nve->config, sizeof(config))) { + NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel"); + return err; + } + + err = mlxsw_sp_fid_vni_set(fid, params->vni); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID"); + goto err_fid_vni_set; + } + + nve->config = config; + + return 0; + +err_fid_vni_set: + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + u16 fid_index = mlxsw_sp_fid_index(fid); + + mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); + mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); + mlxsw_sp_fid_vni_clear(fid); + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); +} + +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char tnqdr_pl[MLXSW_REG_TNQDR_LEN]; + + mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl); +} + +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ +} + +static int 
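/* Tunnel lifetime above is reference counted via num_nve_tunnels: only
 * the first mlxsw_sp_nve_tunnel_init() call allocates the KVDL entry
 * and runs ops->init(), and only the last mlxsw_sp_nve_tunnel_fini()
 * call tears both down. A hypothetical two-FID sequence, as a sketch:
 *
 *   mlxsw_sp_nve_fid_enable(mlxsw_sp, fid1, &params, extack);
 *           num_nve_tunnels 0 -> 1, KVDL alloc + ops->init()
 *   mlxsw_sp_nve_fid_enable(mlxsw_sp, fid2, &params, extack);
 *           num_nve_tunnels 1 -> 2, config memcmp() check only
 *   mlxsw_sp_nve_fid_disable(mlxsw_sp, fid2);
 *           num_nve_tunnels 2 -> 1, tunnel stays up
 *   mlxsw_sp_nve_fid_disable(mlxsw_sp, fid1);
 *           num_nve_tunnels 1 -> 0, ops->fini() + KVDL free
 */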
mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp) +{ + char tnqcr_pl[MLXSW_REG_TNQCR_LEN]; + + mlxsw_reg_tnqcr_pack(tnqcr_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl); +} + +static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + char tneem_pl[MLXSW_REG_TNEEM_LEN]; + int err; + + mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem), + tneem_pl); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tndem_pl[MLXSW_REG_TNDEM_LEN]; + bool trap_en, set_ce = false; + u8 new_inner_ecn; + + trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); +} + +static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + int j; + + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + int err; + + err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp); + if (err) + return err; + + return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp); +} + +static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6)) + return -EIO; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max; + + return 0; +} + +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve; + int err; + + nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL); + if (!nve) + return -ENOMEM; + mlxsw_sp->nve = nve; + nve->mlxsw_sp = mlxsw_sp; + nve->nve_ops_arr = mlxsw_sp->nve_ops_arr; + + err = rhashtable_init(&nve->mc_list_ht, + &mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_init; + + err = mlxsw_sp_nve_qos_init(mlxsw_sp); + if (err) + goto err_nve_qos_init; + + err = mlxsw_sp_nve_ecn_init(mlxsw_sp); + if (err) + goto err_nve_ecn_init; + + err = mlxsw_sp_nve_resources_query(mlxsw_sp); + if (err) + goto err_nve_resources_query; + + return 0; + +err_nve_resources_query: +err_nve_ecn_init: +err_nve_qos_init: + rhashtable_destroy(&nve->mc_list_ht); +err_rhashtable_init: + mlxsw_sp->nve = NULL; + kfree(nve); + return err; +} + +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels); + rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht); + kfree(mlxsw_sp->nve); + mlxsw_sp->nve = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h new file mode 100644 index 000000000000..4cc3297e13d6 --- /dev/null +++
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_NVE_H +#define _MLXSW_SPECTRUM_NVE_H + +#include <linux/netlink.h> +#include <linux/rhashtable.h> + +#include "spectrum.h" + +struct mlxsw_sp_nve_config { + enum mlxsw_sp_nve_type type; + u8 ttl; + u8 learning_en:1; + __be16 udp_dport; + __be32 flowlabel; + u32 ul_tb_id; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; +}; + +struct mlxsw_sp_nve { + struct mlxsw_sp_nve_config config; + struct rhashtable mc_list_ht; + struct mlxsw_sp *mlxsw_sp; + const struct mlxsw_sp_nve_ops **nve_ops_arr; + unsigned int num_nve_tunnels; /* Protected by RTNL */ + unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; + u32 tunnel_index; +}; + +struct mlxsw_sp_nve_ops { + enum mlxsw_sp_nve_type type; + bool (*can_offload)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack); + void (*nve_config)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config); + int (*init)(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config); + void (*fini)(struct mlxsw_sp_nve *nve); +}; + +extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops; +extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c new file mode 100644 index 000000000000..d21c7be5b1c9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <net/vxlan.h> + +#include "reg.h" +#include "spectrum_nve.h" + +/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B) + * + * In the worst case - where we have a VLAN tag on the outer Ethernet + * header and IPv6 in overlay and underlay - we need to parse 128 bytes + */ +#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128 +#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96 + +#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX + +static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + if (cfg->saddr.sa.sa_family != AF_INET) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported"); + return false; + } + + if (vxlan_addr_multicast(&cfg->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported"); + return false; + } + + if (vxlan_addr_any(&cfg->saddr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified"); + return false; + } + + if (cfg->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported"); + return false; + } + + if (cfg->port_min || cfg->port_max) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported"); + return false; + } + + if (cfg->tos != 1) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_TTL_INHERIT) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_LEARN) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported"); + return false; + } + + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported"); + return false; + } + + if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag"); + return false; + } + + if (cfg->ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0"); + return false; + } + + if (cfg->label != 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0"); + return false; + } + + return true; +} + +static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + config->type = MLXSW_SP_NVE_TYPE_VXLAN; + config->ttl = cfg->ttl; + config->flowlabel = cfg->label; + config->learning_en = cfg->flags & VXLAN_F_LEARN ? 
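/* learning_en below simply reflects VXLAN_F_LEARN into the 1-bit field
 * declared as "u8 learning_en:1" in struct mlxsw_sp_nve_config. Note
 * that mlxsw_sp1_nve_vxlan_can_offload() above rejects VXLAN_F_LEARN
 * outright, so whenever this config is actually offloaded on
 * Spectrum-1 the expression evaluates to 0.
 */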
1 : 0; + config->ul_tb_id = RT_TABLE_MAIN; + config->ul_proto = MLXSW_SP_L3_PROTO_IPV4; + config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr; + config->udp_dport = cfg->dst_port; +} + +static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, + unsigned int parsing_depth, + __be16 udp_dport) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + + mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport)); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); +} + +static int +mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_vr_id; + u8 udp_sport; + int err; + + err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, + &ul_vr_id); + if (err) + return err; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, + config->ttl); + /* VxLAN driver's default UDP source port range is 32768 (0x8000) + * to 60999 (0xee47). Set the upper 8 bits of the UDP source port + * to a random number between 0x80 and 0xee + */ + get_random_bytes(&udp_sport, sizeof(udp_sport)); + udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); + mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_nve_parsing_set(mlxsw_sp, + MLXSW_SP_NVE_VXLAN_PARSING_DEPTH, + config->udp_dport); + if (err) + return err; + + err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); + return err; +} + +static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); +} + +const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init 
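/* The source-port math above, worked through: with udp_sport uniform
 * over [0, 255], (udp_sport % (0xee - 0x80 + 1)) + 0x80 lands in
 * [0x80, 0xee]. Used as the upper byte of the UDP source port this
 * spans 0x8000 (32768) up to 0xeeff (61183), approximating the VXLAN
 * driver's default range of 32768..60999 (0xee47) as closely as an
 * 8-bit prefix allows.
 */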
= mlxsw_sp1_nve_vxlan_init, + .fini = mlxsw_sp1_nve_vxlan_fini, +}; + +static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + return false; +} + +static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ +} + +const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp2_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp2_nve_vxlan_init, + .fini = mlxsw_sp2_nve_vxlan_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2ab9cf25a08a..9e9bb57134f2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type { * encapsulating entries.) */ MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, + MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; struct mlxsw_sp_nexthop_group; @@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, return NULL; } +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id) +{ + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return -ESRCH; + *vr_id = vr->id; + + return 0; +} + static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { @@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } +static struct mlxsw_sp_fib_entry * +mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + enum mlxsw_sp_fib_entry_type type) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node; + unsigned char addr_prefix_len; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + const void *addrp; + size_t addr_len; + u32 addr4; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, proto); + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr4 = be32_to_cpu(addr->addr4); + addrp = &addr4; + addr_len = 4; + addr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, + addr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != type) + return NULL; + + return fib_entry; +} + /* Given an IPIP entry, find the corresponding decap route. 
*/ static struct mlxsw_sp_fib_entry * mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, @@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return 0; } +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + /* It is valid to create a tunnel with a local IP and only later + * assign this IP address to a local interface + */ + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return 0; + + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_update; + + return 0; + +err_fib_entry_update: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + return err; +} + +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; default: return false; @@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) int i; if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; return; } @@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, fib_entry->decap.tunnel_index); } +static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); } return -EINVAL; } @@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); struct net_device 
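/* Decap route state transitions, summarized from the two helpers above
 * (a sketch inferred from this patch):
 *
 *   local TRAP route exists, tunnel created -> promote to NVE_DECAP
 *   tunnel removed                          -> demote back to TRAP
 *
 * When the tunnel is created before any matching local route exists,
 * promotion is a no-op; the route is instead classified as NVE_DECAP
 * directly in mlxsw_sp_fib4_entry_type_set() below once it is added.
 */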
*dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry, ipip_entry); } + if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, + dip.addr4)) { + u32 t_index; + + t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); + fib_entry->decap.tunnel_index = t_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + return 0; + } /* fall through */ case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index fa16ad2c6a50..bc60d7a8b49d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/netlink.h> #include <net/switchdev.h> +#include <net/vxlan.h> #include "spectrum_span.h" #include "spectrum_switchdev.h" @@ -83,9 +84,19 @@ struct mlxsw_sp_bridge_ops { void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); + int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); + void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, u16 vid); + struct mlxsw_sp_fid * + (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid); + u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid); }; static int @@ -1236,6 +1247,51 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } +static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + bool adding, bool dynamic) +{ + enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto; + char *sfd_pl; + u8 num_rec; + u32 uip; + int err; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + uip = be32_to_cpu(addr->addr4); + sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return -EOPNOTSUPP; + } + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0, + mlxsw_sp_sfd_rec_policy(dynamic), mac, fid, + MLXSW_REG_SFD_REC_ACTION_NOP, uip, + sfd_proto); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, @@ -1949,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); } +static int +mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + WARN_ON(1); + return -EINVAL; +} + +static void +mlxsw_sp_bridge_8021q_vxlan_leave(struct 
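/* VXLAN offload is only implemented for VLAN-unaware (802.1D) bridges
 * at this point in the series: the 802.1Q vxlan_join() above warns and
 * returns -EINVAL, the vxlan_leave() being defined here is an empty
 * stub, and the 802.1Q fid_lookup() likewise warns. The working
 * callbacks follow in the 802.1D ops further down.
 */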
mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) @@ -1958,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + WARN_ON(1); + return NULL; +} + +static u16 +mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_vid(fid); +} + static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { .port_join = mlxsw_sp_bridge_8021q_port_join, .port_leave = mlxsw_sp_bridge_8021q_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, }; static bool @@ -2025,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); } +static int +mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + }; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (!fid) + return -EINVAL; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return -EINVAL; + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, ¶ms, extack); + if (err) + goto err_nve_fid_enable; + + /* The tunnel port does not hold a reference on the FID. 
Only + * local ports and the router port + */ + mlxsw_sp_fid_put(fid); + + return 0; + +err_nve_fid_enable: + mlxsw_sp_fid_put(fid); + return err; +} + +static void +mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct mlxsw_sp_fid *fid; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (WARN_ON(!fid)) + return; + + /* If the VxLAN device is down, then the FID does not have a VNI */ + if (!mlxsw_sp_fid_vni_is_set(fid)) + goto out; + + mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); +out: + mlxsw_sp_fid_put(fid); +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct net_device *vxlan_dev; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) + return fid; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return fid; + + vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev); + if (!vxlan_dev) + return fid; + + if (!netif_running(vxlan_dev)) + return fid; + + err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL); + if (err) + goto err_vxlan_join; + + return fid; + +err_vxlan_join: + mlxsw_sp_fid_put(fid); + return ERR_PTR(err); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + /* The only valid VLAN for a VLAN-unaware bridge is 0 */ + if (vid) + return NULL; + + return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); +} + +static u16 +mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return 0; } static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { .port_join = mlxsw_sp_bridge_8021d_port_join, .port_leave = mlxsw_sp_bridge_8021d_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021d_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021d_fid_vid, }; int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2087,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); } +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack); +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return; + + bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev); +} + static void mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, - struct 
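/* mlxsw_sp_bridge_8021d_fid_get() above also performs an implicit
 * VXLAN join: when the FID is created and carries no VNI yet, a
 * running VXLAN upper of the bridge is looked up and joined on the
 * spot. This covers the ordering where the VXLAN device was enslaved
 * to the bridge before any switch port joined it.
 */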
net_device *dev) + struct net_device *dev, bool offloaded) { struct switchdev_notifier_fdb_info info; info.addr = mac; info.vid = vid; + info.offloaded = offloaded; call_switchdev_notifiers(type, dev, &info.info); } @@ -2147,7 +2372,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2207,7 +2432,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2283,11 +2508,126 @@ out: struct mlxsw_sp_switchdev_event_work { struct work_struct work; - struct switchdev_notifier_fdb_info fdb_info; + union { + struct switchdev_notifier_fdb_info fdb_info; + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + }; struct net_device *dev; unsigned long event; }; +static void +mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, + enum mlxsw_sp_l3proto *proto, + union mlxsw_sp_l3addr *addr) +{ + if (vxlan_addr->sa.sa_family == AF_INET) { + addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; + *proto = MLXSW_SP_L3_PROTO_IPV4; + } else { + addr->addr6 = vxlan_addr->sin6.sin6_addr; + *proto = MLXSW_SP_L3_PROTO_IPV6; + } +} + +static void +mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; + struct net_device *dev = switchdev_work->dev; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + int err; + + fdb_info = &switchdev_work->fdb_info; + err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info); + if (err) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip, + &proto, &addr); + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, true, false); + if (err) + return; + vxlan_fdb_info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info.eth_addr, + fdb_info->vid, dev, true); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, false, + false); + vxlan_fdb_info.offloaded = false; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + break; + } +} + +static void +mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fid *fid; + __be32 vni; + int err; + + if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE && + switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE) + return; + + if (!switchdev_work->fdb_info.added_by_user) + return; + + if (!netif_running(dev)) + return; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return; + if (!netif_is_bridge_master(br_dev)) + 
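/* The event handled here was queued from a notifier running under
 * rcu_read_lock() and is only now processed under RTNL, so the device
 * topology may have changed in between; hence every precondition -
 * device up, bridge master present, mlxsw lower present, bridge known -
 * is re-validated before the FID lookup below.
 */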
return; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return; + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = bridge_device->ops->fid_lookup(bridge_device, + switchdev_work->fdb_info.vid); + if (!fid) + return; + + err = mlxsw_sp_fid_vni(fid, &vni); + if (err) + goto out; + + mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid, + vni); + +out: + mlxsw_sp_fid_put(fid); +} + static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) { struct mlxsw_sp_switchdev_event_work *switchdev_work = @@ -2298,6 +2638,11 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) int err; rtnl_lock(); + if (netif_is_vxlan(dev)) { + mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work); + goto out; + } + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); if (!mlxsw_sp_port) goto out; @@ -2312,7 +2657,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) break; mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, fdb_info->addr, - fdb_info->vid, dev); + fdb_info->vid, dev, true); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; @@ -2337,6 +2682,189 @@ out: dev_put(dev); } +static void +mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct net_device *br_dev; + struct mlxsw_sp_fid *fid; + u16 vid; + int err; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + br_dev = netdev_master_upper_dev_get(dev); + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr); + if (err) { + mlxsw_sp_fid_put(fid); + return; + } + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fid_put(fid); + return; + } + + /* The device has a single FDB table, whereas Linux has two - one + * in the bridge driver and another in the VxLAN driver. 
We only + * program an entry to the device if the MAC points to the VxLAN + * device in the bridge's FDB table + */ + vid = bridge_device->ops->fid_vid(bridge_device, fid); + if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev) + goto err_br_fdb_find; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, + &addr, true, false); + if (err) + goto err_fdb_tunnel_uc_op; + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, true); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_tunnel_uc_op: +err_br_fdb_find: + mlxsw_sp_fid_put(fid); +} + +static void +mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + u16 vid; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr); + mlxsw_sp_fid_put(fid); + return; + } + + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, &addr, + false, false); + vid = bridge_device->ops->fid_vid(bridge_device, fid); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, false); + + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + rtnl_lock(); + + if (!netif_running(dev)) + goto out; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + goto out; + if (!netif_is_bridge_master(br_dev)) + goto out; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work); + break; + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work); + break; + } + +out: + rtnl_unlock(); + kfree(switchdev_work); + dev_put(dev); +} + +static int +mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct switchdev_notifier_info *info) +{ + struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct vxlan_config *cfg = &vxlan->cfg; + + vxlan_fdb_info = container_of(info, + struct switchdev_notifier_vxlan_fdb_info, + info); + + if (vxlan_fdb_info->remote_port != cfg->dst_port) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_vni != cfg->vni) + return -EOPNOTSUPP; + if 
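/* Context for the all-zeros MAC handling above: in the VXLAN driver an
 * FDB entry keyed by 00:00:00:00:00:00 is a default-remote (flood)
 * entry, e.g. one installed with "bridge fdb append 00:00:00:00:00:00
 * dev vxlan0 dst 198.51.100.1" (device name and address hypothetical).
 * Such entries are therefore mapped to mlxsw_sp_nve_flood_ip_add() and
 * mlxsw_sp_nve_flood_ip_del() on the FID rather than to a unicast
 * tunnel FDB record.
 */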
(vxlan_fdb_info->vni != cfg->vni) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_ifindex) + return -EOPNOTSUPP; + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) + return -EOPNOTSUPP; + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) + return -EOPNOTSUPP; + + switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; + + return 0; +} + /* Called under rcu_read_lock() */ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -2346,6 +2874,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; struct switchdev_notifier_info *info = ptr; struct net_device *br_dev; + int err; /* Tunnel devices are not our uppers, so check their master instead */ br_dev = netdev_master_upper_dev_get_rcu(dev); @@ -2386,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, */ dev_hold(dev); break; + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_vxlan_fdb_event_work); + err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work, + info); + if (err) + goto err_vxlan_work_prepare; + dev_hold(dev); + break; default: kfree(switchdev_work); return NOTIFY_DONE; @@ -2395,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, return NOTIFY_DONE; +err_vxlan_work_prepare: err_addr_alloc: kfree(switchdev_work); return NOTIFY_BAD; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9b8a17ee3cb3..3238b9ee42f3 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -133,9 +133,9 @@ static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot) { unsigned int val, timeout = 10; - /* Wait for the issued mac table command to be completed, or timeout. - * When the command read from ANA_TABLES_MACACCESS is - * MACACCESS_CMD_IDLE, the issued command completed successfully. + /* Wait for the issued vlan table command to be completed, or timeout. + * When the command read from ANA_TABLES_VLANACCESS is + * VLANACCESS_CMD_IDLE, the issued command completed successfully. 
*/ do { val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index f2b1938236fe..244dc261006e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -399,12 +399,14 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, switch (off) { case offsetof(struct iphdr, daddr): - set_ip_addr->ipv4_dst_mask = mask; - set_ip_addr->ipv4_dst = exact; + set_ip_addr->ipv4_dst_mask |= mask; + set_ip_addr->ipv4_dst &= ~mask; + set_ip_addr->ipv4_dst |= exact & mask; break; case offsetof(struct iphdr, saddr): - set_ip_addr->ipv4_src_mask = mask; - set_ip_addr->ipv4_src = exact; + set_ip_addr->ipv4_src_mask |= mask; + set_ip_addr->ipv4_src &= ~mask; + set_ip_addr->ipv4_src |= exact & mask; break; default: return -EOPNOTSUPP; @@ -418,11 +420,12 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, } static void -nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask, +nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask, struct nfp_fl_set_ipv6_addr *ip6) { - ip6->ipv6[idx % 4].mask = mask; - ip6->ipv6[idx % 4].exact = exact; + ip6->ipv6[word].mask |= mask; + ip6->ipv6[word].exact &= ~mask; + ip6->ipv6[word].exact |= exact & mask; ip6->reserved = cpu_to_be16(0); ip6->head.jump_id = opcode_tag; @@ -435,6 +438,7 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, struct nfp_fl_set_ipv6_addr *ip_src) { __be32 exact, mask; + u8 word; /* We are expecting tcf_pedit to return a big endian value */ mask = (__force __be32)~tcf_pedit_mask(action, idx); @@ -443,17 +447,20 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, if (exact & ~mask) return -EOPNOTSUPP; - if (off < offsetof(struct ipv6hdr, saddr)) + if (off < offsetof(struct ipv6hdr, saddr)) { return -EOPNOTSUPP; - else if (off < offsetof(struct ipv6hdr, daddr)) - nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx, + } else if (off < offsetof(struct ipv6hdr, daddr)) { + word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, exact, mask, ip_src); - else if (off < offsetof(struct ipv6hdr, daddr) + - sizeof(struct in6_addr)) - nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx, + } else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) { + word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, exact, mask, ip_dst); - else + } else { return -EOPNOTSUPP; + } return 0; } @@ -511,7 +518,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, struct nfp_fl_set_eth set_eth; enum pedit_header_type htype; int idx, nkeys, err; - size_t act_size; + size_t act_size = 0; u32 offset, cmd; u8 ip_proto = 0; @@ -569,7 +576,9 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); *a_len += act_size; - } else if (set_ip_addr.head.len_lw) { + } + if (set_ip_addr.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; @@ -577,10 +586,12 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix IPv4 and TCP/UDP checksum. 
*/ *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | nfp_fl_csum_l4_to_flag(ip_proto); - } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { + } + if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { /* TC compiles set src and dst IPv6 address as a single action, * the hardware requires this to be 2 separate actions. */ + nfp_action += act_size; act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; @@ -593,6 +604,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip6_dst); memcpy(nfp_action, &set_ip6_dst, act_size); *a_len += act_size; @@ -600,13 +612,16 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_src.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); - } else if (set_tport.head.len_lw) { + } + if (set_tport.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_tport); memcpy(nfp_action, &set_tport, act_size); *a_len += act_size; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 30c926c4bc47..8e5bec04d1f9 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -4,6 +4,7 @@ #include <linux/etherdevice.h> #include <linux/inetdevice.h> #include <net/netevent.h> +#include <net/vxlan.h> #include <linux/idr.h> #include <net/dst_metadata.h> #include <net/arp.h> @@ -187,7 +188,7 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) return false; if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) return true; - if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan")) + if (netif_is_vxlan(netdev)) return true; return false; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 8b23d2848457..25382f8fbb70 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -19,34 +19,18 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/errno.h> -#include <linux/ioport.h> -#include <linux/crc32.h> -#include <linux/platform_device.h> -#include <linux/spinlock.h> -#include <linux/ethtool.h> -#include <linux/mii.h> #include <linux/clk.h> -#include <linux/workqueue.h> -#include <linux/netdevice.h> +#include <linux/crc32.h> #include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/phy.h> -#include <linux/dma-mapping.h> -#include <linux/of.h> +#include <linux/module.h> #include <linux/of_net.h> -#include <linux/types.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> -#include <linux/io.h> #include <mach/board.h> -#include <mach/platform.h> #include <mach/hardware.h> +#include <mach/platform.h> #define MODNAME "lpc-eth" #define DRV_VERSION "1.00" @@ -1257,18 +1241,19 @@ static const struct net_device_ops 
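/* Worked example for the nfp_fl_set_ip4()/nfp_fl_set_ip6_helper()
 * change in the hunks above: two pedit keys may target the same 32-bit
 * word, e.g. one rewriting the upper and one the lower 16 bits of the
 * IPv4 daddr. With plain assignment the second key discarded the
 * first; the accumulating form
 *
 *   mask_word  |= mask;
 *   exact_word &= ~mask;
 *   exact_word |= exact & mask;
 *
 * (names illustrative) lets each key touch only the bits its own mask
 * covers, so both rewrites survive in the single hardware action.
 */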
lpc_netdev_ops = { static int lpc_eth_drv_probe(struct platform_device *pdev) { - struct resource *res; - struct net_device *ndev; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; struct netdata_local *pldat; - struct phy_device *phydev; + struct net_device *ndev; dma_addr_t dma_handle; + struct resource *res; int irq, ret; u32 tmp; /* Setup network interface for RMII or MII mode */ tmp = __raw_readl(LPC32XX_CLKPWR_MACCLK_CTRL); tmp &= ~LPC32XX_CLKPWR_MACCTRL_PINS_MSK; - if (lpc_phy_interface_mode(&pdev->dev) == PHY_INTERFACE_MODE_MII) + if (lpc_phy_interface_mode(dev) == PHY_INTERFACE_MODE_MII) tmp |= LPC32XX_CLKPWR_MACCTRL_USE_MII_PINS; else tmp |= LPC32XX_CLKPWR_MACCTRL_USE_RMII_PINS; @@ -1278,7 +1263,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); if (!res || irq < 0) { - dev_err(&pdev->dev, "error getting resources.\n"); + dev_err(dev, "error getting resources.\n"); ret = -ENXIO; goto err_exit; } @@ -1286,12 +1271,12 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Allocate net driver data structure */ ndev = alloc_etherdev(sizeof(struct netdata_local)); if (!ndev) { - dev_err(&pdev->dev, "could not allocate device.\n"); + dev_err(dev, "could not allocate device.\n"); ret = -ENOMEM; goto err_exit; } - SET_NETDEV_DEV(ndev, &pdev->dev); + SET_NETDEV_DEV(ndev, dev); pldat = netdev_priv(ndev); pldat->pdev = pdev; @@ -1303,9 +1288,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) ndev->irq = irq; /* Get clock for the device */ - pldat->clk = clk_get(&pdev->dev, NULL); + pldat->clk = clk_get(dev, NULL); if (IS_ERR(pldat->clk)) { - dev_err(&pdev->dev, "error getting clock.\n"); + dev_err(dev, "error getting clock.\n"); ret = PTR_ERR(pldat->clk); goto err_out_free_dev; } @@ -1318,14 +1303,14 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Map IO space */ pldat->net_base = ioremap(res->start, resource_size(res)); if (!pldat->net_base) { - dev_err(&pdev->dev, "failed to map registers\n"); + dev_err(dev, "failed to map registers\n"); ret = -ENOMEM; goto err_out_disable_clocks; } ret = request_irq(ndev->irq, __lpc_eth_interrupt, 0, ndev->name, ndev); if (ret) { - dev_err(&pdev->dev, "error requesting interrupt.\n"); + dev_err(dev, "error requesting interrupt.\n"); goto err_out_iounmap; } @@ -1339,7 +1324,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) sizeof(struct txrx_desc_t) + sizeof(struct rx_status_t)); pldat->dma_buff_base_v = 0; - if (use_iram_for_net(&pldat->pdev->dev)) { + if (use_iram_for_net(dev)) { dma_handle = LPC32XX_IRAM_BASE; if (pldat->dma_buff_size <= lpc32xx_return_iram_size()) pldat->dma_buff_base_v = @@ -1350,7 +1335,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) } if (pldat->dma_buff_base_v == 0) { - ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) goto err_out_free_irq; @@ -1359,7 +1344,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Allocate a chunk of memory for the DMA ethernet buffers and descriptors */ pldat->dma_buff_base_v = - dma_alloc_coherent(&pldat->pdev->dev, + dma_alloc_coherent(dev, pldat->dma_buff_size, &dma_handle, GFP_KERNEL); if (pldat->dma_buff_base_v == NULL) { @@ -1384,7 +1369,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) __lpc_get_mac(pldat, ndev->dev_addr); if (!is_valid_ether_addr(ndev->dev_addr)) { - const char *macaddr = 
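/* MAC selection order in this probe path: the address programmed into
 * the controller is read back first via __lpc_get_mac(); only if that
 * is not a valid unicast address does the driver consult the device
 * tree, where of_get_mac_address() checks the "mac-address" and
 * "local-mac-address" properties of the node.
 */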
of_get_mac_address(pdev->dev.of_node); + const char *macaddr = of_get_mac_address(np); if (macaddr) memcpy(ndev->dev_addr, macaddr, ETH_ALEN); } @@ -1414,7 +1399,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) ret = register_netdev(ndev); if (ret) { - dev_err(&pdev->dev, "Cannot register net device, aborting.\n"); + dev_err(dev, "Cannot register net device, aborting.\n"); goto err_out_dma_unmap; } platform_set_drvdata(pdev, ndev); @@ -1426,19 +1411,17 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) netdev_info(ndev, "LPC mac at 0x%08x irq %d\n", res->start, ndev->irq); - phydev = ndev->phydev; - - device_init_wakeup(&pdev->dev, 1); - device_set_wakeup_enable(&pdev->dev, 0); + device_init_wakeup(dev, 1); + device_set_wakeup_enable(dev, 0); return 0; err_out_unregister_netdev: unregister_netdev(ndev); err_out_dma_unmap: - if (!use_iram_for_net(&pldat->pdev->dev) || + if (!use_iram_for_net(dev) || pldat->dma_buff_size > lpc32xx_return_iram_size()) - dma_free_coherent(&pldat->pdev->dev, pldat->dma_buff_size, + dma_free_coherent(dev, pldat->dma_buff_size, pldat->dma_buff_base_v, pldat->dma_buff_base_p); err_out_free_irq: @@ -1533,13 +1516,11 @@ static int lpc_eth_drv_resume(struct platform_device *pdev) } #endif -#ifdef CONFIG_OF static const struct of_device_id lpc_eth_match[] = { { .compatible = "nxp,lpc-eth" }, { } }; MODULE_DEVICE_TABLE(of, lpc_eth_match); -#endif static struct platform_driver lpc_eth_driver = { .probe = lpc_eth_drv_probe, @@ -1550,7 +1531,7 @@ static struct platform_driver lpc_eth_driver = { #endif .driver = { .name = MODNAME, - .of_match_table = of_match_ptr(lpc_eth_match), + .of_match_table = lpc_eth_match, }, }; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 5f0962d353ce..d9a03aba0e02 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -915,7 +915,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* Prototypes */ int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); -void qed_link_update(struct qed_hwfn *hwfn); +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index f2dfc7a976c0..5c221ebaa7b3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12207,11 +12207,56 @@ struct public_port { u32 transceiver_data; #define ETH_TRANSCEIVER_STATE_MASK 0x000000FF #define ETH_TRANSCEIVER_STATE_SHIFT 0x00000000 +#define ETH_TRANSCEIVER_STATE_OFFSET 0x00000000 #define ETH_TRANSCEIVER_STATE_UNPLUGGED 0x00000000 #define ETH_TRANSCEIVER_STATE_PRESENT 0x00000001 #define ETH_TRANSCEIVER_STATE_VALID 0x00000003 #define ETH_TRANSCEIVER_STATE_UPDATING 0x00000008 - +#define ETH_TRANSCEIVER_TYPE_MASK 0x0000FF00 +#define ETH_TRANSCEIVER_TYPE_OFFSET 0x8 +#define ETH_TRANSCEIVER_TYPE_NONE 0x00 +#define ETH_TRANSCEIVER_TYPE_UNKNOWN 0xFF +#define ETH_TRANSCEIVER_TYPE_1G_PCC 0x01 +#define ETH_TRANSCEIVER_TYPE_1G_ACC 0x02 +#define ETH_TRANSCEIVER_TYPE_1G_LX 0x03 +#define ETH_TRANSCEIVER_TYPE_1G_SX 0x04 +#define ETH_TRANSCEIVER_TYPE_10G_SR 0x05 +#define ETH_TRANSCEIVER_TYPE_10G_LR 0x06 +#define ETH_TRANSCEIVER_TYPE_10G_LRM 0x07 +#define ETH_TRANSCEIVER_TYPE_10G_ER 0x08 +#define ETH_TRANSCEIVER_TYPE_10G_PCC 0x09 +#define ETH_TRANSCEIVER_TYPE_10G_ACC 0x0a +#define 
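/* The transceiver type values below are encoded in bits 8-15 of
 * transceiver_data, per ETH_TRANSCEIVER_TYPE_MASK (0x0000FF00) and
 * ETH_TRANSCEIVER_TYPE_OFFSET (0x8) above; a raw register value of
 * 0x00000503, say, decodes to state VALID (0x3) with type 10G_SR
 * (0x05).
 */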
ETH_TRANSCEIVER_TYPE_XLPPI 0x0b +#define ETH_TRANSCEIVER_TYPE_40G_LR4 0x0c +#define ETH_TRANSCEIVER_TYPE_40G_SR4 0x0d +#define ETH_TRANSCEIVER_TYPE_40G_CR4 0x0e +#define ETH_TRANSCEIVER_TYPE_100G_AOC 0x0f +#define ETH_TRANSCEIVER_TYPE_100G_SR4 0x10 +#define ETH_TRANSCEIVER_TYPE_100G_LR4 0x11 +#define ETH_TRANSCEIVER_TYPE_100G_ER4 0x12 +#define ETH_TRANSCEIVER_TYPE_100G_ACC 0x13 +#define ETH_TRANSCEIVER_TYPE_100G_CR4 0x14 +#define ETH_TRANSCEIVER_TYPE_4x10G_SR 0x15 +#define ETH_TRANSCEIVER_TYPE_25G_CA_N 0x16 +#define ETH_TRANSCEIVER_TYPE_25G_ACC_S 0x17 +#define ETH_TRANSCEIVER_TYPE_25G_CA_S 0x18 +#define ETH_TRANSCEIVER_TYPE_25G_ACC_M 0x19 +#define ETH_TRANSCEIVER_TYPE_25G_CA_L 0x1a +#define ETH_TRANSCEIVER_TYPE_25G_ACC_L 0x1b +#define ETH_TRANSCEIVER_TYPE_25G_SR 0x1c +#define ETH_TRANSCEIVER_TYPE_25G_LR 0x1d +#define ETH_TRANSCEIVER_TYPE_25G_AOC 0x1e +#define ETH_TRANSCEIVER_TYPE_4x10G 0x1f +#define ETH_TRANSCEIVER_TYPE_4x25G_CR 0x20 +#define ETH_TRANSCEIVER_TYPE_1000BASET 0x21 +#define ETH_TRANSCEIVER_TYPE_10G_BASET 0x22 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR 0x30 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR 0x31 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR 0x32 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR 0x33 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR 0x34 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR 0x35 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC 0x36 u32 wol_info; u32 wol_pkt_len; u32 wol_pkt_details; @@ -13199,6 +13244,13 @@ struct nvm_cfg1_port { u32 transceiver_00; u32 device_ids; u32 board_cfg; +#define NVM_CFG1_PORT_PORT_TYPE_MASK 0x000000FF +#define NVM_CFG1_PORT_PORT_TYPE_OFFSET 0 +#define NVM_CFG1_PORT_PORT_TYPE_UNDEFINED 0x0 +#define NVM_CFG1_PORT_PORT_TYPE_MODULE 0x1 +#define NVM_CFG1_PORT_PORT_TYPE_BACKPLANE 0x2 +#define NVM_CFG1_PORT_PORT_TYPE_EXT_PHY 0x3 +#define NVM_CFG1_PORT_PORT_TYPE_MODULE_SLAVE 0x4 u32 mnm_10g_cap; u32 mnm_10g_ctrl; u32 mnm_10g_misc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index af3a28ec04eb..0f0aba793352 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -228,7 +228,7 @@ static int qed_grc_attn_cb(struct qed_hwfn *p_hwfn) attn_master_to_str(GET_FIELD(tmp, QED_GRC_ATTENTION_MASTER)), GET_FIELD(tmp2, QED_GRC_ATTENTION_PF), (GET_FIELD(tmp2, QED_GRC_ATTENTION_PRIV) == - QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Ireelevant)", + QED_GRC_ATTENTION_PRIV_VF) ? 
"VF" : "(Irrelevant)", GET_FIELD(tmp2, QED_GRC_ATTENTION_VF)); out: diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 75d217aaf8ce..35fd0db6a677 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -58,6 +58,7 @@ #include "qed_iscsi.h" #include "qed_mcp.h" +#include "qed_reg_addr.h" #include "qed_hw.h" #include "qed_selftest.h" #include "qed_debug.h" @@ -1304,6 +1305,7 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) struct qed_hwfn *hwfn; struct qed_mcp_link_params *link_params; struct qed_ptt *ptt; + u32 sup_caps; int rc; if (!cdev) @@ -1330,26 +1332,50 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) link_params->speed.autoneg = params->autoneg; if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) { link_params->speed.advertised_speeds = 0; - if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) || - (params->adv_speeds & QED_LM_1000baseT_Full_BIT)) + sup_caps = QED_LM_1000baseT_Full_BIT | + QED_LM_1000baseKX_Full_BIT | + QED_LM_1000baseX_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; - if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT) + sup_caps = QED_LM_10000baseT_Full_BIT | + QED_LM_10000baseKR_Full_BIT | + QED_LM_10000baseKX4_Full_BIT | + QED_LM_10000baseR_FEC_BIT | + QED_LM_10000baseCR_Full_BIT | + QED_LM_10000baseSR_Full_BIT | + QED_LM_10000baseLR_Full_BIT | + QED_LM_10000baseLRM_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; if (params->adv_speeds & QED_LM_20000baseKR2_Full_BIT) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G; - if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT) + sup_caps = QED_LM_25000baseKR_Full_BIT | + QED_LM_25000baseCR_Full_BIT | + QED_LM_25000baseSR_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; - if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT) + sup_caps = QED_LM_40000baseLR4_Full_BIT | + QED_LM_40000baseKR4_Full_BIT | + QED_LM_40000baseCR4_Full_BIT | + QED_LM_40000baseSR4_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; - if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + sup_caps = QED_LM_50000baseKR2_Full_BIT | + QED_LM_50000baseCR2_Full_BIT | + QED_LM_50000baseSR2_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; - if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT) + sup_caps = QED_LM_100000baseKR4_Full_BIT | + QED_LM_100000baseSR4_Full_BIT | + QED_LM_100000baseCR4_Full_BIT | + QED_LM_100000baseLR4_ER4_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G; } @@ -1462,12 +1488,149 @@ static int qed_get_link_data(struct qed_hwfn *hwfn, return 0; } +static void qed_fill_link_capability(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, u32 capability, + u32 *if_capability) +{ + u32 media_type, tcvr_state, tcvr_type; + u32 speed_mask, board_cfg; + + if (qed_mcp_get_media_type(hwfn, ptt, &media_type)) + media_type = MEDIA_UNSPECIFIED; + + if (qed_mcp_get_transceiver_data(hwfn, 
ptt, &tcvr_state, &tcvr_type)) + tcvr_type = ETH_TRANSCEIVER_STATE_UNPLUGGED; + + if (qed_mcp_trans_speed_mask(hwfn, ptt, &speed_mask)) + speed_mask = 0xFFFFFFFF; + + if (qed_mcp_get_board_config(hwfn, ptt, &board_cfg)) + board_cfg = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED; + + DP_VERBOSE(hwfn->cdev, NETIF_MSG_DRV, + "Media_type = 0x%x tcvr_state = 0x%x tcvr_type = 0x%x speed_mask = 0x%x board_cfg = 0x%x\n", + media_type, tcvr_state, tcvr_type, speed_mask, board_cfg); + + switch (media_type) { + case MEDIA_DA_TWINAX: + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) + *if_capability |= QED_LM_20000baseKR2_Full_BIT; + /* For DAC media multiple speed capabilities are supported*/ + capability = capability & speed_mask; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) + *if_capability |= QED_LM_1000baseKX_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) + *if_capability |= QED_LM_10000baseCR_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + *if_capability |= QED_LM_40000baseCR4_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + *if_capability |= QED_LM_25000baseCR_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + *if_capability |= QED_LM_50000baseCR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) + *if_capability |= QED_LM_100000baseCR4_Full_BIT; + break; + case MEDIA_BASE_T: + if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_EXT_PHY) { + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) { + *if_capability |= QED_LM_1000baseT_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) { + *if_capability |= QED_LM_10000baseT_Full_BIT; + } + } + if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_MODULE) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_1000BASET) + *if_capability |= QED_LM_1000baseT_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_BASET) + *if_capability |= QED_LM_10000baseT_Full_BIT; + } + break; + case MEDIA_SFP_1G_FIBER: + case MEDIA_SFPP_10G_FIBER: + case MEDIA_XFP_FIBER: + case MEDIA_MODULE_FIBER: + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) { + if ((tcvr_type == ETH_TRANSCEIVER_TYPE_1G_LX) || + (tcvr_type == ETH_TRANSCEIVER_TYPE_1G_SX)) + *if_capability |= QED_LM_1000baseKX_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_SR) + *if_capability |= QED_LM_10000baseSR_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LR) + *if_capability |= QED_LM_10000baseLR_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LRM) + *if_capability |= QED_LM_10000baseLRM_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_ER) + *if_capability |= QED_LM_10000baseR_FEC_BIT; + } + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) + *if_capability |= QED_LM_20000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_25G_SR) + *if_capability |= QED_LM_25000baseSR_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_LR4) + *if_capability |= QED_LM_40000baseLR4_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_SR4) + *if_capability |= QED_LM_40000baseSR4_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + *if_capability |= QED_LM_50000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) { + if (tcvr_type == 
ETH_TRANSCEIVER_TYPE_100G_SR4) + *if_capability |= QED_LM_100000baseSR4_Full_BIT; + } + + break; + case MEDIA_KR: + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) + *if_capability |= QED_LM_20000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) + *if_capability |= QED_LM_1000baseKX_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) + *if_capability |= QED_LM_10000baseKR_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + *if_capability |= QED_LM_25000baseKR_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + *if_capability |= QED_LM_40000baseKR4_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + *if_capability |= QED_LM_50000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) + *if_capability |= QED_LM_100000baseKR4_Full_BIT; + break; + case MEDIA_UNSPECIFIED: + case MEDIA_NOT_PRESENT: + DP_VERBOSE(hwfn->cdev, QED_MSG_DEBUG, + "Unknown media and transceiver type;\n"); + break; + } +} + static void qed_fill_link(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, struct qed_link_output *if_link) { + struct qed_mcp_link_capabilities link_caps; struct qed_mcp_link_params params; struct qed_mcp_link_state link; - struct qed_mcp_link_capabilities link_caps; u32 media_type; memset(if_link, 0, sizeof(*if_link)); @@ -1498,58 +1661,20 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->advertised_caps |= QED_LM_Autoneg_BIT; else if_link->advertised_caps &= ~QED_LM_Autoneg_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT | - QED_LM_1000baseT_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) - if_link->advertised_caps |= QED_LM_20000baseKR2_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) - if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT; - - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->supported_caps |= QED_LM_1000baseT_Half_BIT | - QED_LM_1000baseT_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) - if_link->supported_caps |= QED_LM_20000baseKR2_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) - if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT; - if (link_caps.speed_capabilities & 
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT; + + /* Fill link advertised capability*/ + qed_fill_link_capability(hwfn, ptt, params.speed.advertised_speeds, + &if_link->advertised_caps); + /* Fill link supported capability*/ + qed_fill_link_capability(hwfn, ptt, link_caps.speed_capabilities, + &if_link->supported_caps); if (link.link_up) if_link->speed = link.speed; /* TODO - fill duplex properly */ if_link->duplex = DUPLEX_FULL; - qed_mcp_get_media_type(hwfn->cdev, &media_type); + qed_mcp_get_media_type(hwfn, ptt, &media_type); if_link->port = qed_get_port_type(media_type); if_link->autoneg = params.speed.autoneg; @@ -1562,9 +1687,8 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE; /* Link partner capabilities */ - if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD) - if_link->lp_caps |= QED_LM_1000baseT_Half_BIT; - if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD) + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_1G_FD) if_link->lp_caps |= QED_LM_1000baseT_Full_BIT; if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G) if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT; @@ -1607,21 +1731,34 @@ static void qed_fill_link(struct qed_hwfn *hwfn, static void qed_get_current_link(struct qed_dev *cdev, struct qed_link_output *if_link) { + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; int i; - qed_fill_link(&cdev->hwfns[0], if_link); + hwfn = &cdev->hwfns[0]; + if (IS_PF(cdev)) { + ptt = qed_ptt_acquire(hwfn); + if (ptt) { + qed_fill_link(hwfn, ptt, if_link); + qed_ptt_release(hwfn, ptt); + } else { + DP_NOTICE(hwfn, "Failed to fill link; No PTT\n"); + } + } else { + qed_fill_link(hwfn, NULL, if_link); + } for_each_hwfn(cdev, i) qed_inform_vf_link_state(&cdev->hwfns[i]); } -void qed_link_update(struct qed_hwfn *hwfn) +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt) { void *cookie = hwfn->cdev->ops_cookie; struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common; struct qed_link_output if_link; - qed_fill_link(hwfn, &if_link); + qed_fill_link(hwfn, ptt, &if_link); qed_inform_vf_link_state(hwfn); if (IS_LEAD_HWFN(hwfn) && cookie) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index b06e4cb72c6c..386ee5410237 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1447,7 +1447,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link); - qed_link_update(p_hwfn); + qed_link_update(p_hwfn, p_ptt); out: spin_unlock_bh(&p_hwfn->mcp_info->link_lock); } @@ -1867,12 +1867,12 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_media_type) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; - struct qed_ptt *p_ptt; + *p_media_type = MEDIA_UNSPECIFIED; - if (IS_VF(cdev)) + if (IS_VF(p_hwfn->cdev)) return -EINVAL; if (!qed_mcp_is_init(p_hwfn)) { @@ -1880,16 +1880,195 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) return -EBUSY; } - *p_media_type = MEDIA_UNSPECIFIED; + if (!p_ptt) { + *p_media_type = MEDIA_UNSPECIFIED; + return -EINVAL; + } - p_ptt = qed_ptt_acquire(p_hwfn); - if (!p_ptt) + *p_media_type = qed_rd(p_hwfn, p_ptt, + 
p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + media_type)); + + return 0; +} + +int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_transceiver_state, + u32 *p_transceiver_type) +{ + u32 transceiver_info; + + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + + if (!qed_mcp_is_init(p_hwfn)) { + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); return -EBUSY; + } - *p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, media_type)); + *p_transceiver_type = ETH_TRANSCEIVER_TYPE_NONE; + *p_transceiver_state = ETH_TRANSCEIVER_STATE_UPDATING; - qed_ptt_release(p_hwfn, p_ptt); + transceiver_info = qed_rd(p_hwfn, p_ptt, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + transceiver_data)); + + *p_transceiver_state = (transceiver_info & + ETH_TRANSCEIVER_STATE_MASK) >> + ETH_TRANSCEIVER_STATE_OFFSET; + + if (*p_transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT) + *p_transceiver_type = (transceiver_info & + ETH_TRANSCEIVER_TYPE_MASK) >> + ETH_TRANSCEIVER_TYPE_OFFSET; + else + *p_transceiver_type = ETH_TRANSCEIVER_TYPE_UNKNOWN; + + return 0; +} +static bool qed_is_transceiver_ready(u32 transceiver_state, + u32 transceiver_type) +{ + if ((transceiver_state & ETH_TRANSCEIVER_STATE_PRESENT) && + ((transceiver_state & ETH_TRANSCEIVER_STATE_UPDATING) == 0x0) && + (transceiver_type != ETH_TRANSCEIVER_TYPE_NONE)) + return true; + + return false; +} + +int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_speed_mask) +{ + u32 transceiver_type, transceiver_state; + + qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state, + &transceiver_type); + + if (qed_is_transceiver_ready(transceiver_state, transceiver_type) == + false) + return -EINVAL; + + switch (transceiver_type) { + case ETH_TRANSCEIVER_TYPE_1G_LX: + case ETH_TRANSCEIVER_TYPE_1G_SX: + case ETH_TRANSCEIVER_TYPE_1G_PCC: + case ETH_TRANSCEIVER_TYPE_1G_ACC: + case ETH_TRANSCEIVER_TYPE_1000BASET: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_10G_SR: + case ETH_TRANSCEIVER_TYPE_10G_LR: + case ETH_TRANSCEIVER_TYPE_10G_LRM: + case ETH_TRANSCEIVER_TYPE_10G_ER: + case ETH_TRANSCEIVER_TYPE_10G_PCC: + case ETH_TRANSCEIVER_TYPE_10G_ACC: + case ETH_TRANSCEIVER_TYPE_4x10G: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + break; + case ETH_TRANSCEIVER_TYPE_40G_LR4: + case ETH_TRANSCEIVER_TYPE_40G_SR4: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + break; + case ETH_TRANSCEIVER_TYPE_100G_AOC: + case ETH_TRANSCEIVER_TYPE_100G_SR4: + case ETH_TRANSCEIVER_TYPE_100G_LR4: + case ETH_TRANSCEIVER_TYPE_100G_ER4: + case ETH_TRANSCEIVER_TYPE_100G_ACC: + *p_speed_mask = + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; + break; + case ETH_TRANSCEIVER_TYPE_25G_SR: + case ETH_TRANSCEIVER_TYPE_25G_LR: + case ETH_TRANSCEIVER_TYPE_25G_AOC: + case ETH_TRANSCEIVER_TYPE_25G_ACC_S: + case ETH_TRANSCEIVER_TYPE_25G_ACC_M: + case ETH_TRANSCEIVER_TYPE_25G_ACC_L: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; + break; + case ETH_TRANSCEIVER_TYPE_25G_CA_N: + case ETH_TRANSCEIVER_TYPE_25G_CA_S: + case ETH_TRANSCEIVER_TYPE_25G_CA_L: + case ETH_TRANSCEIVER_TYPE_4x25G_CR: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G | + 
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_40G_CR4: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_100G_CR4: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR: + *p_speed_mask = + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC: + *p_speed_mask = + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + break; + case ETH_TRANSCEIVER_TYPE_XLPPI: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + break; + case ETH_TRANSCEIVER_TYPE_10G_BASET: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + default: + DP_INFO(p_hwfn, "Unknown transceiver type 0x%x\n", + transceiver_type); + *p_speed_mask = 0xff; + break; + } + + return 0; +} + +int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_board_config) +{ + u32 nvm_cfg_addr, nvm_cfg1_offset, port_cfg_addr; + + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + + if (!qed_mcp_is_init(p_hwfn)) { + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); + return -EBUSY; + } + if (!p_ptt) { + *p_board_config = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED; + return -EINVAL; + } + + nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0); + nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4); + port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]); + *p_board_config = qed_rd(p_hwfn, p_ptt, + port_cfg_addr + + offsetof(struct nvm_cfg1_port, + board_cfg)); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 85e6b3989e7a..1adfe52b3905 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -322,14 +322,61 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, * @brief Get media type value of the port. * * @param cdev - qed dev pointer + * @param p_ptt * @param mfw_ver - media type value * * @return int - * 0 - Operation was successul. * -EBUSY - Operation failed */ -int qed_mcp_get_media_type(struct qed_dev *cdev, - u32 *media_type); +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *media_type); + +/** + * @brief Get transceiver data of the port. + * + * @param cdev - qed dev pointer + * @param p_ptt + * @param p_transceiver_state - transceiver state. + * @param p_transceiver_type - media type value + * + * @return int - + * 0 - Operation was successful. + * -EBUSY - Operation failed + */ +int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_transceiver_state, + u32 *p_transceiver_type); + +/** + * @brief Get transceiver supported speed mask. 
+ * + * @param cdev - qed dev pointer + * @param p_ptt + * @param p_speed_mask - Bit mask of all supported speeds. + * + * @return int - + * 0 - Operation was successful. + * -EBUSY - Operation failed + */ + +int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_speed_mask); + +/** + * @brief Get board configuration. + * + * @param cdev - qed dev pointer + * @param p_ptt + * @param p_board_config - Board config. + * + * @return int - + * 0 - Operation was successful. + * -EBUSY - Operation failed + */ +int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_board_config); /** * @brief General function for sending commands to the MCP diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index be118d057b92..b6cccf44bf40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1688,7 +1688,7 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn) ops->ports_update(cookie, vxlan_port, geneve_port); /* Always update link configuration according to bulletin */ - qed_link_update(hwfn); + qed_link_update(hwfn, NULL); } void qed_iov_vf_task(struct work_struct *work) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 7ff50b4488f6..8cbbd628fd73 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -413,19 +413,42 @@ struct qede_link_mode_mapping { }; static const struct qede_link_mode_mapping qed_lm_map[] = { - {QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT}, {QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT}, {QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT}, {QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT}, - {QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT}, {QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT}, + {QED_LM_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT}, + {QED_LM_2500baseX_Full_BIT, ETHTOOL_LINK_MODE_2500baseX_Full_BIT}, + {QED_LM_Backplane_BIT, ETHTOOL_LINK_MODE_Backplane_BIT}, + {QED_LM_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT}, + {QED_LM_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT}, {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT}, + {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT}, + {QED_LM_10000baseR_FEC_BIT, ETHTOOL_LINK_MODE_10000baseR_FEC_BIT}, {QED_LM_20000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT}, - {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT}, + {QED_LM_40000baseKR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT}, + {QED_LM_40000baseCR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT}, + {QED_LM_40000baseSR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT}, {QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT}, + {QED_LM_25000baseCR_Full_BIT, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT}, + {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT}, + {QED_LM_25000baseSR_Full_BIT, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT}, + {QED_LM_50000baseCR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT}, {QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT}, {QED_LM_100000baseKR4_Full_BIT, - ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT}, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT}, + {QED_LM_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT}, + 
{QED_LM_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT}, + {QED_LM_100000baseLR4_ER4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT}, + {QED_LM_50000baseSR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT}, + {QED_LM_1000baseX_Full_BIT, ETHTOOL_LINK_MODE_1000baseX_Full_BIT}, + {QED_LM_10000baseCR_Full_BIT, ETHTOOL_LINK_MODE_10000baseCR_Full_BIT}, + {QED_LM_10000baseSR_Full_BIT, ETHTOOL_LINK_MODE_10000baseSR_Full_BIT}, + {QED_LM_10000baseLR_Full_BIT, ETHTOOL_LINK_MODE_10000baseLR_Full_BIT}, + {QED_LM_10000baseLRM_Full_BIT, ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT}, }; #define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name) \ @@ -495,6 +518,7 @@ static int qede_set_link_ksettings(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; struct qed_link_params params; + u32 sup_caps; if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { DP_INFO(edev, "Link settings are not allowed to be changed\n"); @@ -521,60 +545,85 @@ static int qede_set_link_ksettings(struct net_device *dev, params.forced_speed = base->speed; switch (base->speed) { case SPEED_1000: - if (!(current_link.supported_caps & - QED_LM_1000baseT_Full_BIT)) { + sup_caps = QED_LM_1000baseT_Full_BIT | + QED_LM_1000baseKX_Full_BIT | + QED_LM_1000baseX_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "1G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_1000baseT_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_10000: - if (!(current_link.supported_caps & - QED_LM_10000baseKR_Full_BIT)) { + sup_caps = QED_LM_10000baseT_Full_BIT | + QED_LM_10000baseKR_Full_BIT | + QED_LM_10000baseKX4_Full_BIT | + QED_LM_10000baseR_FEC_BIT | + QED_LM_10000baseCR_Full_BIT | + QED_LM_10000baseSR_Full_BIT | + QED_LM_10000baseLR_Full_BIT | + QED_LM_10000baseLRM_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "10G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_10000baseKR_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_20000: if (!(current_link.supported_caps & - QED_LM_20000baseKR2_Full_BIT)) { + QED_LM_20000baseKR2_Full_BIT)) { DP_INFO(edev, "20G speed not supported\n"); return -EINVAL; } params.adv_speeds = QED_LM_20000baseKR2_Full_BIT; break; case SPEED_25000: - if (!(current_link.supported_caps & - QED_LM_25000baseKR_Full_BIT)) { + sup_caps = QED_LM_25000baseKR_Full_BIT | + QED_LM_25000baseCR_Full_BIT | + QED_LM_25000baseSR_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "25G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_25000baseKR_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_40000: - if (!(current_link.supported_caps & - QED_LM_40000baseLR4_Full_BIT)) { + sup_caps = QED_LM_40000baseLR4_Full_BIT | + QED_LM_40000baseKR4_Full_BIT | + QED_LM_40000baseCR4_Full_BIT | + QED_LM_40000baseSR4_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "40G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_40000baseLR4_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_50000: - if (!(current_link.supported_caps & - QED_LM_50000baseKR2_Full_BIT)) { + sup_caps = QED_LM_50000baseKR2_Full_BIT | + QED_LM_50000baseCR2_Full_BIT | + QED_LM_50000baseSR2_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { 
DP_INFO(edev, "50G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_50000baseKR2_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_100000: - if (!(current_link.supported_caps & - QED_LM_100000baseKR4_Full_BIT)) { + sup_caps = QED_LM_100000baseKR4_Full_BIT | + QED_LM_100000baseSR4_Full_BIT | + QED_LM_100000baseCR4_Full_BIT | + QED_LM_100000baseLR4_ER4_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "100G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_100000baseKR4_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; default: DP_INFO(edev, "Unsupported speed %u\n", base->speed); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b48f76182049..10b075bc5959 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_nvram_reg(qdev, spir, - ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } /* diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 8c4f49adcf9e..006b0aa8cec3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -631,7 +631,6 @@ struct rtl8169_tc_offsets { enum rtl_flag { RTL_FLAG_TASK_ENABLED = 0, - RTL_FLAG_TASK_SLOW_PENDING, RTL_FLAG_TASK_RESET_PENDING, RTL_FLAG_MAX }; @@ -5852,6 +5851,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp) { rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC); tp->cur_tx = tp->dirty_tx = 0; + netdev_reset_queue(tp->dev); } static void rtl_reset_work(struct rtl8169_private *tp) @@ -6154,6 +6154,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); + netdev_sent_queue(dev, skb->len); + skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ @@ -6252,7 +6254,7 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) { - unsigned int dirty_tx, tx_left; + unsigned int dirty_tx, tx_left, bytes_compl = 0, pkts_compl = 0; dirty_tx = tp->dirty_tx; smp_rmb(); @@ -6276,10 +6278,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb, tp->TxDescArray + entry); if (status & LastFrag) { - u64_stats_update_begin(&tp->tx_stats.syncp); - tp->tx_stats.packets++; - tp->tx_stats.bytes += tx_skb->skb->len; - u64_stats_update_end(&tp->tx_stats.syncp); + pkts_compl++; + bytes_compl += tx_skb->skb->len; dev_consume_skb_any(tx_skb->skb); tx_skb->skb = NULL; } @@ -6288,6 +6288,13 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) } if (tp->dirty_tx != dirty_tx) { + netdev_completed_queue(dev, pkts_compl, bytes_compl); + + u64_stats_update_begin(&tp->tx_stats.syncp); + tp->tx_stats.packets += pkts_compl; + tp->tx_stats.bytes += bytes_compl; + u64_stats_update_end(&tp->tx_stats.syncp); + tp->dirty_tx = dirty_tx; /* Sync with rtl8169_start_xmit: * - publish dirty_tx ring index (write barrier) @@ -6452,42 +6459,29 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) if (status == 0xffff || !(status & (RTL_EVENT_NAPI | tp->event_slow))) return IRQ_NONE; - rtl_irq_disable(tp); - napi_schedule_irqoff(&tp->napi); 
- - return IRQ_HANDLED; -} - -/* - * Workqueue context. - */ -static void rtl_slow_event_work(struct rtl8169_private *tp) -{ - struct net_device *dev = tp->dev; - u16 status; + if (unlikely(status & SYSErr)) { + rtl8169_pcierr_interrupt(tp->dev); + goto out; + } - status = rtl_get_events(tp) & tp->event_slow; - rtl_ack_events(tp, status); + if (status & LinkChg) + phy_mac_interrupt(tp->dev->phydev); - if (unlikely(status & RxFIFOOver)) { - switch (tp->mac_version) { - /* Work around for rx fifo overflow */ - case RTL_GIGA_MAC_VER_11: - netif_stop_queue(dev); - /* XXX - Hack alert. See rtl_task(). */ - set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); - default: - break; - } + if (unlikely(status & RxFIFOOver && + tp->mac_version == RTL_GIGA_MAC_VER_11)) { + netif_stop_queue(tp->dev); + /* XXX - Hack alert. See rtl_task(). */ + set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); } - if (unlikely(status & SYSErr)) - rtl8169_pcierr_interrupt(dev); - - if (status & LinkChg) - phy_mac_interrupt(dev->phydev); + if (status & RTL_EVENT_NAPI) { + rtl_irq_disable(tp); + napi_schedule_irqoff(&tp->napi); + } +out: + rtl_ack_events(tp, status); - rtl_irq_enable_all(tp); + return IRQ_HANDLED; } static void rtl_task(struct work_struct *work) @@ -6496,8 +6490,6 @@ static void rtl_task(struct work_struct *work) int bitnr; void (*action)(struct rtl8169_private *); } rtl_work[] = { - /* XXX - keep rtl_slow_event_work() as first element. */ - { RTL_FLAG_TASK_SLOW_PENDING, rtl_slow_event_work }, { RTL_FLAG_TASK_RESET_PENDING, rtl_reset_work }, }; struct rtl8169_private *tp = @@ -6527,29 +6519,16 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) { struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; - u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; - u16 status; - - status = rtl_get_events(tp); - rtl_ack_events(tp, status & ~tp->event_slow); + int work_done; - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); - - if (status & tp->event_slow) { - enable_mask &= ~tp->event_slow; - - rtl_schedule_task(tp, RTL_FLAG_TASK_SLOW_PENDING); - } + rtl_tx(dev, tp); if (work_done < budget) { napi_complete_done(napi, work_done); - rtl_irq_enable(tp, enable_mask); + rtl_irq_enable_all(tp); mmiowb(); } @@ -7071,20 +7050,12 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) { unsigned int flags; - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { RTL_W8(tp, Cfg9346, Cfg9346_Unlock); RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); RTL_W8(tp, Cfg9346, Cfg9346_Lock); flags = PCI_IRQ_LEGACY; - break; - case RTL_GIGA_MAC_VER_39 ... 
RTL_GIGA_MAC_VER_40: - /* This version was reported to have issues with resume - * from suspend when using MSI-X - */ - flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI; - break; - default: + } else { flags = PCI_IRQ_ALL_TYPES; } diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index aeafdb9ac015..beb06628f22d 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -371,7 +371,7 @@ static void rocker_desc_cookie_ptr_set(const struct rocker_desc_info *desc_info, static struct rocker_desc_info * rocker_desc_head_get(const struct rocker_dma_ring_info *info) { - static struct rocker_desc_info *desc_info; + struct rocker_desc_info *desc_info; u32 head = __pos_inc(info->head, info->size); desc_info = &info->desc_info[info->head]; @@ -402,7 +402,7 @@ static void rocker_desc_head_set(const struct rocker *rocker, static struct rocker_desc_info * rocker_desc_tail_get(struct rocker_dma_ring_info *info) { - static struct rocker_desc_info *desc_info; + struct rocker_desc_info *desc_info; if (info->tail == info->head) return NULL; /* nothing to be done between head and tail */ @@ -2728,6 +2728,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port, info.addr = recv_info->addr; info.vid = recv_info->vid; + info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, rocker_port->dev, &info.info); } diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 8d6cff8bd162..4823b6a51134 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2447,8 +2447,7 @@ static int smc_drv_remove(struct platform_device *pdev) static int smc_drv_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); if (ndev) { if (netif_running(ndev)) { diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 226be2a56c1f..6cf4d4cb152e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -3669,8 +3669,7 @@ static int cpsw_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int cpsw_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct cpsw_common *cpsw = ndev_to_cpsw(ndev); if (cpsw->data.dual_emac) { @@ -3693,8 +3692,7 @@ static int cpsw_suspend(struct device *dev) static int cpsw_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Select default pin state */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index f270beebb428..9153db120352 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -2002,8 +2002,7 @@ static int davinci_emac_remove(struct platform_device *pdev) static int davinci_emac_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); if (netif_running(ndev)) emac_dev_stop(ndev); @@ -2013,8 +2012,7 @@ static int davinci_emac_suspend(struct device *dev) static int 
davinci_emac_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); if (netif_running(ndev)) emac_dev_open(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 80fdbff67d82..f9da5d6172e3 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -661,8 +661,7 @@ static int w5300_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int w5300_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5300_priv *priv = netdev_priv(ndev); if (netif_running(ndev)) { @@ -676,8 +675,7 @@ static int w5300_suspend(struct device *dev) static int w5300_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5300_priv *priv = netdev_priv(ndev); if (!netif_running(ndev)) { diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 82eccc930c5c..a0cd1c41cf5f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -831,12 +831,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); - if (skb_dst(skb)) { - int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN - - info->options_len; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, &rt->dst, + GENEVE_IPV4_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { @@ -881,11 +877,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); - if (skb_dst(skb)) { - int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a7207fa7e451..2df7f60fe052 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -69,6 +69,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, int len; skb_tx_timestamp(skb); + + /* do not fool net_timestamp_check() with various clock bases */ + skb->tstamp = 0; + skb_orphan(skb); /* Before queueing this packet to netif_rx(), diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cfda146f3b3b..fc8d5f1ee1ad 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1077,7 +1077,7 @@ static void macvlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_async(netpoll); + __netpoll_free(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index c017486e9b86..696bdf1e4576 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -289,8 +289,7 @@ static int mdio_mux_iproc_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int mdio_mux_iproc_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); + struct iproc_mdiomux_desc *md = dev_get_drvdata(dev); 
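/*
 * The suspend/resume conversions above (smc91x, cpsw, davinci_emac, w5300,
 * mdio-mux-bcm-iproc) are all one refactor: platform_set_drvdata() stores
 * its pointer in dev->driver_data, so a dev_pm_ops callback that already
 * receives the struct device can call dev_get_drvdata() directly instead
 * of bouncing through to_platform_device(). A hedged sketch with a
 * hypothetical foo_suspend(), not taken from any of these drivers:
 */
#include <linux/device.h>
#include <linux/netdevice.h>

static int foo_suspend(struct device *dev)
{
	/* old form, now redundant:
	 *   struct platform_device *pdev = to_platform_device(dev);
	 *   struct net_device *ndev = platform_get_drvdata(pdev);
	 */
	struct net_device *ndev = dev_get_drvdata(dev);	/* equivalent */

	if (ndev && netif_running(ndev))
		netif_device_detach(ndev);

	return 0;
}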
clk_disable_unprepare(md->core_clk); @@ -299,8 +298,7 @@ static int mdio_mux_iproc_suspend(struct device *dev) static int mdio_mux_iproc_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); + struct iproc_mdiomux_desc *md = dev_get_drvdata(dev); clk_prepare_enable(md->core_clk); mdio_mux_iproc_config(md); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 3db06b40580d..9265dea79412 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -14,7 +14,7 @@ * option) any later version. * * Support : Micrel Phys: - * Giga phys: ksz9021, ksz9031 + * Giga phys: ksz9021, ksz9031, ksz9131 * 100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041 * ksz8021, ksz8031, ksz8051, * ksz8081, ksz8091, @@ -609,6 +609,116 @@ err_force_master: return result; } +#define KSZ9131_SKEW_5BIT_MAX 2400 +#define KSZ9131_SKEW_4BIT_MAX 800 +#define KSZ9131_OFFSET 700 +#define KSZ9131_STEP 100 + +static int ksz9131_of_load_skew_values(struct phy_device *phydev, + struct device_node *of_node, + u16 reg, size_t field_sz, + char *field[], u8 numfields) +{ + int val[4] = {-(1 + KSZ9131_OFFSET), -(2 + KSZ9131_OFFSET), + -(3 + KSZ9131_OFFSET), -(4 + KSZ9131_OFFSET)}; + int skewval, skewmax = 0; + int matches = 0; + u16 maxval; + u16 newval; + u16 mask; + int i; + + /* psec properties in dts should mean x pico seconds */ + if (field_sz == 5) + skewmax = KSZ9131_SKEW_5BIT_MAX; + else + skewmax = KSZ9131_SKEW_4BIT_MAX; + + for (i = 0; i < numfields; i++) + if (!of_property_read_s32(of_node, field[i], &skewval)) { + if (skewval < -KSZ9131_OFFSET) + skewval = -KSZ9131_OFFSET; + else if (skewval > skewmax) + skewval = skewmax; + + val[i] = skewval + KSZ9131_OFFSET; + matches++; + } + + if (!matches) + return 0; + + if (matches < numfields) + newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg); + else + newval = 0; + + maxval = (field_sz == 4) ? 
0xf : 0x1f; + for (i = 0; i < numfields; i++) + if (val[i] != -(i + 1 + KSZ9131_OFFSET)) { + mask = 0xffff; + mask ^= maxval << (field_sz * i); + newval = (newval & mask) | + (((val[i] / KSZ9131_STEP) & maxval) + << (field_sz * i)); + } + + return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); +} + +static int ksz9131_config_init(struct phy_device *phydev) +{ + const struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + char *clk_skews[2] = {"rxc-skew-psec", "txc-skew-psec"}; + char *rx_data_skews[4] = { + "rxd0-skew-psec", "rxd1-skew-psec", + "rxd2-skew-psec", "rxd3-skew-psec" + }; + char *tx_data_skews[4] = { + "txd0-skew-psec", "txd1-skew-psec", + "txd2-skew-psec", "txd3-skew-psec" + }; + char *control_skews[2] = {"txen-skew-psec", "rxdv-skew-psec"}; + const struct device *dev_walker; + int ret; + + dev_walker = &phydev->mdio.dev; + do { + of_node = dev_walker->of_node; + dev_walker = dev_walker->parent; + } while (!of_node && dev_walker); + + if (!of_node) + return 0; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_CLK_PAD_SKEW, 5, + clk_skews, 2); + if (ret < 0) + return ret; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_CONTROL_PAD_SKEW, 4, + control_skews, 2); + if (ret < 0) + return ret; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_RX_DATA_PAD_SKEW, 4, + rx_data_skews, 4); + if (ret < 0) + return ret; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4, + tx_data_skews, 4); + if (ret < 0) + return ret; + + return 0; +} + #define KSZ8873MLL_GLOBAL_CONTROL_4 0x06 #define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX BIT(6) #define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED BIT(4) @@ -975,6 +1085,23 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = kszphy_resume, }, { + .phy_id = PHY_ID_KSZ9131, + .phy_id_mask = MICREL_PHY_ID_MASK, + .name = "Microchip KSZ9131 Gigabit PHY", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .driver_data = &ksz9021_type, + .probe = kszphy_probe, + .config_init = ksz9131_config_init, + .read_status = ksz9031_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .get_sset_count = kszphy_get_sset_count, + .get_strings = kszphy_get_strings, + .get_stats = kszphy_get_stats, + .suspend = genphy_suspend, + .resume = kszphy_resume, +}, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8873MLL Switch", @@ -1022,6 +1149,7 @@ MODULE_LICENSE("GPL"); static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, { PHY_ID_KSZ9031, MICREL_PHY_ID_MASK }, + { PHY_ID_KSZ9131, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8001, 0x00fffffc }, { PHY_ID_KS8737, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8021, 0x00ffffff }, diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index bffe077dc75f..a2e59f4f6f01 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -522,7 +522,7 @@ static int vsc85xx_mdix_set(struct phy_device *phydev, u8 mdix) static int vsc85xx_downshift_get(struct phy_device *phydev, u8 *count) { - u16 reg_val; + int reg_val; reg_val = phy_read_paged(phydev, MSCC_PHY_PAGE_EXTENDED, MSCC_PHY_ACTIPHY_CNTL); @@ -1292,7 +1292,7 @@ static int vsc8574_config_pre_init(struct phy_device *phydev) dev_err(dev, "%s: failed to assert reset of micro\n", __func__); - return ret; + goto out; } } } else { diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 
d887016e54b6..db633ae9f784 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1104,10 +1104,7 @@ static void team_port_disable_netpoll(struct team_port *port) return; port->np = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - __netpoll_cleanup(np); - kfree(np); + __netpoll_free(np); } #else static int team_port_enable_netpoll(struct team_port *port) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3f5aa59c37b7..3e2c041d76ac 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2267,8 +2267,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev) /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); + netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); - netif_tx_disable(vi->dev); + netif_tx_unlock_bh(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { @@ -2304,7 +2305,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) } } + netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); + netif_tx_unlock_bh(vi->dev); return err; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 018406c4d944..297cdeaef479 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -103,22 +103,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; } -static inline bool vxlan_addr_any(const union vxlan_addr *ipa) -{ - if (ipa->sa.sa_family == AF_INET6) - return ipv6_addr_any(&ipa->sin6.sin6_addr); - else - return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); -} - -static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) -{ - if (ipa->sa.sa_family == AF_INET6) - return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); - else - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); -} - static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) { if (nla_len(nla) >= sizeof(struct in6_addr)) { @@ -151,16 +135,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; } -static inline bool vxlan_addr_any(const union vxlan_addr *ipa) -{ - return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); -} - -static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) -{ - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); -} - static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) { if (nla_len(nla) >= sizeof(struct in6_addr)) { @@ -298,6 +272,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_state = fdb->state; ndm->ndm_ifindex = vxlan->dev->ifindex; ndm->ndm_flags = fdb->flags; + if (rdst->offloaded) + ndm->ndm_flags |= NTF_OFFLOADED; ndm->ndm_type = RTN_UNICAST; if (!net_eq(dev_net(vxlan->dev), vxlan->net) && @@ -353,8 +329,8 @@ static inline size_t vxlan_nlmsg_size(void) + nla_total_size(sizeof(struct nda_cacheinfo)); } -static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, - struct vxlan_rdst *rd, int type) +static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, int type) { struct net *net = dev_net(vxlan->dev); struct sk_buff *skb; @@ -379,6 +355,49 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } +static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan, + struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, + bool adding) +{ + struct switchdev_notifier_vxlan_fdb_info info; + enum 
switchdev_notifier_type notifier_type; + + if (WARN_ON(!rd)) + return; + + notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE + : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE; + + info = (struct switchdev_notifier_vxlan_fdb_info){ + .remote_ip = rd->remote_ip, + .remote_port = rd->remote_port, + .remote_vni = rd->remote_vni, + .remote_ifindex = rd->remote_ifindex, + .vni = fdb->vni, + .offloaded = rd->offloaded, + }; + memcpy(info.eth_addr, fdb->eth_addr, ETH_ALEN); + + call_switchdev_notifiers(notifier_type, vxlan->dev, + &info.info); +} + +static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, int type) +{ + switch (type) { + case RTM_NEWNEIGH: + vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true); + break; + case RTM_DELNEIGH: + vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, false); + break; + } + + __vxlan_fdb_notify(vxlan, fdb, rd, type); +} + static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -488,6 +507,47 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, return NULL; } +int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + u8 eth_addr[ETH_ALEN + 2] = { 0 }; + struct vxlan_rdst *rdst; + struct vxlan_fdb *f; + int rc = 0; + + if (is_multicast_ether_addr(mac) || + is_zero_ether_addr(mac)) + return -EINVAL; + + ether_addr_copy(eth_addr, mac); + + rcu_read_lock(); + + f = __vxlan_find_mac(vxlan, eth_addr, vni); + if (!f) { + rc = -ENOENT; + goto out; + } + + rdst = first_remote_rcu(f); + + memset(fdb_info, 0, sizeof(*fdb_info)); + fdb_info->info.dev = dev; + fdb_info->remote_ip = rdst->remote_ip; + fdb_info->remote_port = rdst->remote_port; + fdb_info->remote_vni = rdst->remote_vni; + fdb_info->remote_ifindex = rdst->remote_ifindex; + fdb_info->vni = vni; + fdb_info->offloaded = rdst->offloaded; + ether_addr_copy(fdb_info->eth_addr, mac); + +out: + rcu_read_unlock(); + return rc; +} +EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc); + /* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, union vxlan_addr *ip, __be16 port, __be32 vni, @@ -533,6 +593,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd->remote_ip = *ip; rd->remote_port = port; + rd->offloaded = false; rd->remote_vni = vni; rd->remote_ifindex = ifindex; @@ -782,12 +843,15 @@ static void vxlan_fdb_free(struct rcu_head *head) static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, bool do_notify) { + struct vxlan_rdst *rd; + netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); --vxlan->addrcnt; if (do_notify) - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); + list_for_each_entry(rd, &f->remotes, list) + vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH); hlist_del_rcu(&f->hlist); call_rcu(&f->rcu, vxlan_fdb_free); @@ -2198,11 +2262,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; - if (skb_dst(skb)) { - int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); @@ -2239,11 +2299,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - if (skb_dst(skb)) { - int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); @@ -3761,6 +3817,51 @@ static struct notifier_block vxlan_notifier_block __read_mostly = { .notifier_call = vxlan_netdevice_event, }; +static void +vxlan_fdb_offloaded_set(struct net_device *dev, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *rdst; + struct vxlan_fdb *f; + + spin_lock_bh(&vxlan->hash_lock); + + f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + if (!f) + goto out; + + rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip, + fdb_info->remote_port, + fdb_info->remote_vni, + fdb_info->remote_ifindex); + if (!rdst) + goto out; + + rdst->offloaded = fdb_info->offloaded; + +out: + spin_unlock_bh(&vxlan->hash_lock); +} + +static int vxlan_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + + switch (event) { + case SWITCHDEV_VXLAN_FDB_OFFLOADED: + vxlan_fdb_offloaded_set(dev, ptr); + break; + } + + return 0; +} + +static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = { + .notifier_call = vxlan_switchdev_event, +}; + static __net_init int vxlan_init_net(struct net *net) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); @@ -3834,11 +3935,17 @@ static int __init vxlan_init_module(void) if (rc) goto out2; - rc = rtnl_link_register(&vxlan_link_ops); + rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block); if (rc) goto out3; + rc = rtnl_link_register(&vxlan_link_ops); + if (rc) + goto out4; + return 0; +out4: + unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); out3: unregister_netdevice_notifier(&vxlan_notifier_block); out2: @@ -3851,6 +3958,7 @@ late_initcall(vxlan_init_module); static void __exit vxlan_cleanup_module(void) { rtnl_link_unregister(&vxlan_link_ops); + unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); unregister_netdevice_notifier(&vxlan_notifier_block); unregister_pernet_subsys(&vxlan_net_ops); /* rcu_barrier() is called by netns */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dd8ec1dd9219..6bb9908bf46f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3143,8 +3143,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) } mutex_lock(&ns->ctrl->subsys->lock); - nvme_mpath_clear_current_path(ns); list_del_rcu(&ns->siblings); + nvme_mpath_clear_current_path(ns); mutex_unlock(&ns->ctrl->subsys->lock); down_write(&ns->ctrl->namespaces_rwsem); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7f01f6f60b87..d0b7dd8fb184 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); + int ret; + + ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); + if (ret && armpmu->filter_match) + return armpmu->filter_match(event); + + return ret; } static ssize_t armpmu_cpumask_show(struct device *dev, diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 
01b0e2bb3319..2012551d93e0 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -24,6 +24,8 @@ #include <linux/slab.h> #include <linux/timekeeping.h> +#include <linux/nospec.h> + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = ops->pin_config[pin_index]; @@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index bc03b0a690b4..9ede35cecb12 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -310,17 +310,17 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); @@ -355,7 +355,6 @@ static void acm_ctrl_irq(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - acm->nb_index = 0; dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); @@ -1642,6 +1641,7 @@ static int acm_pre_reset(struct usb_interface *intf) struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); + acm->nb_index = 0; /* pending control transfers are lost */ return 0; } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 244417d0dfd1..ffccd40ea67d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1474,8 +1474,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1505,6 +1503,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index ca8a4b53c59f..1074cb82ec17 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -221,6 +221,8 @@ #include <linux/usb/gadget.h> #include <linux/usb/composite.h> +#include <linux/nospec.h> + #include "configfs.h" @@ -3152,6 +3154,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) 
return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 722860eb5a91..51dd8e00c4f8 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -179,10 +179,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) xhci->quirks |= XHCI_SSIC_PORT_UNUSED; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) xhci->quirks |= XHCI_INTEL_USB_ROLE_SW; - } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c index 1fb3dd0f1dfa..277de96181f9 100644 --- a/drivers/usb/roles/intel-xhci-usb-role-switch.c +++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -161,6 +161,8 @@ static int intel_xhci_usb_remove(struct platform_device *pdev) { struct intel_xhci_usb_data *data = platform_get_drvdata(pdev); + pm_runtime_disable(&pdev->dev); + usb_role_switch_unregister(data->role_sw); return 0; } diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index d11f3f8dad40..1e592ec94ba4 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -318,8 +318,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, struct vhci_hcd *vhci_hcd; struct vhci *vhci; int retval = 0; - int rhport; + int rhport = -1; unsigned long flags; + bool invalid_rhport = false; u32 prev_port_status[VHCI_HC_PORTS]; @@ -334,9 +335,19 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue, wIndex); - if (wIndex > VHCI_HC_PORTS) - pr_err("invalid port number %d\n", wIndex); - rhport = wIndex - 1; + /* + * wIndex can be 0 for some request types (typeReq). rhport is + * in valid range when wIndex >= 1 and < VHCI_HC_PORTS. + * + * Reference port_status[] only with valid rhport when + * invalid_rhport is false. 
+ */ + if (wIndex < 1 || wIndex > VHCI_HC_PORTS) { + invalid_rhport = true; + if (wIndex > VHCI_HC_PORTS) + pr_err("invalid port number %d\n", wIndex); + } else + rhport = wIndex - 1; vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; @@ -345,8 +356,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, /* store old status and compare now and old later */ if (usbip_dbg_flag_vhci_rh) { - memcpy(prev_port_status, vhci_hcd->port_status, - sizeof(prev_port_status)); + if (!invalid_rhport) + memcpy(prev_port_status, vhci_hcd->port_status, + sizeof(prev_port_status)); } switch (typeReq) { @@ -354,8 +366,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh(" ClearHubFeature\n"); break; case ClearPortFeature: - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -415,9 +429,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; case GetPortStatus: usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex); - if (wIndex < 1) { + if (invalid_rhport) { pr_err("invalid port number %d\n", wIndex); retval = -EPIPE; + goto error; } /* we do not care about resume. */ @@ -513,16 +528,20 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; } - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND; break; case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_POWER\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER; else @@ -531,8 +550,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_BH_PORT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { pr_err("USB_PORT_FEAT_BH_PORT_RESET req not " @@ -543,8 +564,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { vhci_hcd->port_status[rhport] = 0; @@ -565,8 +588,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" SetPortFeature: default %d\n", wValue); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) { if ((vhci_hcd->port_status[rhport] & USB_SS_PORT_STAT_POWER) != 0) { @@ -608,7 +633,7 @@ error: if (usbip_dbg_flag_vhci_rh) { pr_debug("port %d\n", rhport); /* Only dump valid port status */ - if (rhport >= 0) { + if (!invalid_rhport) { dump_port_status_diff(prev_port_status[rhport], vhci_hcd->port_status[rhport], hcd->speed == HCD_USB3); @@ -618,8 +643,10 @@ error: spin_unlock_irqrestore(&vhci->lock, flags); - if ((vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) + if (!invalid_rhport && + (vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) { usb_hcd_poll_rh_status(hcd); + } return 
retval; } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 35f2ae30f31f..77a83790a31f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply[0] = NULL; - /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the * call because the work item may be queued again. diff --git a/fs/afs/server.c b/fs/afs/server.c index 2f306c0cc4ee..1d329e6981d5 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -199,11 +199,9 @@ static struct afs_server *afs_install_server(struct afs_net *net, write_sequnlock(&net->fs_addr_lock); ret = 0; - goto out; exists: afs_get_server(server); -out: write_sequnlock(&net->fs_lock); return server; } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index af2b17b21b94..95983c744164 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -343,7 +343,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); diff --git a/fs/dax.c b/fs/dax.c @@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping) while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { + pgoff_t nr_pages = 1; + for (i = 0; i < pagevec_count(&pvec); i++) { struct page *pvec_ent = pvec.pages[i]; void *entry; @@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping) xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (entry) + if (entry) { page = dax_busy_page(entry); + /* + * Account for multi-order entries at + * the end of the pagevec. + */ + if (i + 1 >= pagevec_count(&pvec)) + nr_pages = 1UL << dax_radix_order(entry); + } put_unlocked_mapping_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); if (page) @@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - index++; + index += nr_pages; if (page) break; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index defc2168de91..f58c0cacc531 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 83bfe04456b6..c550512ce335 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -70,20 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie) } /* - * initialise an cookie jar slab element prior to any use - */ -void fscache_cookie_init_once(void *_cookie) -{ - struct fscache_cookie *cookie = _cookie; - - memset(cookie, 0, sizeof(*cookie)); - spin_lock_init(&cookie->lock); - spin_lock_init(&cookie->stores_lock); - INIT_HLIST_HEAD(&cookie->backing_objects); -} - -/* - * Set the index key in a cookie. The cookie struct has space for a 12-byte + * Set the index key in a cookie. The cookie struct has space for a 16-byte * key plus length and hash, but if that's not big enough, it's instead a * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then * the key data.
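The fscache_set_key() hunk below is worth pausing on: the hash loop walks the key in 32-bit words, so the backing buffer must be a whole number of zero-initialised words, or the final iteration reads past (or hashes uninitialised) memory whenever the key length is not a multiple of four. That is what the switch to kcalloc()/DIV_ROUND_UP() and kmem_cache_zalloc() buys. A minimal userspace sketch of the pattern, assuming nothing beyond libc; toy_key_hash() and its salt parameter are hypothetical names, not a kernel API:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Hash a variable-length key word by word. The buffer is sized in
 * whole 32-bit words and zero-filled first, so the partially used
 * final word hashes deterministically instead of reading junk. */
static uint32_t toy_key_hash(const void *key, size_t len, uintptr_t salt)
{
    size_t nwords = DIV_ROUND_UP(len, sizeof(uint32_t));
    uint32_t *buf = calloc(nwords, sizeof(uint32_t));
    uint64_t h = salt + len;
    size_t i;

    if (!buf)
        return 0;
    memcpy(buf, key, len);       /* trailing bytes stay zero */
    for (i = 0; i < nwords; i++) /* never steps past the words */
        h += buf[i];
    free(buf);
    return (uint32_t)(h ^ (h >> 32));
}

int main(void)
{
    const char k[] = "example-key"; /* 11 bytes: not word-aligned */
    (void)toy_key_hash(k, sizeof(k) - 1, 0x1234);
    return 0;
}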
@@ -93,20 +80,18 @@ static int fscache_set_key(struct fscache_cookie *cookie, { unsigned long long h; u32 *buf; + int bufs; int i; - cookie->key_len = index_key_len; + bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); if (index_key_len > sizeof(cookie->inline_key)) { - buf = kzalloc(index_key_len, GFP_KERNEL); + buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; cookie->key = buf; } else { buf = (u32 *)cookie->inline_key; - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; } memcpy(buf, index_key, index_key_len); @@ -116,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie, */ h = (unsigned long)cookie->parent; h += index_key_len + cookie->type; - for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++) + + for (i = 0; i < bufs; i++) h += buf[i]; cookie->key_hash = h ^ (h >> 32); @@ -161,7 +147,7 @@ struct fscache_cookie *fscache_alloc_cookie( struct fscache_cookie *cookie; /* allocate and initialise a cookie */ - cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); if (!cookie) return NULL; @@ -192,6 +178,9 @@ struct fscache_cookie *fscache_alloc_cookie( cookie->netfs_data = netfs_data; cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); cookie->type = def->type; + spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f83328a7f048..d6209022e965 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object( extern struct kmem_cache *fscache_cookie_jar; extern void fscache_free_cookie(struct fscache_cookie *); -extern void fscache_cookie_init_once(void *); extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, const struct fscache_cookie_def *, const void *, size_t, diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 7dce110bf17d..30ad89db1efc 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -143,9 +143,7 @@ static int __init fscache_init(void) fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", sizeof(struct fscache_cookie), - 0, - 0, - fscache_cookie_init_once); + 0, 0, NULL); if (!fscache_cookie_jar) { pr_notice("Failed to allocate a cookie jar\n"); ret = -ENOMEM; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3c159a7f9a9e..84544a4f012d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -975,10 +975,6 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, { struct gfs2_inode *ip = GFS2_I(inode); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); } @@ -1061,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } } release_metapath(&mp); - if (gfs2_is_jdata(ip)) + if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8e712b614e6e..933aac5da193 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +#endif static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res 
*lockres, int new_level); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index bf000c8aeffb..fec62e9dfbe6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2337,8 +2337,8 @@ late_initcall(ubifs_init); static void __exit ubifs_exit(void) { - WARN_ON(list_empty(&ubifs_infos)); - WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + WARN_ON(!list_empty(&ubifs_infos)); + WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0); dbg_debugfs_exit(); ubifs_compressors_exit(); diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index da9d95a19580..1e713154f00e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -153,6 +153,17 @@ struct __drm_planes_state { struct __drm_crtcs_state { struct drm_crtc *ptr; struct drm_crtc_state *state, *old_state, *new_state; + + /** + * @commit: + * + * A reference to the CRTC commit object that is kept for use by + * drm_atomic_helper_wait_for_flip_done() after + * drm_atomic_helper_commit_hw_done() is called. This ensures that a + * concurrent commit won't free a commit object that is still in use. + */ + struct drm_crtc_commit *commit; + s32 __user *out_fence_ptr; u64 last_vblank_count; }; diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b25d12ef120a..e3c404833115 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -214,9 +214,9 @@ struct detailed_timing { #define DRM_EDID_HDMI_DC_Y444 (1 << 3) /* YCBCR 420 deep color modes */ -#define DRM_EDID_YCBCR420_DC_48 (1 << 6) -#define DRM_EDID_YCBCR420_DC_36 (1 << 5) -#define DRM_EDID_YCBCR420_DC_30 (1 << 4) +#define DRM_EDID_YCBCR420_DC_48 (1 << 2) +#define DRM_EDID_YCBCR420_DC_36 (1 << 1) +#define DRM_EDID_YCBCR420_DC_30 (1 << 0) #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \ DRM_EDID_YCBCR420_DC_36 | \ DRM_EDID_YCBCR420_DC_30) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 99c19b06d9a4..fdcb45999b26 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); + pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 472fa4d4ea62..7361cd3fddc1 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,6 +31,7 @@ #define PHY_ID_KSZ8081 0x00221560 #define PHY_ID_KSZ8061 0x00221570 #define PHY_ID_KSZ9031 0x00221620 +#define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 0ef6138eca49..31a750570c38 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -61,6 +61,7 @@ struct mlx5_core_cq { int reset_notify_added; struct list_head reset_notify; struct mlx5_eq *eq; + u16 uid; }; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e9b502d5bcc1..b4c0457fbebd 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1124,6 +1124,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, 
flow_table_properties_nic_transmit.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap) + #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b75796cac23..31460eeb6fe0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -133,6 +133,7 @@ enum { MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0x5002, + MLX5_REG_PPLM = 0x5023, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, @@ -163,10 +164,7 @@ enum mlx5_dcbx_oper_mode { }; enum mlx5_dct_atomic_mode { - MLX5_ATOMIC_MODE_DCT_OFF = 20, - MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF, + MLX5_ATOMIC_MODE_DCT_CX = 2, }; enum { @@ -360,7 +358,7 @@ struct mlx5_frag_buf { }; struct mlx5_frag_buf_ctrl { - struct mlx5_frag_buf frag_buf; + struct mlx5_buf_list *frags; u32 sz_m1; u16 frag_sz_m1; u16 strides_offset; @@ -477,6 +475,7 @@ struct mlx5_core_srq { atomic_t refcount; struct completion free; + u16 uid; }; struct mlx5_eq_table { @@ -996,10 +995,12 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } -static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, u16 strides_offset, struct mlx5_frag_buf_ctrl *fbc) { + fbc->frags = frags; fbc->log_stride = log_stride; fbc->log_sz = log_sz; fbc->sz_m1 = (1 << fbc->log_sz) - 1; @@ -1008,18 +1009,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, fbc->strides_offset = strides_offset; } -static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, struct mlx5_frag_buf_ctrl *fbc) { - mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc); -} - -static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, - void *cqc) -{ - mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz), - MLX5_GET(cqc, cqc, log_cq_size), - fbc); + mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc); } static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, @@ -1030,8 +1024,15 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, ix += fbc->strides_offset; frag = ix >> fbc->log_frag_strides; - return fbc->frag_buf.frags[frag].buf + - ((fbc->frag_sz_m1 & ix) << fbc->log_stride); + return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); +} + +static inline u32 +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) +{ + u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1; + + return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); } int mlx5_cmd_init(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 804516e4f483..5660f07d3be0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -45,7 +45,8 @@ enum { }; enum { - MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), }; #define LEFTOVERS_RULE_NUM 2 @@ -91,7 +92,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - struct 
mlx5_fc *counter; + u32 counter_id; struct { u16 num; u16 vhca_id; @@ -101,6 +102,8 @@ struct mlx5_flow_destination { }; struct mlx5_flow_namespace * +mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n); +struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); struct mlx5_flow_namespace * @@ -155,20 +158,28 @@ struct mlx5_fs_vlan { #define MLX5_FS_VLAN_DEPTH 2 +enum { + FLOW_ACT_HAS_TAG = BIT(0), + FLOW_ACT_NO_APPEND = BIT(1), +}; + struct mlx5_flow_act { u32 action; - bool has_flow_tag; u32 flow_tag; - u32 encap_id; + u32 reformat_id; u32 modify_id; uintptr_t esp_id; + u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; }; #define MLX5_DECLARE_FLOW_ACT(name) \ - struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - MLX5_FS_DEFAULT_FLOW_TAG, 0, 0} + struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \ + .reformat_id = 0, \ + .modify_id = 0, \ + .flags = 0, } /* Single destination per rule. * Group ID is implied by the match criteria. @@ -185,15 +196,30 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_flow_destination *new_dest, struct mlx5_flow_destination *old_dest); -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, u64 *packets, u64 *bytes); +u32 mlx5_fc_id(struct mlx5_fc *counter); int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + u32 modify_header_id); + +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id); +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id); + #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6e8a882052b1..dbff9ff28f2c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -243,8 +243,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, @@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 encap[0x1]; + u8 reformat[0x1]; u8 decap[0x1]; u8 reserved_at_9[0x1]; u8 pop_vlan[0x1]; @@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_c[0x1]; u8 pop_vlan_2[0x1]; u8 push_vlan_2[0x1]; - u8 reserved_at_f[0x11]; - + u8 reformat_and_vlan_action[0x1]; + u8 
reserved_at_10[0x2]; + u8 reformat_l3_tunnel_to_l2[0x1]; + u8 reformat_l2_to_l3_tunnel[0x1]; + u8 reformat_and_modify_action[0x1]; + u8 reserved_at_14[0xb]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1fd]; + u8 reserved_at_3[0x1d]; + u8 encap_general_header[0x1]; + u8 reserved_at_21[0xa]; + u8 log_max_packet_reformat_context[0x5]; + u8 reserved_at_30[0x6]; + u8 max_encap_header_size[0xa]; + u8 reserved_at_40[0x1c0]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; @@ -574,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_0[0x1c]; u8 fdb_multi_path_to_table[0x1]; - u8 reserved_at_1d[0x1e3]; + u8 reserved_at_1d[0x1]; + u8 multi_fdb_encap[0x1]; + u8 reserved_at_1e[0x1e1]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; @@ -599,7 +611,7 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vxlan_encap_decap[0x1]; u8 nvgre_encap_decap[0x1]; u8 reserved_at_22[0x9]; - u8 log_max_encap_headers[0x5]; + u8 log_max_packet_reformat_context[0x5]; u8 reserved_2b[0x6]; u8 max_encap_header_size[0xa]; @@ -996,7 +1008,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 umr_modify_atomic_disabled[0x1]; u8 umr_indirect_mkey_disabled[0x1]; u8 umr_fence[0x2]; - u8 reserved_at_20c[0x3]; + u8 dc_req_scat_data_cqe[0x1]; + u8 reserved_at_20d[0x2]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -1281,7 +1294,9 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x3]; + u8 dbr_umem_valid[0x1]; + u8 wq_umem_valid[0x1]; + u8 reserved_at_122[0x1]; u8 log_hairpin_num_packets[0x5]; u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; @@ -2355,7 +2370,10 @@ struct mlx5_ifc_qpc_bits { u8 dc_access_key[0x40]; - u8 reserved_at_680[0xc0]; + u8 reserved_at_680[0x3]; + u8 dbr_umem_valid[0x1]; + + u8 reserved_at_684[0xbc]; }; struct mlx5_ifc_roce_addr_layout_bits { @@ -2395,7 +2413,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, - MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80, @@ -2428,7 +2446,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 modify_header_id[0x20]; @@ -2455,7 +2473,7 @@ struct mlx5_ifc_xrc_srqc_bits { u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 reserved_at_22[0x1]; + u8 dbr_umem_valid[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; @@ -2550,8 +2568,8 @@ enum { }; enum { - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST = 0x2, }; struct mlx5_ifc_tirc_bits { @@ -3119,7 +3137,9 @@ enum { struct mlx5_ifc_cqc_bits { u8 status[0x4]; - u8 reserved_at_4[0x4]; + u8 reserved_at_4[0x2]; + u8 dbr_umem_valid[0x1]; + u8 reserved_at_7[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; @@ -3386,7 +3406,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits { struct mlx5_ifc_sqerr2rts_qp_in_bits { u8 opcode[0x10]; - u8 
reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3416,7 +3436,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits { struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3621,7 +3641,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits { struct mlx5_ifc_rts2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3651,7 +3671,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits { struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3681,7 +3701,7 @@ struct mlx5_ifc_rst2init_qp_out_bits { struct mlx5_ifc_rst2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -4804,19 +4824,19 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_encap_header_in_bits { +struct mlx5_ifc_packet_reformat_context_in_bits { u8 reserved_at_0[0x5]; - u8 header_type[0x3]; + u8 reformat_type[0x3]; u8 reserved_at_8[0xe]; - u8 encap_header_size[0xa]; + u8 reformat_data_size[0xa]; u8 reserved_at_20[0x10]; - u8 encap_header[2][0x8]; + u8 reformat_data[2][0x8]; - u8 more_encap_header[0][0x8]; + u8 more_reformat_data[0][0x8]; }; -struct mlx5_ifc_query_encap_header_out_bits { +struct mlx5_ifc_query_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4824,33 +4844,41 @@ struct mlx5_ifc_query_encap_header_out_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header[0]; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; }; -struct mlx5_ifc_query_encap_header_in_bits { +struct mlx5_ifc_query_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0xa0]; }; -struct mlx5_ifc_alloc_encap_header_out_bits { +struct mlx5_ifc_alloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_alloc_encap_header_in_bits { +enum { + MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, + MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, + MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, +}; + +struct mlx5_ifc_alloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -4859,10 +4887,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context; }; -struct mlx5_ifc_dealloc_encap_header_out_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4871,14 +4899,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_dealloc_encap_header_in_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_60[0x20]; }; @@ -5176,7 +5204,7 @@ struct mlx5_ifc_qp_2rst_out_bits { struct mlx5_ifc_qp_2rst_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ 
-5198,7 +5226,7 @@ struct mlx5_ifc_qp_2err_out_bits { struct mlx5_ifc_qp_2err_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5298,7 +5326,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits { struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5337,7 +5365,7 @@ struct mlx5_ifc_modify_tir_out_bits { struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5365,7 +5393,7 @@ struct mlx5_ifc_modify_sq_out_bits { struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5438,7 +5466,7 @@ struct mlx5_ifc_rqt_bitmask_bits { struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5472,7 +5500,7 @@ enum { struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5508,7 +5536,7 @@ struct mlx5_ifc_rmp_bitmask_bits { struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5613,7 +5641,7 @@ enum { struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5625,7 +5653,10 @@ struct mlx5_ifc_modify_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -5773,7 +5804,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits { struct mlx5_ifc_init2rtr_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5803,7 +5834,7 @@ struct mlx5_ifc_init2init_qp_out_bits { struct mlx5_ifc_init2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5902,7 +5933,7 @@ struct mlx5_ifc_drain_dct_out_bits { struct mlx5_ifc_drain_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5946,7 +5977,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits { struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5970,7 +6001,7 @@ struct mlx5_ifc_destroy_xrq_out_bits { struct mlx5_ifc_destroy_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5992,7 +6023,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits { struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6014,7 +6045,7 @@ struct mlx5_ifc_destroy_tis_out_bits { struct mlx5_ifc_destroy_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6036,7 +6067,7 @@ struct mlx5_ifc_destroy_tir_out_bits { struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6058,7 +6089,7 @@ struct mlx5_ifc_destroy_srq_out_bits { struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 
op_mod[0x10]; @@ -6080,7 +6111,7 @@ struct mlx5_ifc_destroy_sq_out_bits { struct mlx5_ifc_destroy_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6126,7 +6157,7 @@ struct mlx5_ifc_destroy_rqt_out_bits { struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6148,7 +6179,7 @@ struct mlx5_ifc_destroy_rq_out_bits { struct mlx5_ifc_destroy_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6192,7 +6223,7 @@ struct mlx5_ifc_destroy_rmp_out_bits { struct mlx5_ifc_destroy_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6214,7 +6245,7 @@ struct mlx5_ifc_destroy_qp_out_bits { struct mlx5_ifc_destroy_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6366,7 +6397,7 @@ struct mlx5_ifc_destroy_dct_out_bits { struct mlx5_ifc_destroy_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6388,7 +6419,7 @@ struct mlx5_ifc_destroy_cq_out_bits { struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6491,7 +6522,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits { struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6579,7 +6610,7 @@ struct mlx5_ifc_dealloc_pd_out_bits { struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6625,7 +6656,7 @@ struct mlx5_ifc_create_xrq_out_bits { struct mlx5_ifc_create_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6649,7 +6680,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits { struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6658,7 +6689,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits { struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + u8 xrc_srq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -6677,7 +6710,7 @@ struct mlx5_ifc_create_tis_out_bits { struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6701,7 +6734,7 @@ struct mlx5_ifc_create_tir_out_bits { struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6725,7 +6758,7 @@ struct mlx5_ifc_create_srq_out_bits { struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6753,7 +6786,7 @@ struct mlx5_ifc_create_sq_out_bits { struct mlx5_ifc_create_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6807,7 +6840,7 @@ struct mlx5_ifc_create_rqt_out_bits { struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6831,7 +6864,7 @@ struct mlx5_ifc_create_rq_out_bits { struct 
mlx5_ifc_create_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6855,7 +6888,7 @@ struct mlx5_ifc_create_rmp_out_bits { struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6879,7 +6912,7 @@ struct mlx5_ifc_create_qp_out_bits { struct mlx5_ifc_create_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6892,7 +6925,10 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x80]; + u8 reserved_at_800[0x60]; + + u8 wq_umem_valid[0x1]; + u8 reserved_at_861[0x1f]; u8 pas[0][0x40]; }; @@ -6954,7 +6990,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 reserved_at_40[0x20]; u8 pg_access[0x1]; - u8 reserved_at_61[0x1f]; + u8 mkey_umem_valid[0x1]; + u8 reserved_at_62[0x1e]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; @@ -6980,7 +7017,7 @@ struct mlx5_ifc_create_flow_table_out_bits { }; struct mlx5_ifc_flow_table_context_bits { - u8 encap_en[0x1]; + u8 reformat_en[0x1]; u8 decap_en[0x1]; u8 reserved_at_2[0x2]; u8 table_miss_action[0x4]; @@ -7122,7 +7159,7 @@ struct mlx5_ifc_create_dct_out_bits { struct mlx5_ifc_create_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7148,7 +7185,7 @@ struct mlx5_ifc_create_cq_out_bits { struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7157,7 +7194,10 @@ struct mlx5_ifc_create_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x60]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2e1[0x59f]; u8 pas[0][0x40]; }; @@ -7205,7 +7245,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits { struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7256,7 +7296,7 @@ enum { struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7284,7 +7324,7 @@ enum { struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7332,7 +7372,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits { struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7420,7 +7460,7 @@ struct mlx5_ifc_alloc_pd_out_bits { struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7788,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits { struct mlx5_ifc_pplm_reg_bits { u8 reserved_at_0[0x8]; - u8 local_port[0x8]; - u8 reserved_at_10[0x10]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0x20]; - u8 port_profile_mode[0x8]; - u8 static_port_profile[0x8]; - u8 active_port_profile[0x8]; - u8 reserved_at_58[0x8]; + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_at_58[0x8]; - u8 retransmission_active[0x8]; - u8 fec_mode_active[0x18]; + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; - u8 reserved_at_80[0x20]; + u8 rs_fec_correction_bypass_cap[0x4]; + u8 reserved_at_84[0x8]; + u8 fec_override_cap_56g[0x4]; + u8 fec_override_cap_100g[0x4]; + u8 
fec_override_cap_50g[0x4]; + u8 fec_override_cap_25g[0x4]; + u8 fec_override_cap_10g_40g[0x4]; + + u8 rs_fec_correction_bypass_admin[0x4]; + u8 reserved_at_a4[0x8]; + u8 fec_override_admin_56g[0x4]; + u8 fec_override_admin_100g[0x4]; + u8 fec_override_admin_50g[0x4]; + u8 fec_override_admin_25g[0x4]; + u8 fec_override_admin_10g_40g[0x4]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -8086,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 rx_icrc_encapsulated_counter[0x1]; u8 reserved_at_6e[0x8]; u8 pfcc_mask[0x1]; - u8 reserved_at_77[0x4]; + u8 reserved_at_77[0x3]; + u8 per_lane_error_counters[0x1]; u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; @@ -8097,7 +8152,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits { struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_32[0x20]; + + u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 pplm[0x1]; + u8 port_access_reg_cap_mask_34_to_32[0x3]; u8 port_access_reg_cap_mask_31_to_13[0x13]; u8 pbmc[0x1]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4778d41085d4..fbe322c966bc 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -471,6 +471,7 @@ struct mlx5_core_qp { int qpn; struct mlx5_rsc_debug *dbg; int pid; + u16 uid; }; struct mlx5_core_dct { diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 24ff23e27c8a..1b1f3c20c6a3 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -61,6 +61,7 @@ struct mlx5_srq_attr { u32 tm_next_tag; u32 tm_hw_phase_cnt; u32 tm_sw_phase_cnt; + u16 uid; }; struct mlx5_core_dev; diff --git a/include/linux/module.h b/include/linux/module.h index f807f15bebbe..e19ae08c7fb8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -20,6 +20,7 @@ #include <linux/export.h> #include <linux/rbtree_latch.h> #include <linux/error-injection.h> +#include <linux/tracepoint-defs.h> #include <linux/percpu.h> #include <asm/module.h> @@ -430,7 +431,7 @@ struct module { #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; - struct tracepoint * const *tracepoints_ptrs; + tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index ecf7dab81e9e..c6000046c966 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, const struct list_head *nf_osf_fingers); const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers); + const struct list_head *nf_osf_fingers, + const int ttl_check); #endif /* _NFOSF_H */ diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 3ef82d3a78db..676f1ff161a9 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -31,8 +31,6 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; - - struct work_struct cleanup_work; }; struct netpoll_info { @@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); -void __netpoll_free_async(struct netpoll *np); +void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); void netpoll_send_skb_on_dev(struct 
netpoll *np, struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 10f92e1d8e7b..bf309ff6f244 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -99,6 +99,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dee3c9c744f7..a47321a0d572 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -667,15 +667,35 @@ enum qed_link_mode_bits { QED_LM_Autoneg_BIT = BIT(1), QED_LM_Asym_Pause_BIT = BIT(2), QED_LM_Pause_BIT = BIT(3), - QED_LM_1000baseT_Half_BIT = BIT(4), - QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_1000baseT_Full_BIT = BIT(4), + QED_LM_10000baseT_Full_BIT = BIT(5), QED_LM_10000baseKR_Full_BIT = BIT(6), QED_LM_20000baseKR2_Full_BIT = BIT(7), QED_LM_25000baseKR_Full_BIT = BIT(8), QED_LM_40000baseLR4_Full_BIT = BIT(9), QED_LM_50000baseKR2_Full_BIT = BIT(10), QED_LM_100000baseKR4_Full_BIT = BIT(11), - QED_LM_COUNT = 11 + QED_LM_2500baseX_Full_BIT = BIT(12), + QED_LM_Backplane_BIT = BIT(13), + QED_LM_1000baseKX_Full_BIT = BIT(14), + QED_LM_10000baseKX4_Full_BIT = BIT(15), + QED_LM_10000baseR_FEC_BIT = BIT(16), + QED_LM_40000baseKR4_Full_BIT = BIT(17), + QED_LM_40000baseCR4_Full_BIT = BIT(18), + QED_LM_40000baseSR4_Full_BIT = BIT(19), + QED_LM_25000baseCR_Full_BIT = BIT(20), + QED_LM_25000baseSR_Full_BIT = BIT(21), + QED_LM_50000baseCR2_Full_BIT = BIT(22), + QED_LM_100000baseSR4_Full_BIT = BIT(23), + QED_LM_100000baseCR4_Full_BIT = BIT(24), + QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25), + QED_LM_50000baseSR2_Full_BIT = BIT(26), + QED_LM_1000baseX_Full_BIT = BIT(27), + QED_LM_10000baseCR_Full_BIT = BIT(28), + QED_LM_10000baseSR_Full_BIT = BIT(29), + QED_LM_10000baseLR_Full_BIT = BIT(30), + QED_LM_10000baseLRM_Full_BIT = BIT(31), + QED_LM_COUNT = 32 }; struct qed_link_params { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 119d092c6b13..0ba687454267 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); + /* fall through */ case 24: diffs |= __it_diff(a, b, 64); + /* fall through */ case 16: diffs |= __it_diff(a, b, 64); + /* fall through */ case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); + /* fall through */ case 20: diffs |= __it_diff(a, b, 64); + /* fall through */ case 12: diffs |= __it_diff(a, b, 64); + /* fall through */ case 4: diffs |= __it_diff(a, b, 32); break; } diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 22c5a46e9693..49ba9cde7e4b 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,6 +35,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef const int tracepoint_ptr_t; +#else +typedef struct tracepoint * const tracepoint_ptr_t; +#endif + struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 041f7e56a289..538ba1a58f5b 100644 --- 
a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -99,6 +99,29 @@ extern void syscall_unregfunc(void); #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return offset_to_ptr(p); +} + +#define __TRACEPOINT_ENTRY(name) \ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4 \n" \ + " .long __tracepoint_" #name " - . \n" \ + " .previous \n") +#else +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return *p; +} + +#define __TRACEPOINT_ENTRY(name) \ + static tracepoint_ptr_t __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + #endif /* _LINUX_TRACEPOINT_H */ /* @@ -253,19 +276,6 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } -#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS -#define __TRACEPOINT_ENTRY(name) \ - asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ - " .balign 4 \n" \ - " .long __tracepoint_" #name " - . \n" \ - " .previous \n") -#else -#define __TRACEPOINT_ENTRY(name) \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name -#endif - /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3555440e14fc..093aedebdf0c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -277,12 +277,19 @@ struct l2cap_conn_rsp { #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 -#define L2CAP_CR_AUTHENTICATION 0x0005 -#define L2CAP_CR_AUTHORIZATION 0x0006 -#define L2CAP_CR_BAD_KEY_SIZE 0x0007 -#define L2CAP_CR_ENCRYPTION 0x0008 -#define L2CAP_CR_INVALID_SCID 0x0009 -#define L2CAP_CR_SCID_IN_USE 0x000A +#define L2CAP_CR_INVALID_SCID 0x0006 +#define L2CAP_CR_SCID_IN_USE 0x0007 + +/* credit based connect results */ +#define L2CAP_CR_LE_SUCCESS 0x0000 +#define L2CAP_CR_LE_BAD_PSM 0x0002 +#define L2CAP_CR_LE_NO_MEM 0x0004 +#define L2CAP_CR_LE_AUTHENTICATION 0x0005 +#define L2CAP_CR_LE_AUTHORIZATION 0x0006 +#define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 +#define L2CAP_CR_LE_ENCRYPTION 0x0008 +#define L2CAP_CR_LE_INVALID_SCID 0x0009 +#define L2CAP_CR_LE_SCID_IN_USE 0X000A /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 diff --git a/include/net/dst.h b/include/net/dst.h index 7f735e76ca73..6cf0870414c7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu); } +static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, + struct dst_entry *encap_dst, + int headroom) +{ + u32 encap_mtu = dst_mtu(encap_dst); + + if (skb->len > encap_mtu - headroom) + skb_dst_update_pmtu(skb, encap_mtu - headroom); +} + #endif /* _NET_DST_H */ diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 482a1b705362..c8e2bebd8d93 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) * 1 if something is broken and should be logged (!!! 
above) * 2 if packet should be dropped */ -static inline int INET_ECN_decapsulate(struct sk_buff *skb, - __u8 outer, __u8 inner) +static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { @@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb, } } - if (INET_ECN_is_ce(outer)) + *set_ce = INET_ECN_is_ce(outer); + return 0; +} + +static inline int INET_ECN_decapsulate(struct sk_buff *skb, + __u8 outer, __u8 inner) +{ + bool set_ce = false; + int rc; + + rc = __INET_ECN_decapsulate(outer, inner, &set_ce); + if (!rc && set_ce) INET_ECN_set_ce(skb); - return 0; + return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index caabfd84a098..84097010237c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,10 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif + u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 0e355f4a3d76..77e2761d4f2f 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -99,7 +99,7 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table, void (*iter)(struct flow_offload *flow, void *data), void *data); -void nf_flow_table_cleanup(struct net *net, struct net_device *dev); +void nf_flow_table_cleanup(struct net_device *dev); int nf_flow_table_init(struct nf_flowtable *flow_table); void nf_flow_table_free(struct nf_flowtable *flow_table); diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/net/netfilter/nfnetlink_log.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 86f034b524d4..8dadc74c22e7 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -148,11 +148,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ a->chunk_hdr->type == SCTP_CID_I_DATA) -/* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ - (unsigned long)(c->chunk_hdr) - \ - sctp_datachk_len(&c->asoc->stream))) - /* Internal error codes */ enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5ef1bad81ef5..9e3d32746430 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sctp_datahdr_len(&chunk->asoc->stream); + size -= sctp_datachk_len(&chunk->asoc->stream); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 28a7c8e44636..a11f93790476 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -876,6 +876,8 @@ struct sctp_transport { unsigned long sackdelay; __u32 sackfreq; + atomic_t mtu_info; + /* When was the last time that we heard from this transport? We use * this to pick new active and retran paths. 
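One pattern in this run of header changes deserves a callout: the inet_ecn.h hunk above splits INET_ECN_decapsulate() into a pure helper, __INET_ECN_decapsulate(), that only computes a verdict and reports "mark CE" through an out-parameter, plus a thin wrapper that actually touches the skb. Callers that have header bits but no skb can then reuse the decision logic. A simplified, self-contained sketch of that decide-then-act shape (ecn_decap_decide() is a hypothetical name, and it collapses the real RFC 6040 table down to the Not-ECT/CE cases; the kernel helper also distinguishes ECT(0)/ECT(1) and a "log" verdict):

#include <assert.h>
#include <stdbool.h>

#define ECN_MASK    0x03
#define ECN_NOT_ECT 0x00
#define ECN_CE      0x03

/* Pure decision, no packet needed: returns 0 = keep, 2 = drop;
 * *set_ce tells the caller whether to mark the inner packet CE. */
static int ecn_decap_decide(unsigned char outer, unsigned char inner,
                            bool *set_ce)
{
    *set_ce = false;
    if ((inner & ECN_MASK) == ECN_NOT_ECT &&
        (outer & ECN_MASK) == ECN_CE)
        return 2; /* congestion mark would be lost: drop */
    *set_ce = (outer & ECN_MASK) == ECN_CE;
    return 0;
}

int main(void)
{
    bool ce;

    assert(ecn_decap_decide(ECN_CE, ECN_NOT_ECT, &ce) == 2);
    assert(ecn_decap_decide(ECN_CE, 0x01, &ce) == 0 && ce);  /* ECT(1) inner */
    assert(ecn_decap_decide(0x00, 0x01, &ce) == 0 && !ce);
    return 0;
}

Keeping the side effect (INET_ECN_set_ce) in the wrapper is what makes the decision logic testable and reusable without constructing packets.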
*/ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d574ce63bf22..881ecb1555bf 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -145,6 +145,10 @@ enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_DEVICE, SWITCHDEV_FDB_DEL_TO_DEVICE, SWITCHDEV_FDB_OFFLOADED, + + SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_OFFLOADED, }; struct switchdev_notifier_info { @@ -155,7 +159,8 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; - bool added_by_user; + u8 added_by_user:1, + offloaded:1; }; static inline struct net_device * diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7ef15179f263..03431c148e16 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -5,6 +5,8 @@ #include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/dst_metadata.h> +#include <net/rtnetlink.h> +#include <net/switchdev.h> /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -190,6 +192,7 @@ union vxlan_addr { struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; + u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; struct list_head list; @@ -370,4 +373,65 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) return vs->sock->sk->sk_family; } +#if IS_ENABLED(CONFIG_IPV6) + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_any(&ipa->sin6.sin6_addr); + else + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); + else + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#else /* !IS_ENABLED(CONFIG_IPV6) */ + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#endif /* IS_ENABLED(CONFIG_IPV6) */ + +static inline bool netif_is_vxlan(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "vxlan"); +} + +struct switchdev_notifier_vxlan_fdb_info { + struct switchdev_notifier_info info; /* must be first */ + union vxlan_addr remote_ip; + __be16 remote_port; + __be32 remote_vni; + u32 remote_ifindex; + u8 eth_addr[ETH_ALEN]; + __be32 vni; + bool offloaded; +}; + +#if IS_ENABLED(CONFIG_VXLAN) +int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info); +#else +static inline int +vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + return -ENOENT; +} +#endif + #endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5444e76870bb..579974b0bf0d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1511,9 +1511,16 @@ enum nft_flowtable_hook_attributes { }; #define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) +/** + * enum nft_osf_attributes - nftables osf expression netlink attributes + * + * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_OSF_TTL: Value of the TTL osf option 
(NLA_U8) + */ enum nft_osf_attributes { NFTA_OSF_UNSPEC, NFTA_OSF_DREG, + NFTA_OSF_TTL, __NFTA_OSF_MAX, }; #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) diff --git a/include/uapi/linux/netfilter/xt_quota.h b/include/uapi/linux/netfilter/xt_quota.h index d72fd52adbba..f3ba5d9e58b6 100644 --- a/include/uapi/linux/netfilter/xt_quota.h +++ b/include/uapi/linux/netfilter/xt_quota.h @@ -15,11 +15,9 @@ struct xt_quota_info { __u32 flags; __u32 pad; __aligned_u64 quota; -#ifdef __KERNEL__ - atomic64_t counter; -#else - __aligned_u64 remain; -#endif + + /* Used internally by the kernel */ + struct xt_quota_priv *master; }; #endif /* _XT_QUOTA_H */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index b479db5c71d9..34dd3d497f2c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -301,6 +301,7 @@ enum sctp_sinfo_flags { SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ SCTP_SENDALL = (1 << 6), + SCTP_PR_SCTP_ALL = (1 << 7), SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index ef0b7b6ef8a5..686d244e798d 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, sock_hold(sock->sk); old_xs = xchg(&m->xsk_map[i], xs); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } sockfd_put(sock); return 0; @@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return -EINVAL; old_xs = xchg(&m->xsk_map[k], NULL); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7fc4a371bdd2..908c9cdae2f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4001,7 +4001,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * put back on, and if we advance min_vruntime, we'll be placed back * further than we started -- ie. we'll be penalized. */ - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE) + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); } @@ -4476,9 +4476,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is + * not running add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -4622,14 +4626,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. 
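The sched/fair.c hunks above all serve one pattern: cfs_b->lock cannot be held across distribute_cfs_runtime(), so a plain flag, only ever read and written under the lock, marks that a distribution is in flight and keeps the period timer and the slack timer from distributing concurrently. A condensed sketch (names follow the hunks; control flow is simplified for illustration):

	raw_spin_lock(&cfs_b->lock);
	if (!cfs_b->distribute_running && cfs_b->runtime > 0) {
		u64 runtime = cfs_b->runtime;

		cfs_b->distribute_running = 1;
		raw_spin_unlock(&cfs_b->lock);	/* can't nest cfs_b->lock */

		runtime = distribute_cfs_runtime(cfs_b, runtime,
						 runtime_expires);

		raw_spin_lock(&cfs_b->lock);
		cfs_b->distribute_running = 0;
		cfs_b->runtime -= min(runtime, cfs_b->runtime);
	}
	raw_spin_unlock(&cfs_b->lock);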
*/ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -4740,6 +4746,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -4749,6 +4760,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -4759,6 +4773,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4867,6 +4882,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 455fa330de04..9683f458aec7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -346,6 +346,8 @@ struct cfs_bandwidth { int nr_periods; int nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index f704390db9fc..d8765c952fab 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -5,12 +5,12 @@ * Copyright (C) 2018 Joel Fernandes (Google) <joel@joelfernandes.org> */ +#include <linux/trace_clock.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/kthread.h> -#include <linux/ktime.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/string.h> @@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir static void busy_wait(ulong time) { - ktime_t start, end; - start = ktime_get(); + u64 start, end; + start = trace_clock_local(); do { - end = ktime_get(); + end = trace_clock_local(); if (kthread_should_stop()) break; - } while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000)); + } while ((end - start) < (time * 1000)); } static int preemptirq_delay_run(void *data) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 85f6b01431c7..d239004aaf29 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -738,16 +738,30 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(char *field_type, - char *field_name) +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed) { struct synth_field *field; + const char 
*prefix = NULL; + char *field_type = argv[0], *field_name; int len, ret = 0; char *array; if (field_type[0] == ';') field_type++; + if (!strcmp(field_type, "unsigned")) { + if (argc < 3) + return ERR_PTR(-EINVAL); + prefix = "unsigned "; + field_type = argv[1]; + field_name = argv[2]; + *consumed = 3; + } else { + field_name = argv[1]; + *consumed = 2; + } + len = strlen(field_name); if (field_name[len - 1] == ';') field_name[len - 1] = '\0'; @@ -760,11 +774,15 @@ static struct synth_field *parse_synth_field(char *field_type, array = strchr(field_name, '['); if (array) len += strlen(array); + if (prefix) + len += strlen(prefix); field->type = kzalloc(len, GFP_KERNEL); if (!field->type) { ret = -ENOMEM; goto free; } + if (prefix) + strcat(field->type, prefix); strcat(field->type, field_type); if (array) { strcat(field->type, array); @@ -1009,7 +1027,7 @@ static int create_synth_event(int argc, char **argv) struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; struct synth_event *event = NULL; bool delete_event = false; - int i, n_fields = 0, ret = 0; + int i, consumed = 0, n_fields = 0, ret = 0; char *name; mutex_lock(&synth_event_mutex); @@ -1061,16 +1079,16 @@ static int create_synth_event(int argc, char **argv) goto err; } - field = parse_synth_field(argv[i], argv[i + 1]); + field = parse_synth_field(argc - i, &argv[i], &consumed); if (IS_ERR(field)) { ret = PTR_ERR(field); goto err; } - fields[n_fields] = field; - i++; n_fields++; + fields[n_fields++] = field; + i += consumed - 1; } - if (i < argc) { + if (i < argc && strcmp(argv[i], ";") != 0) { ret = -EINVAL; goto err; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index bf2c06ef9afc..a3be42304485 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,8 +28,8 @@ #include <linux/sched/task.h> #include <linux/static_key.h> -extern struct tracepoint * const __start___tracepoints_ptrs[]; -extern struct tracepoint * const __stop___tracepoints_ptrs[]; +extern tracepoint_ptr_t __start___tracepoints_ptrs[]; +extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); @@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); -static void for_each_tracepoint_range(struct tracepoint * const *begin, - struct tracepoint * const *end, +static void for_each_tracepoint_range( + tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { + tracepoint_ptr_t *iter; + if (!begin) return; - - if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { - const int *iter; - - for (iter = (const int *)begin; iter < (const int *)end; iter++) - fct(offset_to_ptr(iter), priv); - } else { - struct tracepoint * const *iter; - - for (iter = begin; iter < end; iter++) - fct(*iter, priv); - } + for (iter = begin; iter < end; iter++) + fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES diff --git a/lib/test_ida.c b/lib/test_ida.c index 2d1637d8136b..b06880625961 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -150,10 +150,10 @@ static void ida_check_conv(struct ida *ida) IDA_BUG_ON(ida, !ida_is_empty(ida)); } +static DEFINE_IDA(ida); + static int ida_checks(void) { - DEFINE_IDA(ida); - IDA_BUG_ON(&ida, !ida_is_empty(&ida)); ida_check_alloc(&ida); ida_check_destroy(&ida); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 00704060b7f7..deed97fba979 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1780,7 +1780,7 @@ static 
pmd_t move_soft_dirty_pmd(pmd_t pmd) bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; @@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); - if (pmd_present(pmd) && pmd_dirty(pmd)) + if (pmd_present(pmd)) force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, } pmd = move_soft_dirty_pmd(pmd); set_pmd_at(mm, new_addr, new_pmd, pmd); - if (new_ptl != old_ptl) - spin_unlock(new_ptl); if (force_flush) flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); - else - *need_flush = true; + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } @@ -2885,9 +2883,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, if (!(pvmw->pmd && !pvmw->pte)) return; - mmu_notifier_invalidate_range_start(mm, address, - address + HPAGE_PMD_SIZE); - flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); pmdval = *pvmw->pmd; pmdp_invalidate(vma, address, pvmw->pmd); @@ -2900,9 +2895,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, set_pmd_at(mm, address, pvmw->pmd, pmdswp); page_remove_rmap(page, true); put_page(page); - - mmu_notifier_invalidate_range_end(mm, address, - address + HPAGE_PMD_SIZE); } void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) diff --git a/mm/mmap.c b/mm/mmap.c index 5f2b2b184c60..f7cd9cb966c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (flags & MAP_FIXED_NOREPLACE) { struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && vma->vm_start <= addr) + if (vma && vma->vm_start < addr + len) return -EEXIST; } diff --git a/mm/mremap.c b/mm/mremap.c index 5c2e18505f75..a9617e72e6b7 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks, bool *need_flush) + unsigned long new_addr, bool need_rmap_locks) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; @@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte = ptep_get_and_clear(mm, old_addr, old_pte); /* - * If we are remapping a dirty PTE, make sure + * If we are remapping a valid PTE, make sure * to flush TLB before we drop the PTL for the - * old PTE or we may race with page_mkclean(). + * PTE. * - * This check has to be done after we removed the - * old PTE from page tables or another thread may - * dirty it after the check and before the removal. + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. 
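The ordering that both the huge_memory.c and mremap.c hunks converge on, shown in isolation (illustrative; these lines mirror the move_ptes() tail above):

	arch_leave_lazy_mmu_mode();
	/* Flush while both PTLs are still held: the old mapping must
	 * become unreachable before any other thread can observe, free
	 * or reuse the page through the new one.
	 */
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);

With the flush after spin_unlock(new_ptl), another thread could take new_ptl, act on the moved PTE, and a third CPU could still write through a stale TLB entry for old_addr; holding both locks across the flush closes that window.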
*/ - if (pte_present(pte) && pte_dirty(pte)) + if (pte_present(pte)) force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); - if (force_flush) - flush_tlb_range(vma, old_end - len, old_end); - else - *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd, - &need_flush); + old_end, old_pmd, new_pmd); if (need_rmap_locks) drop_rmap_locks(vma); if (moved) @@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (extent > next - new_addr) extent = next - new_addr; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, - new_pmd, new_addr, need_rmap_locks, &need_flush); + new_pmd, new_addr, need_rmap_locks); } - if (need_flush) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 546af0e73ac3..ff720f1ebf73 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -756,8 +756,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) return; vlan->netpoll = NULL; - - __netpoll_free_async(netpoll); + __netpoll_free(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f47f8fad757a..ef9928d7b4fb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4937,31 +4937,27 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); - if (!status) { - /* The remote features procedure is defined for master - * role only. So only in case of an initiated connection - * request the remote features. - * - * If the local controller supports slave-initiated features - * exchange, then requesting the remote features in slave - * role is possible. Otherwise just transition into the - * connected state without requesting the remote features. - */ - if (conn->out || - (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) { - struct hci_cp_le_read_remote_features cp; + /* The remote features procedure is defined for master + * role only. So only in case of an initiated connection + * request the remote features. + * + * If the local controller supports slave-initiated features + * exchange, then requesting the remote features in slave + * role is possible. Otherwise just transition into the + * connected state without requesting the remote features. 
+ */ + if (conn->out || + (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) { + struct hci_cp_le_read_remote_features cp; - cp.handle = __cpu_to_le16(conn->handle); + cp.handle = __cpu_to_le16(conn->handle); - hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES, - sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES, + sizeof(cp), &cp); - hci_conn_hold(conn); - } else { - conn->state = BT_CONNECTED; - hci_connect_cfm(conn, status); - } + hci_conn_hold(conn); } else { + conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 514899f7f0d4..2146e0f3b6f8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -680,9 +680,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) u16 result; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_AUTHORIZATION; + result = L2CAP_CR_LE_AUTHORIZATION; else - result = L2CAP_CR_BAD_PSM; + result = L2CAP_CR_LE_BAD_PSM; l2cap_state_change(chan, BT_DISCONN); @@ -3670,7 +3670,7 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); rsp.credits = cpu_to_le16(chan->rx_credits); - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); @@ -3816,9 +3816,17 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, result = L2CAP_CR_NO_MEM; + /* Check for valid dynamic CID range (as per Erratum 3253) */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_DYN_END) { + result = L2CAP_CR_INVALID_SCID; + goto response; + } + /* Check if we already have channel with that dcid */ - if (__l2cap_get_chan_by_dcid(conn, scid)) + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_SCID_IN_USE; goto response; + } chan = pchan->ops->new_connection(pchan); if (!chan) @@ -5280,7 +5288,7 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); - if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 || + if (result == L2CAP_CR_LE_SUCCESS && (mtu < 23 || mps < 23 || dcid < L2CAP_CID_DYN_START || dcid > L2CAP_CID_LE_DYN_END)) return -EPROTO; @@ -5301,7 +5309,7 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_lock(chan); switch (result) { - case L2CAP_CR_SUCCESS: + case L2CAP_CR_LE_SUCCESS: if (__l2cap_get_chan_by_dcid(conn, dcid)) { err = -EBADSLT; break; @@ -5315,8 +5323,8 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_ready(chan); break; - case L2CAP_CR_AUTHENTICATION: - case L2CAP_CR_ENCRYPTION: + case L2CAP_CR_LE_AUTHENTICATION: + case L2CAP_CR_LE_ENCRYPTION: /* If we already have MITM protection we can't do * anything. 
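To see why the dedicated L2CAP_CR_LE_* space above matters: values 0x0005 and up mean different things in BR/EDR connect responses (BAD_AMP, INVALID_SCID, SCID_IN_USE) than in LE credit-based ones (AUTHENTICATION, AUTHORIZATION, ...), so the shared constants were ambiguous. A hypothetical helper distilling the checks l2cap_le_connect_req() performs, expressed purely in the new LE result space:

static u16 le_connect_result(struct l2cap_conn *conn,
			     struct l2cap_chan *pchan, u16 scid, bool sec_ok)
{
	if (!pchan)
		return L2CAP_CR_LE_BAD_PSM;
	if (!sec_ok)
		return L2CAP_CR_LE_AUTHENTICATION;
	if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END)
		return L2CAP_CR_LE_INVALID_SCID;
	if (__l2cap_get_chan_by_dcid(conn, scid))
		return L2CAP_CR_LE_SCID_IN_USE;
	return L2CAP_CR_LE_SUCCESS;
}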
*/ @@ -5459,7 +5467,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); if (!pchan) { - result = L2CAP_CR_BAD_PSM; + result = L2CAP_CR_LE_BAD_PSM; chan = NULL; goto response; } @@ -5469,28 +5477,28 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_AUTHENTICATION; + result = L2CAP_CR_LE_AUTHENTICATION; chan = NULL; goto response_unlock; } /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { - result = L2CAP_CR_INVALID_SCID; + result = L2CAP_CR_LE_INVALID_SCID; chan = NULL; goto response_unlock; } /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - result = L2CAP_CR_SCID_IN_USE; + result = L2CAP_CR_LE_SCID_IN_USE; chan = NULL; goto response_unlock; } chan = pchan->ops->new_connection(pchan); if (!chan) { - result = L2CAP_CR_NO_MEM; + result = L2CAP_CR_LE_NO_MEM; goto response_unlock; } @@ -5526,7 +5534,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, chan->ops->defer(chan); } else { l2cap_chan_ready(chan); - result = L2CAP_CR_SUCCESS; + result = L2CAP_CR_LE_SUCCESS; } response_unlock: diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index b64e1649993b..94e88f510c5b 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -23,9 +23,11 @@ static void shutdown_umh(struct umh_info *info) if (!info->pid) return; - tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); - if (tsk) + tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); + if (tsk) { force_sig(SIGKILL, tsk); + put_task_struct(tsk); + } fput(info->pipe_to_umh); fput(info->pipe_from_umh); info->pid = 0; diff --git a/net/bridge/br.c b/net/bridge/br.c index e411e40333e2..360ad66c21e9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -151,7 +151,7 @@ static int br_switchdev_event(struct notifier_block *unused, break; } br_fdb_offloaded_set(br, p, fdb_info->addr, - fdb_info->vid); + fdb_info->vid, true); break; case SWITCHDEV_FDB_DEL_TO_BRIDGE: fdb_info = ptr; @@ -163,7 +163,7 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_OFFLOADED: fdb_info = ptr; br_fdb_offloaded_set(br, p, fdb_info->addr, - fdb_info->vid); + fdb_info->vid, fdb_info->offloaded); break; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index e053a4e43758..c6abf927f0c9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -344,7 +344,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_async(np); + __netpoll_free(np); } #endif diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 74331690a390..e56ba3912a90 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1152,7 +1152,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, } void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, bool offloaded) { struct net_bridge_fdb_entry *fdb; @@ -1160,7 +1160,7 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, fdb = br_fdb_find(br, addr, vid); if (fdb) - fdb->offloaded = 1; + fdb->offloaded = offloaded; spin_unlock_bh(&br->hash_lock); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 10ee39fdca5c..2920e06a5403 100644 --- 
a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -574,7 +574,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, bool offloaded); /* br_forward.c */ enum br_pkt_type { diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index d77f807420c4..b993df770675 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -103,7 +103,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, static void br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, u16 vid, struct net_device *dev, - bool added_by_user) + bool added_by_user, bool offloaded) { struct switchdev_notifier_fdb_info info; unsigned long notifier_type; @@ -111,6 +111,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, info.addr = mac; info.vid = vid; info.added_by_user = added_by_user; + info.offloaded = offloaded; notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; call_switchdev_notifiers(notifier_type, dev, &info.info); } @@ -126,13 +127,15 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, fdb->key.vlan_id, fdb->dst->dev, - fdb->added_by_user); + fdb->added_by_user, + fdb->offloaded); break; case RTM_NEWNEIGH: br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, fdb->key.vlan_id, fdb->dst->dev, - fdb->added_by_user); + fdb->added_by_user, + fdb->offloaded); break; } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4cc603dfc9ef..d05402868575 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -928,6 +928,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, return -EINVAL; } + if (info.cmd != cmd) + return -EINVAL; + if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) @@ -2392,13 +2395,17 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +static int ethtool_set_per_queue(struct net_device *dev, + void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) return -EFAULT; + if (per_queue_opt.sub_command != sub_cmd) + return -EINVAL; + switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); @@ -2769,7 +2776,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_phy_stats(dev, useraddr); break; case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr); + rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); break; case ETHTOOL_GLINKSETTINGS: rc = ethtool_get_link_ksettings(dev, useraddr); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 69c41cb3966d..ee605d9d8bd4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1167,8 +1167,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->nud_state = new; err = 0; notify = old & NUD_VALID; - if (((old & (NUD_INCOMPLETE | NUD_PROBE)) || - (flags & NEIGH_UPDATE_F_ADMIN)) && + if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && (new & NUD_FAILED)) { neigh_invalidate(neigh); notify = 1; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 
de1d1ba92f2d..5da9552b186b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -57,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu); MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -312,7 +311,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; - rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -357,7 +355,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); @@ -591,7 +588,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); - INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", @@ -790,10 +786,6 @@ void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; - /* rtnl_dereference would be preferable here but - * rcu_cleanup_netpoll path can put us in here safely without - * holding the rtnl, so plain rcu_dereference it is - */ npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; @@ -814,21 +806,16 @@ void __netpoll_cleanup(struct netpoll *np) } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void netpoll_async_cleanup(struct work_struct *work) +void __netpoll_free(struct netpoll *np) { - struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + ASSERT_RTNL(); - rtnl_lock(); + /* Wait for transmitting packets to finish before freeing. 
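For context on the netpoll conversion above: cleanup used to be punted to a workqueue via __netpoll_free_async(); it is now synchronous and relies on the caller already holding RTNL. The caller pattern, as the vlan, bridge and dsa hunks in this series use it ("priv" stands for the driver's own private struct and is illustrative):

	struct netpoll *np = priv->netpoll;

	if (!np)
		return;
	priv->netpoll = NULL;
	__netpoll_free(np);	/* under RTNL: waits for tx, cleans up, kfrees */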
*/ + synchronize_rcu_bh(); __netpoll_cleanup(np); - rtnl_unlock(); kfree(np); } - -void __netpoll_free_async(struct netpoll *np) -{ - schedule_work(&np->cleanup_work); -} -EXPORT_SYMBOL_GPL(__netpoll_free_async); +EXPORT_SYMBOL_GPL(__netpoll_free); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 54b961de9538..946de0e24c87 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1846,8 +1846,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; - skb->csum = csum_sub(skb->csum, - skb_checksum(skb, len, delta, 0)); + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); } return __pskb_trim(skb, len); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 3f840b6eea69..7d0c19e7edcf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -722,7 +722,7 @@ static void dsa_slave_netpoll_cleanup(struct net_device *dev) p->netpoll = NULL; - __netpoll_free_async(netpoll); + __netpoll_free(netpoll); } static void dsa_slave_poll_controller(struct net_device *dev) @@ -1478,6 +1478,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) netdev_dbg(dev, "fdb add failed err=%d\n", err); break; } + fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, &fdb_info->info); break; diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 844806120f44..3e614cc824f7 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -315,8 +315,6 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, next_entry: e++; } - e = 0; - s_e = 0; spin_lock_bh(lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index ac110c1d55b5..a0aa13bcabda 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -60,6 +60,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); MODULE_ALIAS("ip_nat_snmp_basic"); +MODULE_ALIAS_NFCT_HELPER("snmp_trap"); #define SNMP_PORT 161 #define SNMP_TRAP_PORT 162 diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index b88081285fd1..9277abdd822a 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) return cwnd; } +/* With pacing at lower layers, there's often less data "in the network" than + * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), + * we often have several skbs queued in the pacing layer with a pre-scheduled + * earliest departure time (EDT). BBR adapts its pacing rate based on the + * inflight level that it estimates has already been "baked in" by previous + * departure time decisions. We calculate a rough estimate of the number of our + * packets that might be in the network at the earliest departure time for the + * next skb scheduled: + * in_network_at_edt = inflight_at_edt - (EDT - now) * bw + * If we're increasing inflight, then we want to know if the transmit of the + * EDT skb will push inflight above the target, so inflight_at_edt includes + * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, + * then estimate if inflight will sink too low just before the EDT transmit. 
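A worked instance of the estimate described above, with purely illustrative numbers:

/* bw = 100 pkts/ms, EDT is 2 ms ahead, inflight_now = 300 packets,
 * pacing_gain > BBR_UNIT so the departing skb's ~10 segs are included:
 *
 *   inflight_at_edt    = 300 + 10 = 310
 *   interval_delivered = 100 * 2  = 200
 *   in_network_at_edt  = 310 - 200 = 110 packets
 *
 * Had interval_delivered reached 310 or more, the estimate would clamp
 * to 0, exactly as the early return in the function below does.
 */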
+ */ +static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 now_ns, edt_ns, interval_us; + u32 interval_delivered, inflight_at_edt; + + now_ns = tp->tcp_clock_cache; + edt_ns = max(tp->tcp_wstamp_ns, now_ns); + interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); + interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; + inflight_at_edt = inflight_now; + if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ + inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ + if (interval_delivered >= inflight_at_edt) + return 0; + return inflight_at_edt - interval_delivered; +} + /* An optimization in BBR to reduce losses: On the first round of recovery, we * follow the packet conservation principle: send P packets per P packets acked. * After that, we slow-start and send at most 2*P packets per P packets acked. @@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, if (bbr->pacing_gain == BBR_UNIT) return is_full_length; /* just use wall clock time */ - inflight = rs->prior_in_flight; /* what was in-flight before ACK? */ + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); bw = bbr_max_bw(sk); /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at @@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : - bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -507,8 +538,6 @@ static void bbr_reset_startup_mode(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_STARTUP; - bbr->pacing_gain = bbr_high_gain; - bbr->cwnd_gain = bbr_high_gain; } static void bbr_reset_probe_bw_mode(struct sock *sk) @@ -516,8 +545,6 @@ static void bbr_reset_probe_bw_mode(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_PROBE_BW; - bbr->pacing_gain = BBR_UNIT; - bbr->cwnd_gain = bbr_cwnd_gain; bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ } @@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ - bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ - bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ tcp_sk(sk)->snd_ssthresh = bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && - tcp_packets_in_flight(tcp_sk(sk)) <= + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } @@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) if (bbr_probe_rtt_mode_ms > 0 && filter_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ - bbr->pacing_gain = BBR_UNIT; - bbr->cwnd_gain = BBR_UNIT; bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; } @@ -827,6 +850,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) bbr->idle_restart = 0; } +static void bbr_update_gains(struct sock *sk) +{ + struct 
bbr *bbr = inet_csk_ca(sk); + + switch (bbr->mode) { + case BBR_STARTUP: + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; + break; + case BBR_DRAIN: + bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ + bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ + break; + case BBR_PROBE_BW: + bbr->pacing_gain = (bbr->lt_use_bw ? + BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]); + bbr->cwnd_gain = bbr_cwnd_gain; + break; + case BBR_PROBE_RTT: + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + break; + default: + WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); + break; + } +} + static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) { bbr_update_bw(sk, rs); @@ -834,6 +886,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) bbr_check_full_bw_reached(sk, rs); bbr_check_drain(sk, rs); bbr_update_min_rtt(sk, rs); + bbr_update_gains(sk); } static void bbr_main(struct sock *sk, const struct rate_sample *rs) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d212e4cbc689..c07990a35ff3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2321,18 +2321,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; + if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { + /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ + skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; + list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); + goto repair; /* Skip network transmission */ + } + if (tcp_pacing_check(sk)) break; tso_segs = tcp_init_tso_segs(skb, mss_now); BUG_ON(!tso_segs); - if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { - /* "skb_mstamp" is used as a start point for the retransmit timer */ - tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns); - goto repair; /* Skip network transmission */ - } - cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { if (push_one == 2) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index a9162aa11af9..95df7f7f6328 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -99,8 +99,10 @@ void tcp_cleanup_ulp(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - sock_owned_by_me(sk); - + /* No sock_owned_by_me() check here as at the time the + * stack calls this function, the socket is dead and + * about to be destroyed. + */ if (!icsk->icsk_ulp_ops) return; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2496b12bf721..e39c284e2954 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4972,12 +4972,14 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. 
temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a0b6932c3afd..a9d06d4dd057 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1184,11 +1184,6 @@ route_lookup: } skb_dst_set(skb, dst); - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); - } - if (hop_limit == 0) { if (skb->protocol == htons(ETH_P_IP)) hop_limit = ip_hdr(skb)->ttl; @@ -1210,6 +1205,11 @@ route_lookup: if (err) return err; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6895e1dc0b03..21f6deb2aec9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9fd600e42f9d..e3226284e480 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -517,10 +517,11 @@ static void rt6_probe_deferred(struct work_struct *w) static void rt6_probe(struct fib6_info *rt) { - struct __rt6_probe_work *work; + struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; struct neighbour *neigh; struct net_device *dev; + struct inet6_dev *idev; /* * Okay, this does not seem to be appropriate @@ -536,15 +537,12 @@ static void rt6_probe(struct fib6_info *rt) nh_gw = &rt->fib6_nh.nh_gw; dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); + idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - struct inet6_dev *idev; - if (neigh->nud_state & NUD_VALID) goto out; - idev = __in6_dev_get(dev); - work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, @@ -554,11 +552,13 @@ static void rt6_probe(struct fib6_info *rt) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else { + } else if (time_after(jiffies, rt->last_probe + + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (work) { + rt->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 374e7d302f26..06d17ff3562f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -766,11 +766,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, ret = udpv6_queue_rcv_skb(sk, skb); 
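Stepping back to the rt6_probe() hunk above: the new fib6_info::last_probe field closes a hole where a missing neighbour entry meant probes were not rate-limited at all. A condensed sketch of the allocation path (names from the hunk; the surrounding locking is omitted):

	if (!neigh &&
	    time_after(jiffies, rt->last_probe + idev->cnf.rtr_probe_interval))
		work = kmalloc(sizeof(*work), GFP_ATOMIC);

	if (work) {
		rt->last_probe = jiffies;	/* stamp only when a probe is queued */
		INIT_WORK(&work->work, rt6_probe_deferred);
	}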
- /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef3defaf43b9..d35bcf92969c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - while (nh + offset + 1 < skb->data || - pskb_may_pull(skb, nh + offset + 1 - skb->data)) { + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { nh = skb_network_header(skb); exthdr = (struct ipv6_opt_hdr *)(nh + offset); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0ac522b48a1..4ff89cb7c86f 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); + sock_set_flag(sk, SOCK_RCU_FREE); sap->sk_count++; sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig index 08a8a6031fd7..7f2b46108a24 100644 --- a/net/ncsi/Kconfig +++ b/net/ncsi/Kconfig @@ -10,3 +10,9 @@ config NET_NCSI support. Enable this only if your system connects to a network device via NCSI and the ethernet driver you're using supports the protocol explicitly. +config NCSI_OEM_CMD_GET_MAC + bool "Get NCSI OEM MAC Address" + depends on NET_NCSI + ---help--- + This allows to get MAC address from NCSI firmware and set them back to + controller. diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 13c9b5eeb3b7..1dae77c54009 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -71,6 +71,13 @@ enum { /* OEM Vendor Manufacture ID */ #define NCSI_OEM_MFR_MLX_ID 0x8119 #define NCSI_OEM_MFR_BCM_ID 0x113d +/* Broadcom specific OEM Command */ +#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */ +/* OEM Command payload lengths*/ +#define NCSI_OEM_BCM_CMD_GMA_LEN 12 +/* Mac address offset in OEM response */ +#define BCM_MAC_ADDR_OFFSET 28 + struct ncsi_channel_version { u32 version; /* Supported BCD encoded NCSI version */ @@ -246,6 +253,7 @@ enum { ncsi_dev_state_probe_dp, ncsi_dev_state_config_sp = 0x0301, ncsi_dev_state_config_cis, + ncsi_dev_state_config_oem_gma, ncsi_dev_state_config_clear_vids, ncsi_dev_state_config_svf, ncsi_dev_state_config_ev, @@ -279,6 +287,7 @@ struct ncsi_dev_priv { #define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */ #define NCSI_DEV_HWA 2 /* Enabled HW arbitration */ #define NCSI_DEV_RESHUFFLE 4 + unsigned int gma_flag; /* OEM GMA flag */ spinlock_t lock; /* Protect the NCSI device */ #if IS_ENABLED(CONFIG_IPV6) unsigned int inet6_addr_num; /* Number of IPv6 addresses */ diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 6aa0614d2d28..bfc43b28c7a6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -651,6 +651,72 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } +#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + +/* NCSI OEM Command APIs */ +static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) +{ + unsigned char data[NCSI_OEM_BCM_CMD_GMA_LEN]; + int ret = 0; + + nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN; + + memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN); + *(unsigned int 
*)data = ntohl(NCSI_OEM_MFR_BCM_ID); + data[5] = NCSI_OEM_BCM_CMD_GMA; + + nca->data = data; + + ret = ncsi_xmit_cmd(nca); + if (ret) + netdev_err(nca->ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during configure\n", + nca->type); + return ret; +} + +/* OEM Command handlers initialization */ +static struct ncsi_oem_gma_handler { + unsigned int mfr_id; + int (*handler)(struct ncsi_cmd_arg *nca); +} ncsi_oem_gma_handlers[] = { + { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm } +}; + +static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) +{ + struct ncsi_oem_gma_handler *nch = NULL; + int i; + + /* This function should only be called once, return if flag set */ + if (nca->ndp->gma_flag == 1) + return -1; + + /* Find gma handler for given manufacturer id */ + for (i = 0; i < ARRAY_SIZE(ncsi_oem_gma_handlers); i++) { + if (ncsi_oem_gma_handlers[i].mfr_id == mf_id) { + if (ncsi_oem_gma_handlers[i].handler) + nch = &ncsi_oem_gma_handlers[i]; + break; + } + } + + if (!nch) { + netdev_err(nca->ndp->ndev.dev, + "NCSI: No GMA handler available for MFR-ID (0x%x)\n", + mf_id); + return -1; + } + + /* Set the flag for GMA command which should only be called once */ + nca->ndp->gma_flag = 1; + + /* Get Mac address from NCSI device */ + return nch->handler(nca); +} + +#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ + static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; @@ -701,7 +767,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; } + nd->state = ncsi_dev_state_config_oem_gma; + break; + case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; + ret = -1; + +#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + nca.type = NCSI_PKT_CMD_OEM; + nca.package = np->id; + nca.channel = nc->id; + ndp->pending_req_num = 1; + ret = ncsi_gma_handler(&nca, nc->version.mf_id); +#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ + + if (ret < 0) + schedule_work(&ndp->work); + break; case ncsi_dev_state_config_clear_vids: case ncsi_dev_state_config_svf: diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 0f2087c8d42a..4d3f06be38bd 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -165,6 +165,14 @@ struct ncsi_rsp_oem_pkt { unsigned char data[]; /* Payload data */ }; +/* Broadcom Response Data */ +struct ncsi_rsp_oem_bcm_pkt { + unsigned char ver; /* Payload Version */ + unsigned char type; /* OEM Command type */ + __be16 len; /* Payload Length */ + unsigned char data[]; /* Cmd specific Data */ +}; + /* Get Link Status */ struct ncsi_rsp_gls_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 85fa59afae34..77e07ba3f493 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -611,19 +611,59 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) return 0; } +/* Response handler for Broadcom command Get Mac Address */ +static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct net_device *ndev = ndp->ndev.dev; + const struct net_device_ops *ops = ndev->netdev_ops; + struct ncsi_rsp_oem_pkt *rsp; + struct sockaddr saddr; + int ret = 0; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + + saddr.sa_family = ndev->type; + ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); + /* Increase mac address by 1 for BMC's address */ + saddr.sa_data[ETH_ALEN - 1]++; + ret = 
ops->ndo_set_mac_address(ndev, &saddr); + if (ret < 0) + netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); + + return ret; +} + +/* Response handler for Broadcom card */ +static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) +{ + struct ncsi_rsp_oem_bcm_pkt *bcm; + struct ncsi_rsp_oem_pkt *rsp; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data); + + if (bcm->type == NCSI_OEM_BCM_CMD_GMA) + return ncsi_rsp_handler_oem_bcm_gma(nr); + return 0; +} + static struct ncsi_rsp_oem_handler { unsigned int mfr_id; int (*handler)(struct ncsi_request *nr); } ncsi_rsp_oem_handlers[] = { { NCSI_OEM_MFR_MLX_ID, NULL }, - { NCSI_OEM_MFR_BCM_ID, NULL } + { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm } }; /* Response handler for OEM command */ static int ncsi_rsp_handler_oem(struct ncsi_request *nr) { - struct ncsi_rsp_oem_pkt *rsp; struct ncsi_rsp_oem_handler *nrh = NULL; + struct ncsi_rsp_oem_pkt *rsp; unsigned int mfr_id, i; /* Get the response header */ diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 8a33dac4e805..e287da68d5fa 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -15,7 +15,7 @@ #define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) #define ipset_dereference_protected(p, set) \ - __ipset_dereference_protected(p, spin_is_locked(&(set)->lock)) + __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock)) #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 185c633b6872..a3cc2ef8a48a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -483,7 +483,7 @@ static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, flush_delayed_work(&flowtable->gc_work); } -void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +void nf_flow_table_cleanup(struct net_device *dev) { struct nf_flowtable *flowtable; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index b48545b84ce8..e7a50af1b3d6 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -350,7 +350,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, struct netlink_ext_ack *extack) { const struct nf_conntrack_l4proto *l4proto; - __u16 l3num; __u8 l4num; int ret; @@ -359,7 +358,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, !cda[CTA_TIMEOUT_DATA]) return -EINVAL; - l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find_get(l4num); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 00db27dfd2ff..6f41dd74729d 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -30,32 +30,27 @@ EXPORT_SYMBOL_GPL(nf_osf_fingers); static inline int nf_osf_ttl(const struct sk_buff *skb, int ttl_check, unsigned char f_ttl) { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); const struct iphdr *ip = ip_hdr(skb); - - if (ttl_check != -1) { - if (ttl_check == NF_OSF_TTL_TRUE) - return ip->ttl == f_ttl; - if (ttl_check == NF_OSF_TTL_NOCHECK) - return 1; - else if (ip->ttl <= f_ttl) - return 1; - else { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); - int ret = 0; - - for_ifa(in_dev) { - if 
(inet_ifa_match(ip->saddr, ifa)) { - ret = (ip->ttl == f_ttl); - break; - } - } - endfor_ifa(in_dev); - - return ret; + int ret = 0; + + if (ttl_check == NF_OSF_TTL_TRUE) + return ip->ttl == f_ttl; + if (ttl_check == NF_OSF_TTL_NOCHECK) + return 1; + else if (ip->ttl <= f_ttl) + return 1; + + for_ifa(in_dev) { + if (inet_ifa_match(ip->saddr, ifa)) { + ret = (ip->ttl == f_ttl); + break; } } - return ip->ttl == f_ttl; + endfor_ifa(in_dev); + + return ret; } struct nf_osf_hdr_ctx { @@ -213,7 +208,7 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, if (!tcp) return false; - ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1; + ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { @@ -257,7 +252,8 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, EXPORT_SYMBOL_GPL(nf_osf_match); const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers) + const struct list_head *nf_osf_fingers, + const int ttl_check) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; @@ -275,7 +271,7 @@ const char *nf_osf_find(const struct sk_buff *skb, list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; - if (!nf_osf_match_one(skb, f, -1, &ctx)) + if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; genre = f->genre; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 2cc1e0ef56e8..15cc62b293d6 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -46,8 +46,6 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, return nft_validate_register_load(priv->sreg_dev, sizeof(int)); } -static const struct nft_expr_ops nft_dup_netdev_ingress_ops; - static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_netdev *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index d6bab8c3cbb0..e82d9a966c45 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -201,7 +201,7 @@ static int flow_offload_netdev_event(struct notifier_block *this, if (event != NETDEV_DOWN) return NOTIFY_DONE; - nf_flow_table_cleanup(dev_net(dev), dev); + nf_flow_table_cleanup(dev); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 8abb9891cdf2..d7694e7255a0 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -53,8 +53,6 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, return nft_validate_register_load(priv->sreg_dev, sizeof(int)); } -static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; - static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_fwd_netdev *priv = nft_expr_priv(expr); @@ -169,8 +167,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return nft_validate_register_load(priv->sreg_addr, addr_len); } -static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; - static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_fwd_neigh *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index a35fb59ace73..0b452fd470c4 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -6,10 +6,12 @@ struct nft_osf { enum nft_registers dreg:8; + u8 ttl; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = 
NLA_U32 }, + [NFTA_OSF_TTL] = { .type = NLA_U8 }, }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -33,7 +35,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - os_name = nf_osf_find(skb, nf_osf_fingers); + os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl); if (!os_name) strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); else @@ -46,6 +48,14 @@ static int nft_osf_init(const struct nft_ctx *ctx, { struct nft_osf *priv = nft_expr_priv(expr); int err; + u8 ttl; + + if (tb[NFTA_OSF_TTL]) { + ttl = nla_get_u8(tb[NFTA_OSF_TTL]); + if (ttl > 2) + return -EINVAL; + priv->ttl = ttl; + } priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, @@ -60,6 +70,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_osf *priv = nft_expr_priv(expr); + if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg)) goto nla_put_failure; diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 3cf71a2e375b..5322609f7662 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -118,12 +118,13 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode) static void nft_xfrm_state_get_key(const struct nft_xfrm *priv, struct nft_regs *regs, - const struct xfrm_state *state, - u8 family) + const struct xfrm_state *state) { u32 *dest = &regs->data[priv->dreg]; - if (!xfrm_state_addr_ok(priv->key, family, state->props.mode)) { + if (!xfrm_state_addr_ok(priv->key, + state->props.family, + state->props.mode)) { regs->verdict.code = NFT_BREAK; return; } @@ -169,7 +170,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv, } state = sp->xvec[priv->spnum]; - nft_xfrm_state_get_key(priv, regs, state, nft_pf(pkt)); + nft_xfrm_state_get_key(priv, regs, state); } static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv, @@ -184,7 +185,7 @@ static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv, if (i < priv->spnum) continue; - nft_xfrm_state_get_key(priv, regs, dst->xfrm, nft_pf(pkt)); + nft_xfrm_state_get_key(priv, regs, dst->xfrm); return; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index bf7bba80e24c..7a103553d10d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -40,14 +40,8 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { - const struct xt_osf_info *info = p->matchinfo; - struct net *net = xt_net(p); - - if (!info) - return false; - return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p), - xt_out(p), info, net, nf_osf_fingers); + xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers); } static struct xt_match xt_osf_match = { diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index fceae245eb03..10d61a6eed71 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,6 +11,11 @@ #include <linux/netfilter/xt_quota.h> #include <linux/module.h> +struct xt_quota_priv { + spinlock_t lock; + uint64_t quota; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); MODULE_DESCRIPTION("Xtables: countdown quota match"); @@ -21,48 +26,54 @@ static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; - u64 current_count = atomic64_read(&q->counter); + struct xt_quota_priv *priv = q->master; bool ret = q->flags &
XT_QUOTA_INVERT; - u64 old_count, new_count; - - do { - if (current_count == 1) - return ret; - if (current_count <= skb->len) { - atomic64_set(&q->counter, 1); - return ret; - } - old_count = current_count; - new_count = current_count - skb->len; - current_count = atomic64_cmpxchg(&q->counter, old_count, - new_count); - } while (current_count != old_count); - return !ret; + + spin_lock_bh(&priv->lock); + if (priv->quota >= skb->len) { + priv->quota -= skb->len; + ret = !ret; + } else { + /* we do not allow even small packets from now on */ + priv->quota = 0; + } + spin_unlock_bh(&priv->lock); + + return ret; } static int quota_mt_check(const struct xt_mtchk_param *par) { struct xt_quota_info *q = par->matchinfo; - BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64)); - if (q->flags & ~XT_QUOTA_MASK) return -EINVAL; - if (atomic64_read(&q->counter) > q->quota + 1) - return -ERANGE; - if (atomic64_read(&q->counter) == 0) - atomic64_set(&q->counter, q->quota + 1); + q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); + if (q->master == NULL) + return -ENOMEM; + + spin_lock_init(&q->master->lock); + q->master->quota = q->quota; return 0; } +static void quota_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_quota_info *q = par->matchinfo; + + kfree(q->master); +} + static struct xt_match quota_mt_reg __read_mostly = { .name = "quota", .revision = 0, .family = NFPROTO_UNSPEC, .match = quota_mt, .checkentry = quota_mt_check, + .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), + .usersize = offsetof(struct xt_quota_info, master), .me = THIS_MODULE, }; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index e0d8ca03169a..44860505246d 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_connection *conn; - struct rxrpc_peer *peer; + struct rxrpc_peer *peer = NULL; struct rxrpc_call *call; _enter(""); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index cad0691c2bb4..0906e51d3cfb 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -139,7 +139,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) udp_sk(usk)->gro_complete = NULL; udp_encap_enable(); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) if (local->srx.transport.family == AF_INET6) udpv6_encap_enable(); #endif diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0f0b499d1202..189418888839 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -572,7 +572,8 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.flags ^= RXRPC_CLIENT_INITIATED; whdr.flags &= RXRPC_CLIENT_INITIATED; - ret = kernel_sendmsg(local->socket, &msg, iov, 2, size); + ret = kernel_sendmsg(local->socket, &msg, + iov, ioc, size); if (ret < 0) trace_rxrpc_tx_fail(local->debug_id, 0, ret, rxrpc_tx_point_reject); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7feb611c7258..bc05af89fc38 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -197,6 +197,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_put_peer(peer); _leave(""); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 43c8559aca56..f427a1e00e7e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,8 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> 
+extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -1304,7 +1306,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1454,7 +1456,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1570,7 +1572,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh = NULL; int err; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1937,7 +1939,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -2055,7 +2057,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, cb->extack); if (err) return err; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cf5c714ae786..022bca98bde6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1318,10 +1318,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) return 0; } -/* - * Delete/get qdisc. - */ - const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, [TCA_OPTIONS] = { .type = NLA_NESTED }, @@ -1334,6 +1330,10 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, }; +/* + * Delete/get qdisc. + */ + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2070,7 +2070,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); - if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + if (q && q != root && + tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 297d9cf960b9..a827a1f562bf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1450,7 +1450,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. 
*/ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst)); + sctp_transport_update_pmtu(t, + atomic_read(&t->mtu_info)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 9bbc5f92c941..5c36a99882ed 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -395,6 +395,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; if (sock_owned_by_user(sk)) { + atomic_set(&t->mtu_info, pmtu); asoc->pmtu_pending = 1; t->pmtu_pending = 1; return; diff --git a/net/sctp/output.c b/net/sctp/output.c index 7f849b01ec8e..67939ad99c01 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -120,6 +120,12 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, sctp_assoc_sync_pmtu(asoc); } + if (asoc->pmtu_pending) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + asoc->pmtu_pending = 0; + } + /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 42191ed9902b..9cb854b05342 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -385,9 +385,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, asoc->outqueue.outstanding_bytes -= sctp_data_size(chk); } - msg_len -= SCTP_DATA_SNDSIZE(chk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); if (msg_len <= 0) break; } @@ -421,9 +419,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } - msg_len -= SCTP_DATA_SNDSIZE(chk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); if (msg_len <= 0) break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f73e9d38d5ba..fc0386e8ff23 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -83,7 +83,7 @@ #include <net/sctp/stream_sched.h> /* Forward declarations for internal helper functions. */ -static int sctp_writeable(struct sock *sk); +static bool sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len); @@ -119,25 +119,10 @@ static void sctp_enter_memory_pressure(struct sock *sk) /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { - int amt; + struct sock *sk = asoc->base.sk; - if (asoc->ep->sndbuf_policy) - amt = asoc->sndbuf_used; - else - amt = sk_wmem_alloc_get(asoc->base.sk); - - if (amt >= asoc->base.sk->sk_sndbuf) { - if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) - amt = 0; - else { - amt = sk_stream_wspace(asoc->base.sk); - if (amt < 0) - amt = 0; - } - } else { - amt = asoc->base.sk->sk_sndbuf - amt; - } - return amt; + return asoc->ep->sndbuf_policy ? sk->sk_sndbuf - asoc->sndbuf_used + : sk_stream_wspace(sk); } /* Increment the used sndbuf space count of the corresponding association by @@ -166,12 +151,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) /* Save the chunk pointer in skb for sctp_wfree to use later. 
*/ skb_shinfo(chunk->skb)->destructor_arg = chunk; - asoc->sndbuf_used += SCTP_DATA_SNDSIZE(chunk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); - refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); - sk->sk_wmem_queued += chunk->skb->truesize; + asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk); + sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk); sk_mem_charge(sk, chunk->skb->truesize); } @@ -271,11 +253,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } @@ -1928,10 +1909,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, asoc->pmtu_pending = 0; } - if (sctp_wspace(asoc) < msg_len) + if (sctp_wspace(asoc) < (int)msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - if (!sctp_wspace(asoc)) { + if (sctp_wspace(asoc) <= 0) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) @@ -1946,8 +1927,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sp->strm_interleave) { timeo = sock_sndtimeo(sk, 0); err = sctp_wait_for_connect(asoc, &timeo); - if (err) + if (err) { + err = -ESRCH; goto err; + } } else { wait_connect = true; } @@ -7100,14 +7083,14 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc) goto out; - if (policy == SCTP_PR_SCTP_NONE) { + if (policy & SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -7159,7 +7142,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); @@ -7175,7 +7158,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, goto out; } - if (policy == SCTP_PR_SCTP_NONE) { + if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -8460,17 +8443,11 @@ static void sctp_wfree(struct sk_buff *skb) struct sctp_association *asoc = chunk->asoc; struct sock *sk = asoc->base.sk; - asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); - - WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc)); - - /* - * This undoes what is done via sctp_set_owner_w and sk_mem_charge - */ - sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk); + asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk); + WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), + &sk->sk_wmem_alloc)); if (chunk->shkey) { struct sctp_shared_key *shkey = chunk->shkey; @@ -8544,7 +8521,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, goto do_error; 
if (signal_pending(current)) goto do_interrupted; - if (msg_len <= sctp_wspace(asoc)) + if ((int)msg_len <= sctp_wspace(asoc)) break; /* Let another process have a go. Since we are going @@ -8619,14 +8596,9 @@ void sctp_write_space(struct sock *sk) * UDP-style sockets or TCP-style sockets, this code should work. * - Daisy */ -static int sctp_writeable(struct sock *sk) +static bool sctp_writeable(struct sock *sk) { - int amt = 0; - - amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amt < 0) - amt = 0; - return amt; + return sk->sk_sndbuf > sk->sk_wmem_queued; } /* Wait for an association to go into ESTABLISHED state. If timeout is 0, diff --git a/net/socket.c b/net/socket.c index 713dc4833d40..b68801c7d0ab 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2875,9 +2875,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return -EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } diff --git a/net/tipc/group.c b/net/tipc/group.c index e82f13cb2dc5..06fee142f09f 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -666,6 +666,7 @@ static void tipc_group_create_event(struct tipc_group *grp, struct sk_buff *skb; struct tipc_msg *hdr; + memset(&evt, 0, sizeof(evt)); evt.event = event; evt.found_lower = m->instance; evt.found_upper = m->instance; diff --git a/net/tipc/link.c b/net/tipc/link.c index f6552e4f4b43..201c3b5bc96b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1041,6 +1041,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, if (r->last_retransm != buf_seqno(skb)) { r->last_retransm = buf_seqno(skb); r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); + r->stale_cnt = 0; } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { link_retransmit_failure(l, skb); if (link_is_bc_sndlink(l)) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 51b4b96f89db..3cfeb9df64b0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_del(&publ->binding_node); + list_del_rcu(&publ->binding_node); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; - list_for_each_entry(publ, pls, binding_node) { + list_for_each_entry_rcu(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, msg_rem, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d1edfa3cad61..98d34fb61744 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -225,6 +225,8 @@ static inline void unix_release_addr(struct unix_address *addr) static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { + *hashp = 0; + if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 0577cd49aa72..07156f43d295 100644 --- 
a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -754,6 +754,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk->sk_destruct = xsk_destruct; sk_refcnt_debug_inc(sk); + sock_set_flag(sk, SOCK_RCU_FREE); + xs = xdp_sk(sk); mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 61be810389d8..ce66323102f9 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -13,7 +13,7 @@ static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { - return ntohl(addr->a6[2] ^ addr->a6[3]); + return jhash2((__force u32 *)addr->a6, 4, 0); } static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, @@ -26,8 +26,7 @@ static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr) { - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); + return __xfrm6_addr_hash(daddr) ^ __xfrm6_addr_hash(saddr); } static inline u32 __bits2mask32(__u8 bits) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index be3520e429c9..684c0bc01e2c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -131,7 +131,7 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; - memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + memset(sp->ovec, 0, sizeof(sp->ovec)); if (src) { int i; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dc5b20bf29cf..d679fa0f44b3 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -116,6 +116,9 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) static void xfrmi_dev_free(struct net_device *dev) { + struct xfrm_if *xi = netdev_priv(dev); + + gro_cells_destroy(&xi->gro_cells); free_percpu(dev->tstats); } @@ -561,9 +564,6 @@ static void xfrmi_get_stats64(struct net_device *dev, { int cpu; - if (!dev->tstats) - return; - for_each_possible_cpu(cpu) { struct pcpu_sw_netstats *stats; struct pcpu_sw_netstats tmp; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f094d4b3520d..119a427d9b2b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -632,9 +632,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -774,9 +774,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define 
KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..251be353f950 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 120037496f77..5afb11b30fca 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -36,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void) __tracing_path_set("", mnt); - return mnt; + return tracing_path; } static const char *tracing_path_debugfs_mount(void) @@ -49,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void) __tracing_path_set("tracing/", mnt); - return mnt; + return tracing_path; } const char *tracing_path_mount(void) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6d1a03c7523..e30d20fb482d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -833,7 +833,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5224ade3d5af..0be411695379 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -635,7 +635,7 @@ $(LIBPERF_IN): prepare FORCE $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 76e12bcd1765..b2188e623e22 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -981,6 +981,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e..635c09fda1d9 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", 
"MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd..8755693d86c6 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": 
"UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0cd42150f712..bc646185f8d9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } @@ -1560,26 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } -try_again: + al->map = map_groups__find(mg, al->addr); - if (al->map == NULL) { - /* - * If this is outside of all known maps, and is a negative - * address, try to look it up in the kernel dso, as it might be - * a vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in the - * "[vdso]" dso, but for now lets use the old trick of looking - * in the whole kernel symbol list. - */ - if (cpumode == PERF_RECORD_MISC_USER && machine && - mg != &machine->kmaps && - machine__kernel_ip(machine, al->addr)) { - mg = &machine->kmaps; - load_map = true; - goto try_again; - } - } else { + if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1a61628a1c12..e596ae358c4d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1089,6 +1089,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } + if (evsel->own_cpus) + evsel->attr.read_format |= PERF_FORMAT_ID; + /* * Apply event specific term settings, * it overloads any global configuration. 
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index afd68524ffa9..7799788f662f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -930,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; - - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); + int w; - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 09d6746e6ec8..e767c4a9d4d2 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso, struct symbol *inline_sym; char *demangled = NULL; + if (!funcname) + funcname = "??"; + if (dso) { demangled = dso__demangle_sym(dso, 0, funcname); if (demangled) diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh index a72df93cf1f8..128f0ab24307 100755 --- a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh +++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh @@ -141,6 +141,10 @@ echo "Import devices from localhost - should work" src/usbip attach -r localhost -b $busid; echo "==============================================================" +# Wait for sysfs file to be updated. Without this sleep, usbip port +# shows no imported devices. +sleep 3; + echo "List imported devices - expect to see imported devices"; src/usbip port; echo "==============================================================" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc new file mode 100644 index 000000000000..88e6c3f43006 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test synthetic_events syntax parser" + +echo > synthetic_events + +# synthetic event must have a field +! echo "myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + +# synthetic event must be found in synthetic_events +grep "myevent[[:space:]]u64 var1" synthetic_events + +# it is not possible to add same name event +! 
echo "myevent u64 var2" >> synthetic_events + +# Non-append open will cleanup all events and add new one +echo "myevent u64 var2" > synthetic_events + +# multiple fields with different spaces +echo "myevent u64 var1; u64 var2;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ;u64 var2" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events + +# test field types +echo "myevent u32 var" > synthetic_events +echo "myevent u16 var" > synthetic_events +echo "myevent u8 var" > synthetic_events +echo "myevent s64 var" > synthetic_events +echo "myevent s32 var" > synthetic_events +echo "myevent s16 var" > synthetic_events +echo "myevent s8 var" > synthetic_events + +echo "myevent char var" > synthetic_events +echo "myevent int var" > synthetic_events +echo "myevent long var" > synthetic_events +echo "myevent pid_t var" > synthetic_events + +echo "myevent unsigned char var" > synthetic_events +echo "myevent unsigned int var" > synthetic_events +echo "myevent unsigned long var" > synthetic_events +grep "myevent[[:space:]]unsigned long var" synthetic_events + +# test string type +echo "myevent char var[10]" > synthetic_events +grep "myevent[[:space:]]char\[10\] var" synthetic_events + +do_reset + +exit 0 diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index cad14cd0ea92..b5277106df1f 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -437,14 +437,19 @@ void enable_fastopen(void) } } -static struct rlimit rlim_old, rlim_new; +static struct rlimit rlim_old; static __attribute__((constructor)) void main_ctor(void) { getrlimit(RLIMIT_MEMLOCK, &rlim_old); - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); + + if (rlim_old.rlim_cur != RLIM_INFINITY) { + struct rlimit rlim_new; + + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } } static __attribute__((destructor)) void main_dtor(void) |