-rw-r--r--Documentation/bpf/bpf_devel_QA.txt12
-rw-r--r--Documentation/devicetree/bindings/net/dsa/marvell.txt11
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt23
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt1
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt5
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83867.txt2
-rw-r--r--Documentation/networking/ip-sysctl.txt13
-rw-r--r--Documentation/networking/msg_zerocopy.rst5
-rw-r--r--Documentation/networking/net_dim.txt174
-rw-r--r--Documentation/networking/packet_mmap.txt22
-rw-r--r--Documentation/sysctl/net.txt12
-rw-r--r--MAINTAINERS16
-rw-r--r--arch/arm/boot/dts/armada-370-rd.dts32
-rw-r--r--arch/m68k/mac/config.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c234
-rw-r--r--drivers/atm/idt77252.c12
-rw-r--r--drivers/bluetooth/ath3k.c28
-rw-r--r--drivers/bluetooth/btmrvl_main.c2
-rw-r--r--drivers/bluetooth/btrtl.c119
-rw-r--r--drivers/bluetooth/btusb.c10
-rw-r--r--drivers/bluetooth/hci_ath.c4
-rw-r--r--drivers/bluetooth/hci_ll.c2
-rw-r--r--drivers/infiniband/core/cma.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c64
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c192
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h72
-rw-r--r--drivers/infiniband/hw/mlx5/main.c394
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h38
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c5
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c27
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c5
-rw-r--r--drivers/isdn/mISDN/socket.c5
-rw-r--r--drivers/net/Kconfig4
-rw-r--r--drivers/net/Space.c6
-rw-r--r--drivers/net/bonding/bond_main.c1
-rw-r--r--drivers/net/dsa/b53/b53_common.c2
-rw-r--r--drivers/net/dsa/b53/b53_priv.h2
-rw-r--r--drivers/net/dsa/dsa_loop.c2
-rw-r--r--drivers/net/dsa/lan9303-core.c2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c2
-rw-r--r--drivers/net/dsa/mt7530.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/Kconfig10
-rw-r--r--drivers/net/dsa/mv88e6xxx/Makefile4
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c399
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h130
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c43
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h115
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2_avb.c193
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2_scratch.c291
-rw-r--r--drivers/net/dsa/mv88e6xxx/hwtstamp.c576
-rw-r--r--drivers/net/dsa/mv88e6xxx/hwtstamp.h172
-rw-r--r--drivers/net/dsa/mv88e6xxx/ptp.c381
-rw-r--r--drivers/net/dsa/mv88e6xxx/ptp.h108
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c106
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.h6
-rw-r--r--drivers/net/dsa/qca8k.c2
-rw-r--r--drivers/net/ethernet/8390/Makefile6
-rw-r--r--drivers/net/ethernet/8390/ax88796.c3
-rw-r--r--drivers/net/ethernet/8390/axnet_cs.c2
-rw-r--r--drivers/net/ethernet/8390/etherh.c17
-rw-r--r--drivers/net/ethernet/8390/hydra.c4
-rw-r--r--drivers/net/ethernet/8390/lib8390.c2
-rw-r--r--drivers/net/ethernet/8390/mac8390.c171
-rw-r--r--drivers/net/ethernet/8390/mcf8390.c4
-rw-r--r--drivers/net/ethernet/8390/ne.c2
-rw-r--r--drivers/net/ethernet/8390/pcnet_cs.c4
-rw-r--r--drivers/net/ethernet/8390/wd.c2
-rw-r--r--drivers/net/ethernet/8390/zorro8390.c5
-rw-r--r--drivers/net/ethernet/amd/amd8111e.c2
-rw-r--r--drivers/net/ethernet/apple/macmace.c25
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile2
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c84
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_core.c104
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_ethtool.c24
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c182
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c67
-rw-r--r--drivers/net/ethernet/cavium/liquidio/liquidio_common.h22
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.c83
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.h11
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h20
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/response_manager.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/t3_hw.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/Makefile2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c86
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h23
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c10
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c24
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c279
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sched.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/srq.c138
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/srq.h65
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c194
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_msg.h71
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h61
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c51
-rw-r--r--drivers/net/ethernet/cirrus/mac89x0.c158
-rw-r--r--drivers/net/ethernet/cisco/enic/enic.h3
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c36
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c86
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_dev.c22
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_dev.h3
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_devcmd.h5
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_nic.h1
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c65
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c2
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c19
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c32
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.c33
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c3
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c392
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c149
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c540
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h27
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c94
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c76
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c475
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h35
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c95
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c600
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h16
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_common.c5
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c10
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c13
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_tlv.c7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h11
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h37
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.c16
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c63
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c52
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c111
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_fcoe.c1571
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_fcoe.h127
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c375
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c437
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h70
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c1090
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h20
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c427
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h67
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h78
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c52
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c933
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c338
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c35
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c53
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c6
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c48
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h72
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c825
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c927
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c113
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c323
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c92
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c1289
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c207
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c141
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h28
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h156
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c542
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c174
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c189
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c206
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c804
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h107
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/switchx2.c8
-rw-r--r--drivers/net/ethernet/microchip/Kconfig10
-rw-r--r--drivers/net/ethernet/microchip/Makefile3
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c2771
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h597
-rw-r--r--drivers/net/ethernet/natsemi/jazzsonic.c32
-rw-r--r--drivers/net/ethernet/natsemi/macsonic.c244
-rw-r--r--drivers/net/ethernet/natsemi/sonic.c99
-rw-r--r--drivers/net/ethernet/natsemi/sonic.h2
-rw-r--r--drivers/net/ethernet/natsemi/xtsonic.c30
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h11
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c20
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c34
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h280
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nic/Makefile2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c1
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c73
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h2
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c12
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c4
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c5
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c1154
-rw-r--r--drivers/net/ethernet/renesas/ravb.h1
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c33
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c37
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.h5
-rw-r--r--drivers/net/ethernet/sfc/ef10.c273
-rw-r--r--drivers/net/ethernet/sfc/efx.c65
-rw-r--r--drivers/net/ethernet/sfc/efx.h12
-rw-r--r--drivers/net/ethernet/sfc/ethtool.c185
-rw-r--r--drivers/net/ethernet/sfc/falcon/enum.h1
-rw-r--r--drivers/net/ethernet/sfc/farch.c11
-rw-r--r--drivers/net/ethernet/sfc/filter.h7
-rw-r--r--drivers/net/ethernet/sfc/mcdi_pcol.h2822
-rw-r--r--drivers/net/ethernet/sfc/mcdi_port.c150
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h52
-rw-r--r--drivers/net/ethernet/sfc/nic.h2
-rw-r--r--drivers/net/ethernet/sfc/siena.c26
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c208
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c41
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c34
-rw-r--r--drivers/net/ethernet/ti/cpsw.c67
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.c2
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.h2
-rw-r--r--drivers/net/geneve.c1
-rw-r--r--drivers/net/gtp.c1
-rw-r--r--drivers/net/hyperv/Makefile2
-rw-r--r--drivers/net/hyperv/netvsc.c26
-rw-r--r--drivers/net/hyperv/netvsc_trace.c7
-rw-r--r--drivers/net/hyperv/netvsc_trace.h182
-rw-r--r--drivers/net/hyperv/rndis_filter.c12
-rw-r--r--drivers/net/ieee802154/Kconfig11
-rw-r--r--drivers/net/ieee802154/Makefile1
-rw-r--r--drivers/net/ieee802154/mcr20a.c1413
-rw-r--r--drivers/net/ieee802154/mcr20a.h498
-rw-r--r--drivers/net/ipvlan/ipvlan.h7
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c103
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c119
-rw-r--r--drivers/net/loopback.c1
-rw-r--r--drivers/net/phy/aquantia.c20
-rw-r--r--drivers/net/phy/bcm7xxx.c2
-rw-r--r--drivers/net/phy/cortina.c18
-rw-r--r--drivers/net/phy/dp83867.c19
-rw-r--r--drivers/net/phy/marvell.c2
-rw-r--r--drivers/net/phy/marvell10g.c13
-rw-r--r--drivers/net/phy/mdio-mux-mmioreg.c5
-rw-r--r--drivers/net/phy/phy-c45.c28
-rw-r--r--drivers/net/phy/phy-core.c4
-rw-r--r--drivers/net/phy/phy.c22
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/phy/phylink.c45
-rw-r--r--drivers/net/phy/sfp-bus.c162
-rw-r--r--drivers/net/phy/sfp.c150
-rw-r--r--drivers/net/phy/teranetics.c32
-rw-r--r--drivers/net/ppp/ppp_generic.c1
-rw-r--r--drivers/net/ppp/pppoe.c7
-rw-r--r--drivers/net/ppp/pptp.c6
-rw-r--r--drivers/net/team/team.c16
-rw-r--r--drivers/net/tun.c91
-rw-r--r--drivers/net/usb/ax88179_178a.c118
-rw-r--r--drivers/net/usb/cdc_eem.c5
-rw-r--r--drivers/net/usb/kalmia.c14
-rw-r--r--drivers/net/usb/lg-vl600.c6
-rw-r--r--drivers/net/vrf.c13
-rw-r--r--drivers/net/vxlan.c1
-rw-r--r--drivers/net/wimax/i2400m/usb-rx.c3
-rw-r--r--drivers/net/wireless/ath/wil6210/cfg80211.c3
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c96
-rw-r--r--drivers/net/wireless/mac80211_hwsim.h9
-rw-r--r--drivers/net/wireless/ti/wl1251/tx.c4
-rw-r--r--drivers/net/xen-netback/rx.c2
-rw-r--r--drivers/s390/net/qeth_core.h8
-rw-r--r--drivers/s390/net/qeth_core_main.c40
-rw-r--r--drivers/s390/net/qeth_l2_main.c34
-rw-r--r--drivers/s390/net/qeth_l3.h34
-rw-r--r--drivers/s390/net/qeth_l3_main.c419
-rw-r--r--drivers/s390/net/qeth_l3_sys.c51
-rw-r--r--drivers/scsi/iscsi_tcp.c14
-rw-r--r--drivers/soc/qcom/qmi_interface.c3
-rw-r--r--drivers/staging/ipx/af_ipx.c6
-rw-r--r--drivers/staging/irda/net/af_irda.c8
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c7
-rw-r--r--drivers/target/iscsi/iscsi_target_login.c18
-rw-r--r--drivers/usb/gadget/function/f_eem.c1
-rw-r--r--drivers/vhost/net.c7
-rw-r--r--drivers/vhost/vsock.c4
-rw-r--r--fs/dlm/lowcomms.c7
-rw-r--r--fs/lockd/svc.c1
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs_common/grace.c1
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/ocfs2/cluster/tcp.c6
-rw-r--r--fs/proc/proc_net.c1
-rw-r--r--include/dt-bindings/net/ti-dp83867.h14
-rw-r--r--include/linux/atalk.h2
-rw-r--r--include/linux/avf/virtchnl.h107
-rw-r--r--include/linux/bpf-cgroup.h2
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/ethtool.h5
-rw-r--r--include/linux/filter.h19
-rw-r--r--include/linux/ieee80211.h14
-rw-r--r--include/linux/mlx5/accel.h144
-rw-r--r--include/linux/mlx5/cq.h14
-rw-r--r--include/linux/mlx5/driver.h94
-rw-r--r--include/linux/mlx5/eswitch.h58
-rw-r--r--include/linux/mlx5/fs.h5
-rw-r--r--include/linux/mlx5/fs_helpers.h134
-rw-r--r--include/linux/mlx5/mlx5_ifc.h9
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h92
-rw-r--r--include/linux/mroute.h88
-rw-r--r--include/linux/mroute6.h62
-rw-r--r--include/linux/mroute_base.h346
-rw-r--r--include/linux/net.h8
-rw-r--r--include/linux/netdevice.h50
-rw-r--r--include/linux/phy.h8
-rw-r--r--include/linux/ptp_classify.h4
-rw-r--r--include/linux/ptr_ring.h7
-rw-r--r--include/linux/rtnetlink.h3
-rw-r--r--include/linux/sfp.h18
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/socket.h3
-rw-r--r--include/net/Space.h2
-rw-r--r--include/net/act_api.h19
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/ax25.h2
-rw-r--r--include/net/cfg80211.h110
-rw-r--r--include/net/devlink.h5
-rw-r--r--include/net/dsa.h22
-rw-r--r--include/net/dst.h1
-rw-r--r--include/net/dst_cache.h4
-rw-r--r--include/net/ethoc.h1
-rw-r--r--include/net/fib_rules.h45
-rw-r--r--include/net/flow.h18
-rw-r--r--include/net/gre.h3
-rw-r--r--include/net/ieee80211_radiotap.h2
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/ip.h25
-rw-r--r--include/net/ip6_fib.h29
-rw-r--r--include/net/ip6_route.h15
-rw-r--r--include/net/ip_fib.h31
-rw-r--r--include/net/ip_tunnels.h18
-rw-r--r--include/net/ipv6.h23
-rw-r--r--include/net/lwtunnel.h15
-rw-r--r--include/net/mac80211.h19
-rw-r--r--include/net/net_namespace.h34
-rw-r--r--include/net/netevent.h3
-rw-r--r--include/net/netns/ipv4.h6
-rw-r--r--include/net/netns/ipv6.h6
-rw-r--r--include/net/pkt_cls.h8
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/net/sctp/auth.h21
-rw-r--r--include/net/sctp/command.h1
-rw-r--r--include/net/sctp/sctp.h11
-rw-r--r--include/net/sctp/sm.h3
-rw-r--r--include/net/sctp/structs.h12
-rw-r--r--include/net/sock.h16
-rw-r--r--include/net/tcp.h9
-rw-r--r--include/net/tcp_states.h26
-rw-r--r--include/net/xfrm.h14
-rw-r--r--include/uapi/linux/batadv_packet.h15
-rw-r--r--include/uapi/linux/batman_adv.h84
-rw-r--r--include/uapi/linux/bpf.h48
-rw-r--r--include/uapi/linux/bpf_perf_event.h1
-rw-r--r--include/uapi/linux/ethtool.h32
-rw-r--r--include/uapi/linux/fib_rules.h11
-rw-r--r--include/uapi/linux/if_ether.h1
-rw-r--r--include/uapi/linux/if_link.h39
-rw-r--r--include/uapi/linux/ncsi.h115
-rw-r--r--include/uapi/linux/nl80211.h90
-rw-r--r--include/uapi/linux/pkt_cls.h4
-rw-r--r--include/uapi/linux/rds.h8
-rw-r--r--include/uapi/linux/sctp.h43
-rw-r--r--include/uapi/linux/tc_ematch/tc_em_ipt.h20
-rw-r--r--include/uapi/linux/tcp.h3
-rw-r--r--include/uapi/linux/tipc.h102
-rw-r--r--include/uapi/linux/tipc_netlink.h19
-rw-r--r--include/uapi/linux/tipc_sockets_diag.h17
-rw-r--r--kernel/audit.c1
-rw-r--r--kernel/bpf/inode.c3
-rw-r--r--kernel/bpf/sockmap.c733
-rw-r--r--kernel/bpf/stackmap.c257
-rw-r--r--kernel/bpf/syscall.c14
-rw-r--r--kernel/bpf/verifier.c9
-rw-r--r--kernel/trace/bpf_trace.c20
-rw-r--r--lib/kobject_uevent.c97
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/pvc.c5
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c4
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c156
-rw-r--r--net/batman-adv/distributed-arp-table.h10
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/icmp_socket.c2
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h16
-rw-r--r--net/batman-adv/multicast.c295
-rw-r--r--net/batman-adv/multicast.h20
-rw-r--r--net/batman-adv/netlink.c90
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/originator.h2
-rw-r--r--net/batman-adv/routing.c2
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/sysfs.c2
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/sco.c5
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/bridge/netfilter/ebtable_broute.c1
-rw-r--r--net/bridge/netfilter/ebtable_filter.c1
-rw-r--r--net/bridge/netfilter/ebtable_nat.c1
-rw-r--r--net/bridge/netfilter/nf_log_bridge.c1
-rw-r--r--net/caif/caif_dev.c1
-rw-r--r--net/can/af_can.c1
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/can/gw.c1
-rw-r--r--net/can/raw.c6
-rw-r--r--net/core/dev.c26
-rw-r--r--net/core/devlink.c44
-rw-r--r--net/core/dst_cache.c4
-rw-r--r--net/core/ethtool.c64
-rw-r--r--net/core/fib_notifier.c1
-rw-r--r--net/core/fib_rules.c105
-rw-r--r--net/core/filter.c277
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/core/net-procfs.c2
-rw-r--r--net/core/net_namespace.c139
-rw-r--r--net/core/pktgen.c16
-rw-r--r--net/core/rtnetlink.c11
-rw-r--r--net/core/skbuff.c10
-rw-r--r--net/core/sock.c90
-rw-r--r--net/core/sock_diag.c1
-rw-r--r--net/core/sysctl_net_core.c13
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/decnet/af_decnet.c6
-rw-r--r--net/dsa/dsa.c36
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/slave.c61
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c1
-rw-r--r--net/ieee802154/core.c1
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/arp.c1
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c16
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/icmp.c1
-rw-r--r--net/ipv4/igmp.c1
-rw-r--r--net/ipv4/inetpeer.c3
-rw-r--r--net/ipv4/ip_fragment.c1
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_sockglue.c34
-rw-r--r--net/ipv4/ip_tunnel.c54
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/ipmr.c597
-rw-r--r--net/ipv4/ipmr_base.c323
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/arptable_filter.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c1
-rw-r--r--net/ipv4/netfilter/iptable_filter.c1
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c1
-rw-r--r--net/ipv4/netfilter/iptable_nat.c1
-rw-r--r--net/ipv4/netfilter/iptable_raw.c1
-rw-r--r--net/ipv4/netfilter/iptable_security.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c1
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c1
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c91
-rw-r--r--net/ipv4/sysctl_net_ipv4.c35
-rw-r--r--net/ipv4/tcp.c69
-rw-r--r--net/ipv4/tcp_bbr.c38
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_ipv4.c28
-rw-r--r--net/ipv4/tcp_metrics.c1
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c55
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c87
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_policy.c3
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c63
-rw-r--r--net/ipv6/addrlabel.c1
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/anycast.c12
-rw-r--r--net/ipv6/datagram.c5
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c36
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_gre.c26
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c19
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/ip6mr.c993
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/ndisc.c5
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c1
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c1
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c1
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c1
-rw-r--r--net/ipv6/netfilter/ip6table_security.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c1
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/reassembly.c1
-rw-r--r--net/ipv6/route.c301
-rw-r--r--net/ipv6/seg6.c1
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/sysctl_net_ipv6.c28
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/ipv6/udp.c52
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/ipv6/xfrm6_tunnel.c1
-rw-r--r--net/iucv/af_iucv.c5
-rw-r--r--net/kcm/kcmproc.c1
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_core.c1
-rw-r--r--net/l2tp/l2tp_ip.c5
-rw-r--r--net/l2tp/l2tp_ip6.c5
-rw-r--r--net/l2tp/l2tp_ppp.c6
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/mac80211/agg-rx.c14
-rw-r--r--net/mac80211/cfg.c1
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_sta.c10
-rw-r--r--net/mac80211/iface.c3
-rw-r--r--net/mac80211/michael.c2
-rw-r--r--net/mac80211/mlme.c18
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rx.c188
-rw-r--r--net/mac80211/sta_info.c6
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c11
-rw-r--r--net/mac80211/tx.c11
-rw-r--r--net/mac80211/vht.c9
-rw-r--r--net/mac80211/wpa.c8
-rw-r--r--net/mpls/af_mpls.c1
-rw-r--r--net/ncsi/Makefile2
-rw-r--r--net/ncsi/internal.h3
-rw-r--r--net/ncsi/ncsi-manage.c30
-rw-r--r--net/ncsi/ncsi-netlink.c423
-rw-r--r--net/ncsi/ncsi-netlink.h20
-rw-r--r--net/netfilter/core.c1
-rw-r--r--net/netfilter/ipset/ip_set_core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_conntrack_standalone.c1
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/nf_log_netdev.c1
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/nf_tables_api.c1
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nfnetlink_acct.c1
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c1
-rw-r--r--net/netfilter/nfnetlink_log.c1
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/netfilter/x_tables.c1
-rw-r--r--net/netfilter/xt_hashlimit.c1
-rw-r--r--net/netfilter/xt_recent.c1
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/netlink/genetlink.c1
-rw-r--r--net/netrom/af_netrom.c9
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/packet/af_packet.c11
-rw-r--r--net/phonet/pn_dev.c1
-rw-r--r--net/phonet/socket.c5
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/rds/af_rds.c14
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/message.c163
-rw-r--r--net/rds/rds.h31
-rw-r--r--net/rds/recv.c42
-rw-r--r--net/rds/send.c54
-rw-r--r--net/rds/tcp.c116
-rw-r--r--net/rose/af_rose.c5
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c192
-rw-r--r--net/sched/act_bpf.c11
-rw-r--r--net/sched/act_connmark.c12
-rw-r--r--net/sched/act_csum.c11
-rw-r--r--net/sched/act_gact.c25
-rw-r--r--net/sched/act_ife.c11
-rw-r--r--net/sched/act_ipt.c22
-rw-r--r--net/sched/act_mirred.c26
-rw-r--r--net/sched/act_nat.c12
-rw-r--r--net/sched/act_pedit.c11
-rw-r--r--net/sched/act_police.c12
-rw-r--r--net/sched/act_sample.c11
-rw-r--r--net/sched/act_simple.c11
-rw-r--r--net/sched/act_skbedit.c11
-rw-r--r--net/sched/act_skbmod.c11
-rw-r--r--net/sched/act_tunnel_key.c11
-rw-r--r--net/sched/act_vlan.c11
-rw-r--r--net/sched/cls_api.c6
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/em_ipt.c257
-rw-r--r--net/sched/sch_api.c8
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/auth.c146
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/diag.c (renamed from net/sctp/sctp_diag.c)31
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c18
-rw-r--r--net/sctp/proc.c90
-rw-r--r--net/sctp/protocol.c61
-rw-r--r--net/sctp/sm_make_chunk.c33
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/sm_statefuns.c56
-rw-r--r--net/sctp/socket.c828
-rw-r--r--net/smc/af_smc.c211
-rw-r--r--net/smc/smc.h9
-rw-r--r--net/smc/smc_clc.c214
-rw-r--r--net/smc/smc_clc.h22
-rw-r--r--net/smc/smc_core.c100
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_ib.c10
-rw-r--r--net/smc/smc_llc.c408
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c51
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/svcsock.c13
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/sysctl_net.c1
-rw-r--r--net/tipc/Kconfig8
-rw-r--r--net/tipc/Makefile7
-rw-r--r--net/tipc/addr.c31
-rw-r--r--net/tipc/addr.h10
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/core.c1
-rw-r--r--net/tipc/core.h11
-rw-r--r--net/tipc/diag.c114
-rw-r--r--net/tipc/group.c2
-rw-r--r--net/tipc/link.c3
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/name_distr.c63
-rw-r--r--net/tipc/name_distr.h2
-rw-r--r--net/tipc/name_table.c271
-rw-r--r--net/tipc/name_table.h56
-rw-r--r--net/tipc/net.c2
-rw-r--r--net/tipc/node.c24
-rw-r--r--net/tipc/node.h1
-rw-r--r--net/tipc/server.c710
-rw-r--r--net/tipc/socket.c168
-rw-r--r--net/tipc/socket.h10
-rw-r--r--net/tipc/subscr.c361
-rw-r--r--net/tipc/subscr.h66
-rw-r--r--net/tipc/topsrv.c703
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/server.h)57
-rw-r--r--net/tls/tls_sw.c69
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/wireless/core.c1
-rw-r--r--net/wireless/nl80211.c203
-rw-r--r--net/wireless/rdev-ops.h15
-rw-r--r--net/wireless/trace.h23
-rw-r--r--net/wireless/util.c5
-rw-r--r--net/wireless/wext-core.c1
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/x25_subr.c3
-rw-r--r--net/xfrm/xfrm_policy.c1
-rw-r--r--net/xfrm/xfrm_user.c1
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/bpf_load.c8
-rw-r--r--samples/bpf/cpustat_kern.c281
-rw-r--r--samples/bpf/cpustat_user.c219
-rw-r--r--samples/bpf/tcbpf2_kern.c6
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh1
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh3
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh5
-rw-r--r--samples/bpf/trace_event_kern.c4
-rw-r--r--samples/bpf/trace_event_user.c15
-rw-r--r--samples/bpf/xdp_redirect_user.c7
-rw-r--r--samples/sockmap/Makefile2
-rw-r--r--samples/sockmap/sockmap_kern.c197
-rwxr-xr-xsamples/sockmap/sockmap_test.sh450
-rw-r--r--samples/sockmap/sockmap_user.c302
-rw-r--r--security/selinux/hooks.c1
-rw-r--r--security/smack/smack_netfilter.c1
-rw-r--r--security/tomoyo/network.c5
-rw-r--r--tools/bpf/Makefile78
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst18
-rw-r--r--tools/bpf/bpftool/Makefile6
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool13
-rw-r--r--tools/bpf/bpftool/cfg.c514
-rw-r--r--tools/bpf/bpftool/cfg.h43
-rw-r--r--tools/bpf/bpftool/main.c104
-rw-r--r--tools/bpf/bpftool/prog.c305
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c338
-rw-r--r--tools/bpf/bpftool/xlated_dumper.h64
-rw-r--r--tools/include/uapi/linux/bpf.h47
-rw-r--r--tools/lib/bpf/libbpf.c1
-rw-r--r--tools/testing/selftests/bpf/Makefile17
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h10
-rw-r--r--tools/testing/selftests/bpf/bpf_rlimit.h28
-rw-r--r--tools/testing/selftests/bpf/sockmap_parse_prog.c15
-rw-r--r--tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c33
-rw-r--r--tools/testing/selftests/bpf/sockmap_verdict_prog.c7
-rw-r--r--tools/testing/selftests/bpf/test_align.c6
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c6
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c14
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c6
-rw-r--r--tools/testing/selftests/bpf/test_maps.c62
-rw-r--r--tools/testing/selftests/bpf/test_progs.c169
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_build_id.c60
-rw-r--r--tools/testing/selftests/bpf/test_tag.c4
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c304
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c8
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c22
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/config5
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh467
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh664
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/README56
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh88
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh86
-rw-r--r--tools/testing/selftests/net/forwarding/config12
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample35
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh577
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh125
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh376
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh202
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh122
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh196
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh122
-rwxr-xr-xtools/testing/selftests/net/in_netns.sh23
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c131
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh471
-rw-r--r--tools/testing/selftests/net/psock_fanout.c35
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh6
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests4
-rw-r--r--tools/testing/selftests/networking/timestamping/txtimestamp.c21
-rw-r--r--tools/testing/selftests/tc-testing/README173
-rw-r--r--tools/testing/selftests/tc-testing/TODO.txt25
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py74
-rw-r--r--tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt104
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt35
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS27
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py141
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py19
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py142
-rw-r--r--tools/testing/selftests/tc-testing/plugins/__init__.py0
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json410
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json73
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json410
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py575
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py8
-rw-r--r--tools/testing/selftests/tc-testing/tdc_helper.py15
911 files changed, 46116 insertions, 15720 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index 84cbb302f2b5..1a0b704e1a38 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -539,6 +539,18 @@ A: Although LLVM IR generation and optimization try to stay architecture
The clang option "-fno-jump-tables" can be used to disable
switch table generation.
+ - For clang -target bpf, it is guaranteed that pointer or long /
+ unsigned long types will always have a width of 64 bits, no matter
+ whether the underlying clang binary or the default target (or kernel)
+ is 32 bit. However, when the native clang target is used, it compiles
+ these types according to the underlying architecture's conventions,
+ meaning that on a 32 bit architecture, pointer or long / unsigned
+ long types, e.g. in the BPF context structure, will have a width of
+ 32 bits while the BPF LLVM back end still operates in 64 bit. The
+ native target is mostly needed in tracing, for the case of walking
+ pt_regs or other kernel structures where the CPU's register width
+ matters. Otherwise, clang -target bpf is generally recommended.
+
You should use default target when:
- Your program includes a header file, e.g., ptrace.h, which eventually
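As an illustrative sketch of the width difference described above (a hypothetical struct, not taken from the patch or the kernel sources): with clang -target bpf the members below are always 8 bytes wide, whereas a native 32-bit target makes them 4 bytes wide while the BPF back end still works on 64-bit registers.

/* Hypothetical example illustrating the note above; not part of the patch. */
struct example_bpf_ctx {
	void *data;		/* 8 bytes with clang -target bpf,
				 * 4 bytes with a native 32-bit target */
	unsigned long words;	/* same width behaviour as the pointer */
};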
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 8c033d48e2ba..60d50a2b0323 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -13,9 +13,18 @@ placed as a child node of an mdio device.
The properties described here are those specific to Marvell devices.
Additional required and optional properties can be found in dsa.txt.
+The compatibility string is used only to find an identification register,
+which is at a different MDIO base address in different switch families.
+- "marvell,mv88e6085" : Switch has base address 0x10. Use with models:
+ 6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165,
+ 6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
+ 6341, 6350, 6351, 6352
+- "marvell,mv88e6190" : Switch has base address 0x00. Use with models:
+ 6190, 6190X, 6191, 6290, 6390, 6390X
+
Required properties:
- compatible : Should be one of "marvell,mv88e6085" or
- "marvell,mv88e6190"
+ "marvell,mv88e6190" as indicated above
- reg : Address on the MII bus for the switch.
Optional properties:
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644
index 000000000000..2aaef567c5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+ - compatible: should be "nxp,mcr20a"
+ - spi-max-frequency: maximal bus speed; should be set to a frequency
+ lower than 9000000, depending on whether the device is used in
+ synchronous or asynchronous operation mode
+ - reg: the chipselect index
+ - interrupts: the interrupt generated by the device. Interrupt types
+ other than high-level triggered can lead to deadlocks while handling
+ the ISR.
+
+Optional properties:
+ - rst_b-gpio: GPIO spec for the RST_B pin
+
+Example:
+
+ mcr20a@0 {
+ compatible = "nxp,mcr20a";
+ spi-max-frequency = <9000000>;
+ reg = <0>;
+ interrupts = <17 2>;
+ interrupt-parent = <&gpio>;
+ rst_b-gpio = <&gpio 27 1>;
+ };
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 27966ae741e0..457d5ae16f23 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -29,6 +29,7 @@ Optional properties for PHY child node:
- reset-gpios : Should specify the gpio for phy reset
- magic-packet : If present, indicates that the hardware supports waking
up via magic packet.
+- phy-handle : see ethernet.txt file in the same directory
Examples:
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
high Tx rate. Must not be present for SFF modules
+- maximum-power-milliwatt : Maximum module power consumption
+ Specifies the maximum power consumption allowable by a module in the
+ slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W.
+
Example #1: Direct serdes to SFP connection
sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
i2c-bus = <&sfp_1g_i2c>;
los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <1000>;
pinctrl-names = "default";
pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 02c4353b5cf2..9ef9338aaee1 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -25,6 +25,8 @@ Optional property:
software needs to take when this pin is
strapped in these modes. See data manual
for details.
+ - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
+ for applicable values.
Note: ti,min-output-impedance and ti,max-output-impedance are mutually
exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..1d1120753ae8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -755,13 +755,13 @@ udp_rmem_min - INTEGER
Minimal size of receive buffer used by UDP sockets in moderation.
Each UDP socket is able to use the size for receiving data, even if
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
- Default: 1 page
+ Default: 4K
udp_wmem_min - INTEGER
Minimal size of send buffer used by UDP sockets in moderation.
Each UDP socket is able to use the size for sending data, even if
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
- Default: 1 page
+ Default: 4K
CIPSOv4 Variables:
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
FALSE: disabled
Default: FALSE
+fib_multipath_hash_policy - INTEGER
+ Controls which hash policy to use for multipath routes.
+ Default: 0 (Layer 3)
+ Possible values:
+ 0 - Layer 3 (source and destination addresses plus flow label)
+ 1 - Layer 4 (standard 5-tuple)
+
anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
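A minimal sketch of how the fib_multipath_hash_policy knob documented above could be flipped from user space, assuming the usual /proc/sys path for this IPv6 section (illustrative only; error handling is kept to a bare minimum):

#include <stdio.h>

/* Sketch: switch IPv6 multipath routing to layer-4 (5-tuple) hashing.
 * The path assumes the knob sits under net.ipv6 as the section above
 * suggests; writing "0" restores the default layer-3 policy.
 */
static int set_ipv6_l4_multipath_hash(void)
{
	FILE *f = fopen("/proc/sys/net/ipv6/fib_multipath_hash_policy", "w");

	if (!f)
		return -1;
	fputs("1\n", f);
	return fclose(f);
}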
@@ -2094,7 +2101,7 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
It is guaranteed to each SCTP socket (but not association) even
under moderate memory pressure.
- Default: 1 page
+ Default: 4K
sctp_wmem - vector of 3 INTEGERs: min, default, max
Currently this tunable has no effect.
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 291a01264967..fe46d4867e2d 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,11 +72,6 @@ this flag, a process must first signal intent by setting a socket option:
if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
error(1, errno, "setsockopt zerocopy");
-Setting the socket option only works when the socket is in its initial
-(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(),
-for example, will lead to an EBUSY error. In this case, the option should be set
-to the listening socket and it will be inherited by the accepted sockets.
-
Transmission
------------
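For reference alongside the SO_ZEROCOPY snippet retained above, a minimal sketch of the transmit call that the Transmission section goes on to describe (completion notifications on the socket error queue are omitted here):

#include <errno.h>
#include <error.h>
#include <sys/types.h>
#include <sys/socket.h>

/* Sketch: send with MSG_ZEROCOPY once SO_ZEROCOPY has been enabled on fd.
 * The kernel may still copy small buffers; completions must be read from
 * the error queue (not shown).
 */
static void send_zerocopy(int fd, const void *buf, size_t len)
{
	if (send(fd, buf, len, MSG_ZEROCOPY) == -1)
		error(1, errno, "send zerocopy");
}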
diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt
new file mode 100644
index 000000000000..9cb31c5e2dcd
--- /dev/null
+++ b/Documentation/networking/net_dim.txt
@@ -0,0 +1,174 @@
+Net DIM - Generic Network Dynamic Interrupt Moderation
+======================================================
+
+Author:
+ Tal Gilboa <talgi@mellanox.com>
+
+
+Contents
+=========
+
+- Assumptions
+- Introduction
+- The Net DIM Algorithm
+- Registering a Network Device to DIM
+- Example
+
+Part 0: Assumptions
+======================
+
+This document assumes the reader has basic knowledge in network drivers
+and in general interrupt moderation.
+
+
+Part I: Introduction
+======================
+
+Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the
+interrupt moderation configuration of a channel in order to optimize packet
+processing. The mechanism includes an algorithm which decides if and how to
+change moderation parameters for a channel, usually by performing an analysis on
+runtime data sampled from the system. Net DIM is such a mechanism. In each
+iteration of the algorithm, it analyses a given sample of the data, compares it
+to the previous sample and if required, it can decide to change some of the
+interrupt moderation configuration fields. The data sample is composed of data
+bandwidth, the number of packets and the number of events. The time between
+samples is also measured. Net DIM compares the current and the previous data and
+returns an adjusted interrupt moderation configuration object. In some cases,
+the algorithm might decide not to change anything. The configuration fields are
+the minimum duration (microseconds) allowed between events and the maximum
+number of wanted packets per event. The Net DIM algorithm gives higher priority
+to increasing bandwidth than to reducing the interrupt rate.
+
+
+Part II: The Net DIM Algorithm
+===============================
+
+Each iteration of the Net DIM algorithm follows these steps:
+1. Calculates new data sample.
+2. Compares it to previous sample.
+3. Makes a decision - suggests interrupt moderation configuration fields.
+4. Schedules a work function, which applies the suggested configuration.
+
+The first two steps are straightforward, both the new and the previous data are
+supplied by the driver registered to Net DIM. The previous data is the new data
+supplied to the previous iteration. The comparison step checks the difference
+between the new and previous data and decides on the result of the last step.
+A step is rated "better" if bandwidth increases and "worse" if bandwidth
+decreases. If there is no change in bandwidth, the packet rate is compared
+in a similar fashion - increase == "better" and decrease == "worse".
+In case there is no change in the packet rate as well, the interrupt rate is
+compared. Here the algorithm tries to optimize for lower interrupt rate so an
+increase in the interrupt rate is considered "worse" and a decrease is
+considered "better". Step #2 has an optimization for avoiding false results: it
+only considers a difference between samples as valid if it is greater than a
+certain percentage. Also, since Net DIM does not measure anything by itself, it
+assumes the data provided by the driver is valid.
+
+Step #3 decides on the suggested configuration based on the result from step #2
+and the internal state of the algorithm. The states reflect the "direction" of
+the algorithm: is it going left (reducing moderation), right (increasing
+moderation) or standing still. Another optimization is that if a decision
+to stay still is made multiple times, the interval between iterations of the
+algorithm increases in order to reduce calculation overhead. Also, after
+"parking" on one of the leftmost or rightmost decisions, the algorithm may
+decide to verify this decision by taking a step in the other direction. This is
+done in order to avoid getting stuck in a "deep sleep" scenario. Once a
+decision is made, an interrupt moderation configuration is selected from
+the predefined profiles.
+
+The last step is to notify the registered driver that it should apply the
+suggested configuration. This is done by scheduling a work function, defined by
+the Net DIM API and provided by the registered driver.
+
+As you can see, Net DIM itself does not actively interact with the system. It
+would have trouble making correct decisions if wrong data were supplied to it,
+and it would be useless if the work function did not apply the suggested
+configuration. This does, however, allow the registered driver some room for
+manoeuvre, as it may provide partial data or ignore the algorithm's suggestion
+under some conditions.
+
+
+Part III: Registering a Network Device to DIM
+==============================================
+
+Net DIM API exposes the main function net_dim(struct net_dim *dim,
+struct net_dim_sample end_sample). This function is the entry point to the Net
+DIM algorithm and has to be called every time the driver would like to check if
+it should change interrupt moderation parameters. The driver should provide two
+data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+describes the state of DIM for a specific object (RX queue, TX queue,
+other queues, etc.). This includes the current selected profile, previous data
+samples, the callback function provided by the driver and more.
+Struct net_dim_sample describes a data sample, which will be compared to the
+data sample stored in struct net_dim in order to decide on the algorithm's next
+step. The sample should include bytes, packets and interrupts, measured by
+the driver.
+
+In order to use Net DIM from a networking driver, the driver needs to call the
+main net_dim() function. The recommended method is to call net_dim() on each
+interrupt. Since Net DIM has a built-in moderation and it might decide to skip
+iterations under certain conditions, there is no need to moderate the net_dim()
+calls as well. As mentioned above, the driver needs to provide an object of type
+struct net_dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct net_dim as part of its data structure and use it
+as the main Net DIM API object. The struct net_dim_sample should hold the latest
+bytes, packets and interrupts count. No need to perform any calculations, just
+include the raw data.
+
+The net_dim() call itself does not return anything. Instead Net DIM relies on
+the driver to provide a callback function, which is called when the algorithm
+decides to make a change in the interrupt moderation parameters. This callback
+will be scheduled and run in a separate thread in order not to add overhead to
+the data flow. After the work is done, Net DIM algorithm needs to be set to
+the proper state in order to move to the next iteration.
+
+
+Part IV: Example
+=================
+
+The following code demonstrates how to register a driver to Net DIM. The actual
+usage is not complete but it should make the outline of the usage clear.
+
+my_driver.c:
+
+#include <linux/net_dim.h>
+
+/* Callback for net DIM to schedule on a decision to change moderation */
+void my_driver_do_dim_work(struct work_struct *work)
+{
+ /* Get struct net_dim from struct work_struct */
+ struct net_dim *dim = container_of(work, struct net_dim,
+ work);
+ /* Do interrupt moderation related stuff */
+ ...
+
+ /* Signal net DIM work is done and it should move to next iteration */
+ dim->state = NET_DIM_START_MEASURE;
+}
+
+/* My driver's interrupt handler */
+int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
+{
+ ...
+ /* A struct to hold current measured data */
+ struct net_dim_sample dim_sample;
+ ...
+ /* Initiate data sample struct with current data */
+ net_dim_sample(my_entity->events,
+ my_entity->packets,
+ my_entity->bytes,
+ &dim_sample);
+ /* Call net DIM */
+ net_dim(&my_entity->dim, dim_sample);
+ ...
+}
+
+/* My entity's initialization function (my_entity was already allocated) */
+int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...)
+{
+ ...
+ /* Initiate struct work_struct with my driver's callback function */
+ INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work);
+ ...
+}
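As a rough editorial sketch (not part of the file above) of the comparison step described in Part II, assuming hypothetical per-millisecond sample fields and a 10% significance threshold that are illustrative only, not the actual implementation:

/* Hypothetical sketch of Net DIM's step #2: compare two samples and
 * decide "better" (1), "worse" (-1) or "same" (0). Bandwidth is checked
 * first, then packet rate, then interrupt rate (where a decrease wins).
 */
struct my_dim_stats {
	unsigned long bpms;	/* bytes per millisecond */
	unsigned long ppms;	/* packets per millisecond */
	unsigned long epms;	/* events (interrupts) per millisecond */
};

static int significant(unsigned long cur, unsigned long prev)
{
	unsigned long diff = cur > prev ? cur - prev : prev - cur;

	return prev && diff * 10 > prev;	/* more than 10% change */
}

static int my_dim_compare(const struct my_dim_stats *cur,
			  const struct my_dim_stats *prev)
{
	if (significant(cur->bpms, prev->bpms))
		return cur->bpms > prev->bpms ? 1 : -1;	/* bandwidth first */
	if (significant(cur->ppms, prev->ppms))
		return cur->ppms > prev->ppms ? 1 : -1;	/* then packet rate */
	if (significant(cur->epms, prev->epms))
		return cur->epms > prev->epms ? -1 : 1;	/* fewer interrupts wins */
	return 0;
}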
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index bf654845556e..999eb41da81d 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -7,15 +7,12 @@ socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
i) capture network traffic with utilities like tcpdump, ii) transmit network
traffic, or any other that needs raw access to network interface.
-You can find the latest version of this document at:
- http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap
-
Howto can be found at:
- http://wiki.gnu-log.net (packet_mmap)
+ https://sites.google.com/site/packetmmap/
Please send your comments to
Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
- Johann Baudy <johann.baudy@gnu-log.net>
+ Johann Baudy
-------------------------------------------------------------------------------
+ Why use PACKET_MMAP
@@ -51,17 +48,8 @@ From the user standpoint, you should use the higher level libpcap library, which
is a de facto standard, portable across nearly all operating systems
including Win32.
-Said that, at time of this writing, official libpcap 0.8.1 is out and doesn't include
-support for PACKET_MMAP, and also probably the libpcap included in your distribution.
-
-I'm aware of two implementations of PACKET_MMAP in libpcap:
-
- http://wiki.ipxwarzone.com/ (by Simon Patarin, based on libpcap 0.6.2)
- http://public.lanl.gov/cpw/ (by Phil Wood, based on lastest libpcap)
-
-The rest of this document is intended for people who want to understand
-the low level details or want to improve libpcap by including PACKET_MMAP
-support.
+PACKET_MMAP support was integrated into libpcap around the time of version
+1.3.0; TPACKET_V3 support was added in version 1.5.0.
--------------------------------------------------------------------------------
+ How to use mmap() directly to improve capture process
@@ -174,7 +162,7 @@ As capture, each frame contains two parts:
/* bind socket to eth0 */
bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
- A complete tutorial is available at: http://wiki.gnu-log.net/
+ A complete tutorial is available at: https://sites.google.com/site/packetmmap/
By default, the user should put data at :
frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 35c62f522754..5992602469d8 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -270,6 +270,18 @@ optmem_max
Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
of struct cmsghdr structures with appended data.
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls whether fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, provided the corresponding tunnel is
+present in the initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
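+
+For example, running "sysctl -w net.core.fb_tunnels_only_for_init_net=1" in
+the initial namespace (assuming the usual net.core prefix for entries in this
+section) prevents the fallback devices from being created in network
+namespaces set up afterwards.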
+
+Default : 0 (for compatibility reasons)
+
2. /proc/sys/net/unix - Parameters for Unix domain sockets
-------------------------------------------------------
diff --git a/MAINTAINERS b/MAINTAINERS
index 73c0cdabf755..b3ea844cf228 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8596,6 +8596,15 @@ S: Maintained
F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
F: drivers/iio/potentiometer/mcp4531.c
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M: Xue Liu <liuxuenetmail@gmail.com>
+L: linux-wpan@vger.kernel.org
+W: https://github.com/xueliu/mcr20a-linux
+S: Maintained
+F: drivers/net/ieee802154/mcr20a.c
+F: drivers/net/ieee802154/mcr20a.h
+F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <vilhelm.gray@gmail.com>
L: linux-iio@vger.kernel.org
@@ -9152,6 +9161,13 @@ F: drivers/net/dsa/microchip/*
F: include/linux/platform_data/microchip-ksz.h
F: Documentation/devicetree/bindings/net/dsa/ksz.txt
+MICROCHIP LAN743X ETHERNET DRIVER
+M: Bryan Whitehead <bryan.whitehead@microchip.com>
+M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/microchip/lan743x_*
+
MICROCHIP USB251XB DRIVER
M: Richard Leitner <richard.leitner@skidata.com>
L: linux-usb@vger.kernel.org
diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts
index 8b2fa9a49967..c28afb242393 100644
--- a/arch/arm/boot/dts/armada-370-rd.dts
+++ b/arch/arm/boot/dts/armada-370-rd.dts
@@ -56,6 +56,7 @@
/dts-v1/;
#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/gpio/gpio.h>
#include "armada-370.dtsi"
@@ -243,6 +244,8 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
ports {
#address-cells = <1>;
@@ -278,6 +281,35 @@
};
};
};
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switchphy0: switchphy@0 {
+ reg = <0>;
+ interrupt-parent = <&switch>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy1: switchphy@1 {
+ reg = <1>;
+ interrupt-parent = <&switch>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy2: switchphy@2 {
+ reg = <2>;
+ interrupt-parent = <&switch>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy3: switchphy@3 {
+ reg = <3>;
+ interrupt-parent = <&switch>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
};
};
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index d3d435248a24..c73eb8209555 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
platform_device_register_simple("macsonic", -1, NULL, 0);
+ if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+ macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ platform_device_register_simple("mac89x0", -1, NULL, 0);
+
if (macintosh_config->ether_type == MAC_ETHER_MACE)
platform_device_register_simple("macmace", -1, NULL, 0);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index ce5b2ebd5701..b725154182cc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,10 +11,10 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
+#include <linux/bpf.h>
+
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
-#include <linux/bpf.h>
/*
* assembly code in arch/x86/net/bpf_jit.S
@@ -61,7 +61,12 @@ static bool is_imm8(int value)
static bool is_simm32(s64 value)
{
- return value == (s64) (s32) value;
+ return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+ return value == (u64)(u32)value;
}
/* mov dst, src */
@@ -98,16 +103,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JLE 0x7E
#define X86_JG 0x7F
-static void bpf_flush_icache(void *start, void *end)
-{
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
- set_fs(old_fs);
-}
-
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
@@ -212,7 +207,7 @@ struct jit_context {
/* emit x64 prologue code for BPF program and check it's size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -247,18 +242,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
- * we need to reset the counter to 0. It's done in two instructions,
- * resetting rax register to 0 (xor on eax gets 0 extended), and
- * moving it to the counter location.
- */
+ if (!ebpf_from_cbpf) {
+ /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ * calls we need to reset the counter to 0. It's done in two
+ * instructions, resetting rax register to 0, and moving it
+ * to the counter location.
+ */
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ /* xor eax, eax */
+ EMIT2(0x31, 0xc0);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ }
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
}
@@ -356,6 +354,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
*pprog = prog;
}
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+ u32 dst_reg, const u32 imm32)
+{
+ u8 *prog = *pprog;
+ u8 b1, b2, b3;
+ int cnt = 0;
+
+ /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ * (which zero-extends imm32) to save 2 bytes.
+ */
+ if (sign_propagate && (s32)imm32 < 0) {
+ /* 'mov %rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, dst_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+ goto done;
+ }
+
+ /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ * to save 3 bytes.
+ */
+ if (imm32 == 0) {
+ if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ b2 = 0x31; /* xor */
+ b3 = 0xC0;
+ EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+ goto done;
+ }
+
+ /* mov %eax, imm32 */
+ if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
+ EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+ *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+ const u32 imm32_hi, const u32 imm32_lo)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+ /* For emitting a plain u32, where the sign bit must not be
+ * propagated, LLVM tends to load imm64 over mov32
+ * directly, so save a couple of bytes by just doing
+ * 'mov %eax, imm32' instead.
+ */
+ emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+ } else {
+ /* movabsq %rax, imm64 */
+ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+ EMIT(imm32_lo, 4);
+ EMIT(imm32_hi, 4);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is64) {
+ /* mov dst, src */
+ EMIT_mov(dst_reg, src_reg);
+ } else {
+ /* mov32 dst, src */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ }
+
+ *pprog = prog;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -369,7 +447,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog, bpf_prog->aux->stack_depth);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog));
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
@@ -378,7 +457,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
- u8 b1 = 0, b2 = 0, b3 = 0;
+ u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
bool reload_skb_data;
@@ -414,16 +493,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
- /* mov dst, src */
case BPF_ALU64 | BPF_MOV | BPF_X:
- EMIT_mov(dst_reg, src_reg);
- break;
-
- /* mov32 dst, src */
case BPF_ALU | BPF_MOV | BPF_X:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT1(add_2mod(0x40, dst_reg, src_reg));
- EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
@@ -486,58 +560,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
- /* optimization: if imm32 is positive,
- * use 'mov eax, imm32' (which zero-extends imm32)
- * to save 2 bytes
- */
- if (imm32 < 0) {
- /* 'mov rax, imm32' sign extends imm32 */
- b1 = add_1mod(0x48, dst_reg);
- b2 = 0xC7;
- b3 = 0xC0;
- EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
- break;
- }
-
case BPF_ALU | BPF_MOV | BPF_K:
- /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
- * to save 3 bytes.
- */
- if (imm32 == 0) {
- if (is_ereg(dst_reg))
- EMIT1(add_2mod(0x40, dst_reg, dst_reg));
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
- break;
- }
-
- /* mov %eax, imm32 */
- if (is_ereg(dst_reg))
- EMIT1(add_1mod(0x40, dst_reg));
- EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+ emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, imm32);
break;
case BPF_LD | BPF_IMM | BPF_DW:
- /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
- * to save 7 bytes.
- */
- if (insn[0].imm == 0 && insn[1].imm == 0) {
- b1 = add_2mod(0x48, dst_reg, dst_reg);
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
- insn++;
- i++;
- break;
- }
-
- /* movabsq %rax, imm64 */
- EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
- EMIT(insn[0].imm, 4);
- EMIT(insn[1].imm, 4);
-
+ emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
insn++;
i++;
break;
@@ -594,36 +623,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
+ {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
if (BPF_SRC(insn->code) == BPF_X)
- /* mov rax, src_reg */
- EMIT_mov(BPF_REG_0, src_reg);
+ emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
- /* mov rax, imm32 */
- EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+ emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
+ if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
- /* mov r11, rax */
- EMIT_mov(AUX_REG, BPF_REG_0);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0) {
+ /* mov dst_reg, rax */
+ EMIT_mov(dst_reg, BPF_REG_0);
+ EMIT1(0x58); /* pop rax */
+ }
break;
-
+ }
/* shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
@@ -641,7 +672,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_RSH: b3 = 0xE8; break;
case BPF_ARSH: b3 = 0xF8; break;
}
- EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+ if (imm32 == 1)
+ EMIT2(0xD1, add_1reg(b3, dst_reg));
+ else
+ EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
case BPF_ALU | BPF_LSH | BPF_X:
@@ -1222,7 +1257,6 @@ skip_init_addrs:
bpf_jit_dump(prog->len, proglen, pass + 1, image);
if (image) {
- bpf_flush_icache(header, image + proglen);
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
} else {
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 0277f36be85b..6e737142ceaa 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3173,14 +3173,10 @@ static void init_sram(struct idt77252_dev *card)
(u32) 0xffffffff);
}
- writel((SAR_FBQ0_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
- writel((SAR_FBQ1_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
- writel((SAR_FBQ2_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
- writel((SAR_FBQ3_LOW << 28) | 0x00000000 | 0x00000000 |
- (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
+ writel((SAR_FBQ0_LOW << 28) | (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
+ writel((SAR_FBQ1_LOW << 28) | (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
+ writel((SAR_FBQ2_LOW << 28) | (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
+ writel((SAR_FBQ3_LOW << 28) | (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
/* Initialize rate table */
for (i = 0; i < 256; i++) {
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 204afe66de92..3d7a5c149af3 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -203,6 +203,12 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
{ } /* Terminating entry */
};
+static inline void ath3k_log_failed_loading(int err, int len, int size)
+{
+ BT_ERR("Error in firmware loading err = %d, len = %d, size = %d",
+ err, len, size);
+}
+
#define USB_REQ_DFU_DNLOAD 1
#define BULK_SIZE 4096
#define FW_HDR_SIZE 20
@@ -227,15 +233,16 @@ static int ath3k_load_firmware(struct usb_device *udev,
return -ENOMEM;
}
- memcpy(send_buf, firmware->data, 20);
+ memcpy(send_buf, firmware->data, FW_HDR_SIZE);
err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR,
- 0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT);
+ 0, 0, send_buf, FW_HDR_SIZE,
+ USB_CTRL_SET_TIMEOUT);
if (err < 0) {
BT_ERR("Can't change to loading configuration err");
goto error;
}
- sent += 20;
- count -= 20;
+ sent += FW_HDR_SIZE;
+ count -= FW_HDR_SIZE;
pipe = usb_sndbulkpipe(udev, 0x02);
@@ -250,8 +257,7 @@ static int ath3k_load_firmware(struct usb_device *udev,
&len, 3000);
if (err || (len != size)) {
- BT_ERR("Error in firmware loading err = %d,"
- "len = %d, size = %d", err, len, size);
+ ath3k_log_failed_loading(err, len, size);
goto error;
}
@@ -350,8 +356,7 @@ static int ath3k_load_fwfile(struct usb_device *udev,
err = usb_bulk_msg(udev, pipe, send_buf, size,
&len, 3000);
if (err || (len != size)) {
- BT_ERR("Error in firmware loading err = %d,"
- "len = %d, size = %d", err, len, size);
+ ath3k_log_failed_loading(err, len, size);
kfree(send_buf);
return err;
}
@@ -398,7 +403,7 @@ static int ath3k_set_normal_mode(struct usb_device *udev)
static int ath3k_load_patch(struct usb_device *udev)
{
unsigned char fw_state;
- char filename[ATH3K_NAME_LEN] = {0};
+ char filename[ATH3K_NAME_LEN];
const struct firmware *firmware;
struct ath3k_version fw_version;
__u32 pt_rom_version, pt_build_version;
@@ -451,7 +456,7 @@ static int ath3k_load_patch(struct usb_device *udev)
static int ath3k_load_syscfg(struct usb_device *udev)
{
unsigned char fw_state;
- char filename[ATH3K_NAME_LEN] = {0};
+ char filename[ATH3K_NAME_LEN];
const struct firmware *firmware;
struct ath3k_version fw_version;
int clk_value, ret;
@@ -522,7 +527,6 @@ static int ath3k_probe(struct usb_interface *intf,
/* load patch and sysconfig files for AR3012 */
if (id->driver_info & BTUSB_ATH3012) {
-
/* New firmware with patch and sysconfig files already loaded */
if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001)
return -ENODEV;
@@ -565,7 +569,7 @@ static int ath3k_probe(struct usb_interface *intf,
static void ath3k_disconnect(struct usb_interface *intf)
{
- BT_DBG("ath3k_disconnect intf %p", intf);
+ BT_DBG("%s intf %p", __func__, intf);
}
static struct usb_driver ath3k_driver = {
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index b280d466f05b..f6c694a1b9b0 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -183,7 +183,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
return -EFAULT;
}
- skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC);
+ skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL);
if (!skb) {
BT_ERR("No free skb");
return -ENOMEM;
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 6e2ad748abba..437f080deaab 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -35,6 +35,60 @@
#define RTL_ROM_LMP_8761A 0x8761
#define RTL_ROM_LMP_8822B 0x8822
+#define IC_MATCH_FL_LMPSUBV (1 << 0)
+#define IC_MATCH_FL_HCIREV (1 << 1)
+#define IC_INFO(lmps, hcir) \
+ .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \
+ .lmp_subver = (lmps), \
+ .hci_rev = (hcir)
+
+struct id_table {
+ __u16 match_flags;
+ __u16 lmp_subver;
+ __u16 hci_rev;
+ bool config_needed;
+ char *fw_name;
+ char *cfg_name;
+};
+
+static const struct id_table ic_id_table[] = {
+ /* 8723B */
+ { IC_INFO(RTL_ROM_LMP_8723B, 0xb),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8723b_fw.bin",
+ .cfg_name = "rtl_bt/rtl8723b_config.bin" },
+
+ /* 8723D */
+ { IC_INFO(RTL_ROM_LMP_8723B, 0xd),
+ .config_needed = true,
+ .fw_name = "rtl_bt/rtl8723d_fw.bin",
+ .cfg_name = "rtl_bt/rtl8723d_config.bin" },
+
+ /* 8821A */
+ { IC_INFO(RTL_ROM_LMP_8821A, 0xa),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8821a_fw.bin",
+ .cfg_name = "rtl_bt/rtl8821a_config.bin" },
+
+ /* 8821C */
+ { IC_INFO(RTL_ROM_LMP_8821A, 0xc),
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8821c_fw.bin",
+ .cfg_name = "rtl_bt/rtl8821c_config.bin" },
+
+ /* 8761A */
+ { IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+ .config_needed = false,
+ .fw_name = "rtl_bt/rtl8761a_fw.bin",
+ .cfg_name = "rtl_bt/rtl8761a_config.bin" },
+
+ /* 8822B */
+ { IC_INFO(RTL_ROM_LMP_8822B, 0xb),
+ .config_needed = true,
+ .fw_name = "rtl_bt/rtl8822b_fw.bin",
+ .cfg_name = "rtl_bt/rtl8822b_config.bin" },
+ };
+
static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
{
struct rtl_rom_version_evt *rom_version;
@@ -64,9 +118,9 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
return 0;
}
-static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
- const struct firmware *fw,
- unsigned char **_buf)
+static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+ const struct firmware *fw,
+ unsigned char **_buf)
{
const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
struct rtl_epatch_header *epatch_info;
@@ -88,6 +142,8 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
{ RTL_ROM_LMP_8821A, 2 },
{ RTL_ROM_LMP_8761A, 3 },
{ RTL_ROM_LMP_8822B, 8 },
+ { RTL_ROM_LMP_8723B, 9 }, /* 8723D */
+ { RTL_ROM_LMP_8821A, 10 }, /* 8821C */
};
ret = rtl_read_rom_version(hdev, &rom_version);
@@ -320,8 +376,8 @@ out:
return ret;
}
-static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
- const char *fw_name)
+static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev,
+ u16 lmp_subver)
{
unsigned char *fw_data = NULL;
const struct firmware *fw;
@@ -330,39 +386,40 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
u8 *cfg_buff = NULL;
u8 *tbuff;
char *cfg_name = NULL;
- bool config_needed = false;
+ char *fw_name = NULL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) {
+ if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) &&
+ (ic_id_table[i].lmp_subver != lmp_subver))
+ continue;
+ if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) &&
+ (ic_id_table[i].hci_rev != hci_rev))
+ continue;
- switch (lmp_subver) {
- case RTL_ROM_LMP_8723B:
- cfg_name = "rtl_bt/rtl8723b_config.bin";
- break;
- case RTL_ROM_LMP_8821A:
- cfg_name = "rtl_bt/rtl8821a_config.bin";
- break;
- case RTL_ROM_LMP_8761A:
- cfg_name = "rtl_bt/rtl8761a_config.bin";
- break;
- case RTL_ROM_LMP_8822B:
- cfg_name = "rtl_bt/rtl8822b_config.bin";
- config_needed = true;
- break;
- default:
- BT_ERR("%s: rtl: no config according to lmp_subver %04x",
- hdev->name, lmp_subver);
break;
}
+ if (i >= ARRAY_SIZE(ic_id_table)) {
+ BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x",
+ hdev->name, lmp_subver, hci_rev);
+ return -EINVAL;
+ }
+
+ cfg_name = ic_id_table[i].cfg_name;
+
if (cfg_name) {
cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
if (cfg_sz < 0) {
cfg_sz = 0;
- if (config_needed)
+ if (ic_id_table[i].config_needed)
BT_ERR("Necessary config file %s not found\n",
cfg_name);
}
} else
cfg_sz = 0;
+ fw_name = ic_id_table[i].fw_name;
bt_dev_info(hdev, "rtl: loading %s", fw_name);
ret = request_firmware(&fw, fw_name, &hdev->dev);
if (ret < 0) {
@@ -370,7 +427,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
goto err_req_fw;
}
- ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+ ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data);
if (ret < 0)
goto out;
@@ -429,7 +486,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
{
struct sk_buff *skb;
struct hci_rp_read_local_version *resp;
- u16 lmp_subver;
+ u16 hci_rev, lmp_subver;
skb = btrtl_read_local_version(hdev);
if (IS_ERR(skb))
@@ -441,6 +498,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
resp->hci_ver, resp->hci_rev,
resp->lmp_ver, resp->lmp_subver);
+ hci_rev = le16_to_cpu(resp->hci_rev);
lmp_subver = le16_to_cpu(resp->lmp_subver);
kfree_skb(skb);
@@ -455,17 +513,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
case RTL_ROM_LMP_3499:
return btrtl_setup_rtl8723a(hdev);
case RTL_ROM_LMP_8723B:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8723b_fw.bin");
case RTL_ROM_LMP_8821A:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8821a_fw.bin");
case RTL_ROM_LMP_8761A:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8761a_fw.bin");
case RTL_ROM_LMP_8822B:
- return btrtl_setup_rtl8723b(hdev, lmp_subver,
- "rtl_bt/rtl8822b_fw.bin");
+ return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver);
default:
bt_dev_info(hdev, "rtl: assuming no firmware upload needed");
return 0;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 366a49c7c08f..5cd868ea28ed 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -340,6 +340,7 @@ static const struct usb_device_id blacklist_table[] = {
/* Intel Bluetooth devices */
{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+ { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
@@ -374,6 +375,9 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
+ /* Additional Realtek 8822BE Bluetooth devices */
+ { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
/* Silicon Wave based devices */
{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
@@ -2073,6 +2077,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
case 0x0c: /* WsP */
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
break;
default:
BT_ERR("%s: Unsupported Intel hardware variant (%u)",
@@ -2165,6 +2171,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
@@ -2196,6 +2204,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* QnJ, IcP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
le16_to_cpu(ver.hw_variant),
le16_to_cpu(ver.hw_revision),
diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
index 14ae7ee88acb..d568fbd94d6c 100644
--- a/drivers/bluetooth/hci_ath.c
+++ b/drivers/bluetooth/hci_ath.c
@@ -71,12 +71,12 @@ static int ath_wakeup_ar3k(struct tty_struct *tty)
/* Clear RTS first */
tty->driver->ops->tiocmget(tty);
tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS);
- mdelay(20);
+ msleep(20);
/* Set RTS, wake up board */
tty->driver->ops->tiocmget(tty);
tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00);
- mdelay(20);
+ msleep(20);
status = tty->driver->ops->tiocmget(tty);
return status;
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 1b4417a623a4..2f30dcad96bd 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -650,7 +650,7 @@ static int download_firmware(struct ll_device *lldev)
break;
case ACTION_DELAY: /* sleep */
bt_dev_info(lldev->hu.hdev, "sleep command in scr");
- mdelay(((struct bts_action_delay *)action_ptr)->msec);
+ msleep(((struct bts_action_delay *)action_ptr)->msec);
break;
}
len -= (sizeof(struct bts_action) +
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a5367c5efbe7..66f203730e80 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
- strict);
+ NULL, strict);
bool ret;
if (!rt)
@@ -4554,6 +4554,7 @@ static struct pernet_operations cma_pernet_operations = {
.exit = cma_exit_net,
.id = &cma_pernet_id,
.size = sizeof(struct cma_pernet),
+ .async = true,
};
static int __init cma_init(void)
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 7a9d0de89d6a..e96771ddc9a7 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -1217,6 +1217,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
if (ctx->dev)
c4iw_remove(ctx);
break;
+ case CXGB4_STATE_FATAL_ERROR:
case CXGB4_STATE_START_RECOVERY:
pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index bc6299697dda..d42b922bede8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 15457c9569a7..94a27d89a303 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
}
}
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
- return mlx5_buf_offset(&buf->buf, n * size);
-}
-
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
- return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}
static u8 sw_ownership_bit(int n, int nent)
@@ -404,7 +399,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_buf_free(dev->mdev, &buf->buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -725,12 +720,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return ret;
}
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
- int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_cq_buf *buf,
+ int nent,
+ int cqe_size)
{
+ struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+ struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+ u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
int err;
- err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+ MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+ MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+ mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+ err = mlx5_frag_buf_alloc_node(dev->mdev,
+ nent * cqe_size,
+ frag_buf,
+ dev->mdev->priv.numa_node);
if (err)
return err;
@@ -863,14 +871,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe = get_cqe(cq, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
@@ -892,14 +901,15 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;
- err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;
- init_cq_buf(cq, &cq->buf);
+ init_cq_frag_buf(cq, &cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
- MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.fbc.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
@@ -907,11 +917,12 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_array(&cq->buf.buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ cq->buf.fbc.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
@@ -1213,11 +1224,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (!cq->resize_buf)
return -ENOMEM;
- err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
- init_cq_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq, cq->resize_buf);
return 0;
@@ -1262,9 +1273,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
}
while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
- dcqe = get_cqe_from_buf(cq->resize_buf,
- (i + 1) & (cq->resize_buf->nent),
- dsize);
+ dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+ (i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
@@ -1330,8 +1340,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
- npas = cq->resize_buf->buf.npages;
- page_shift = cq->resize_buf->buf.page_shift;
+ struct mlx5_frag_buf_ctrl *c;
+
+ c = &cq->resize_buf->fbc;
+ npas = c->frag_buf.npages;
+ page_shift = c->frag_buf.page_shift;
}
}
@@ -1352,7 +1365,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+ pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..0e04fdddf670
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..046fd942fd46
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index da091de4e69d..390e4375647e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,7 +57,9 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
+#include <linux/mlx5/fs_helpers.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -130,7 +132,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
- ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@@ -154,10 +156,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
-
- if (ndev->dev.parent == &mdev->pdev->dev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
+ if (ibdev->rep) {
+ struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ ibdev->rep->vport);
+ if (rep_ndev == ndev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
+ } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ }
write_unlock(&roce->netdev_lock);
break;
@@ -1272,6 +1283,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ int ret;
+
+ /* Only link layer == ethernet is valid for representors */
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ if (ret || !props)
+ return ret;
+
+ /* We don't support GIDs */
+ props->gid_tbl_len = 0;
+
+ return ret;
+}
+
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -2290,11 +2317,9 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-#define IPV4_VERSION 4
-#define IPV6_VERSION 6
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
- u32 *tag_id, bool *is_drop)
+ struct mlx5_flow_act *action)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -2377,7 +2402,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ip_version, 0xf);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, IPV4_VERSION);
+ ip_version, MLX5_FS_IPV4_VERSION);
} else {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -2416,7 +2441,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ip_version, 0xf);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, IPV6_VERSION);
+ ip_version, MLX5_FS_IPV6_VERSION);
} else {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -2512,13 +2537,14 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- *tag_id = ib_spec->flow_tag.tag_id;
+ action->flow_tag = ib_spec->flow_tag.tag_id;
+ action->has_flow_tag = true;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
LAST_DROP_FIELD))
return -EOPNOTSUPP;
- *is_drop = true;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
default:
return -EINVAL;
@@ -2635,7 +2661,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
@@ -2646,7 +2672,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
@@ -2695,7 +2721,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
+ prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2703,7 +2729,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
@@ -2713,7 +2739,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
- prio = &dev->flow_db.sniffer[ft_type];
+ prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
@@ -2771,13 +2797,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination *rule_dst = dst;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
- u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- bool is_drop = false;
int err = 0;
int dest_num = 1;
@@ -2796,7 +2820,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, &flow_tag, &is_drop);
+ ib_flow, &flow_act);
if (err < 0)
goto free;
@@ -2806,9 +2830,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
+ if (dev->rep) {
+ void *misc;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->rep->vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- if (is_drop) {
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
rule_dst = NULL;
dest_num = 0;
} else {
@@ -2816,15 +2851,14 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ if (flow_act.has_flow_tag &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_tag, flow_attr->type);
+ flow_act.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
- flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
rule_dst, dest_num);
@@ -3003,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (!dst)
return ERR_PTR(-ENOMEM);
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
@@ -3052,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
goto destroy_ft;
}
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
@@ -3062,7 +3096,7 @@ destroy_ft:
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
@@ -3769,6 +3803,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
@@ -3799,7 +3852,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
@@ -3811,9 +3864,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->flow_db->lag_demux_ft) {
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
@@ -3845,14 +3898,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
-
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
@@ -3865,8 +3914,6 @@ err_disable_roce:
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@@ -4500,7 +4547,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4509,7 +4556,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->port);
}
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
@@ -4531,8 +4578,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
goto err_free_port;
if (!mlx5_core_mp_enabled(mdev)) {
- int i;
-
for (i = 1; i <= dev->num_ports; i++) {
err = get_port_caps(dev, i);
if (err)
@@ -4561,7 +4606,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
- mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4582,7 +4626,38 @@ err_free_port:
return -ENOMEM;
}
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dev *nic_dev;
+
+ nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+ if (!nic_dev)
+ return -EINVAL;
+
+ dev->flow_db = nic_dev->flow_db;
+
+ return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
@@ -4623,7 +4698,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
@@ -4666,7 +4740,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4717,6 +4790,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
+ dev->ib_dev.query_port = mlx5_ib_rep_query_port;
+
+ return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+ return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ int err = 0;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+ return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -4724,37 +4871,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
int port_type_cap;
u8 port_num;
int err;
- int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ if (err)
+ return err;
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
if (err)
- return err;
+ goto cleanup;
}
return 0;
+cleanup:
+ mlx5_ib_stage_common_roce_cleanup(dev);
+
+ return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4770,16 +4906,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_ib_stage_common_roce_cleanup(dev);
}
}
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
@@ -4791,7 +4927,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
@@ -4803,7 +4939,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -4834,7 +4970,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -4849,28 +4985,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
return err;
}
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
-static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
}
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
ib_unregister_device(&dev->ib_dev);
}
-static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
return create_umr_res(dev);
}
@@ -4887,7 +5023,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
@@ -4902,9 +5038,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile,
- int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_register_vport_reps(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
{
/* Number of stages to cleanup */
while (stage) {
@@ -4918,23 +5066,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
- struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -4958,9 +5097,15 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
@@ -4999,6 +5144,51 @@ static const struct mlx5_ib_profile pf_profile = {
NULL),
};
+static const struct mlx5_ib_profile nic_rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+ mlx5_ib_stage_rep_reg_init,
+ mlx5_ib_stage_rep_reg_cleanup),
+};
+
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5234,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
int port_type_cap;
+ printk_once(KERN_INFO "%s", mlx5_version);
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -5055,7 +5248,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
return mlx5_ib_add_slave_port(mdev, port_num);
}
- return __mlx5_ib_add(mdev, &pf_profile);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ if (MLX5_VPORT_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+ return __mlx5_ib_add(dev, &nic_rep_profile);
+ }
+
+ return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
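
The hunks above convert mlx5_ib device setup into a table of stages (the STAGE_CREATE entries in pf_profile and nic_rep_profile), so the eswitch representor path can reuse most of the PF path while swapping only a few callbacks. Below is a minimal userspace sketch of that init/cleanup unwinding pattern; all demo_* names are invented for illustration and are not mlx5 symbols.

    /*
     * Sketch of the "stage profile" pattern: a profile is an ordered table
     * of optional init/cleanup pairs.  Setup walks the table forward and
     * unwinds the completed stages on failure; teardown walks backward
     * from the last completed stage.
     */
    #include <stdio.h>

    struct demo_dev { int flags; };

    struct demo_stage {
    	int  (*init)(struct demo_dev *dev);
    	void (*cleanup)(struct demo_dev *dev);
    };

    static int  caps_init(struct demo_dev *dev)    { dev->flags |= 1; return 0; }
    static int  roce_init(struct demo_dev *dev)    { dev->flags |= 2; return 0; }
    static void roce_cleanup(struct demo_dev *dev) { dev->flags &= ~2; }
    static int  reg_init(struct demo_dev *dev)     { dev->flags |= 4; return 0; }
    static void reg_cleanup(struct demo_dev *dev)  { dev->flags &= ~4; }

    static const struct demo_stage demo_profile[] = {
    	{ caps_init, NULL },            /* stage with no cleanup */
    	{ roce_init, roce_cleanup },
    	{ reg_init,  reg_cleanup },
    };

    #define DEMO_NUM_STAGES \
    	(int)(sizeof(demo_profile) / sizeof(demo_profile[0]))

    static void demo_remove(struct demo_dev *dev, int stage)
    {
    	while (stage) {                 /* unwind completed stages, last first */
    		stage--;
    		if (demo_profile[stage].cleanup)
    			demo_profile[stage].cleanup(dev);
    	}
    }

    static int demo_add(struct demo_dev *dev)
    {
    	int i, err;

    	for (i = 0; i < DEMO_NUM_STAGES; i++) {
    		if (!demo_profile[i].init)
    			continue;
    		err = demo_profile[i].init(dev);
    		if (err) {
    			demo_remove(dev, i);    /* stages [0, i) succeeded */
    			return err;
    		}
    	}
    	return 0;
    }

    int main(void)
    {
    	struct demo_dev dev = { 0 };

    	if (demo_add(&dev) == 0)
    		printf("all stages up, flags=%d\n", dev.flags);
    	demo_remove(&dev, DEMO_NUM_STAGES);
    	return 0;
    }

A second profile can share the same table entries and substitute individual stages, which is exactly how nic_rep_profile replaces the RoCE and callback stages while keeping the rest of pf_profile.
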
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a5272499b600..c33bf1523d67 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
@@ -371,7 +372,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
};
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
@@ -413,7 +414,7 @@ struct mlx5_ib_qp {
};
struct mlx5_ib_cq_buf {
- struct mlx5_buf buf;
+ struct mlx5_frag_buf_ctrl fbc;
struct ib_umem *umem;
int cqe_size;
int nent;
@@ -495,7 +496,7 @@ struct mlx5_ib_wc {
struct mlx5_ib_srq {
struct ib_srq ibsrq;
struct mlx5_core_srq msrq;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
u64 *wrid;
/* protect SRQ handling
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
@@ -744,6 +747,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
@@ -798,7 +802,7 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
- struct mlx5_ib_flow_db flow_db;
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
@@ -808,6 +812,7 @@ struct mlx5_ib_dev {
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
+ struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
@@ -1050,6 +1055,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c51c602f06d6..95a36e9ea552 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
struct mlx5_cache_ent *ent;
int i;
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a2e1aa86e133..85c612ac547a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -36,6 +36,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (err)
goto err_umem;
+ err = create_flow_rule_vport_sq(dev, sq);
+ if (err)
+ goto err_flow;
+
return 0;
+err_flow:
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
@@ -1157,6 +1172,7 @@ err_umem:
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
+ destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
@@ -1267,6 +1283,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
@@ -1558,6 +1578,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
@@ -2143,7 +2167,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
struct ib_qp_init_attr *attr,
struct mlx5_ib_create_qp *ucmd)
{
- struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
int err = 0;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -2152,8 +2175,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
if (!attr->srq || !attr->recv_cq)
return ERR_PTR(-EINVAL);
- dev = to_mdev(pd->device);
-
err = get_qp_user_index(to_mucontext(pd->uobject->context),
ucmd, sizeof(*ucmd), &uidx);
if (err)
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index de318389a301..67de94343cb4 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -174,14 +174,13 @@ void usnic_transport_put_socket(struct socket *sock)
int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
uint32_t *addr, uint16_t *port)
{
- int len;
int err;
struct sockaddr_in sock_addr;
err = sock->ops->getname(sock,
(struct sockaddr *)&sock_addr,
- &len, 0);
- if (err)
+ 0);
+ if (err < 0)
return err;
if (sock_addr.sin_family != AF_INET)
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index c5603d1a07d6..1f8f489b4167 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -560,7 +560,7 @@ done:
static int
data_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
struct sock *sk = sock->sk;
@@ -570,14 +570,13 @@ data_sock_getname(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
- *addr_len = sizeof(*maddr);
maddr->family = AF_ISDN;
maddr->dev = _pms(sk)->dev->id;
maddr->channel = _pms(sk)->ch.nr;
maddr->sapi = _pms(sk)->ch.addr & 0xff;
maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
release_sock(sk);
- return 0;
+ return sizeof(*maddr);
}
static const struct proto_ops data_sock_ops = {
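
The usnic and mISDN hunks above adapt to the changed getname() convention visible in the diff: instead of writing the length through an int pointer, the handler returns the address length on success and a negative errno on failure, so callers test for "< 0". The fragment below is a hedged userspace illustration of that return convention only; demo_getname() and its behaviour are invented, not a kernel API.

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Fill in an address and return its length, or a negative errno. */
    static int demo_getname(struct sockaddr *addr, int have_peer)
    {
    	struct sockaddr_in *sin = (struct sockaddr_in *)addr;

    	if (!have_peer)
    		return -ENOTCONN;          /* error: negative errno */

    	memset(sin, 0, sizeof(*sin));
    	sin->sin_family = AF_INET;
    	sin->sin_port = htons(4242);
    	sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

    	return sizeof(*sin);               /* success: address length */
    }

    int main(void)
    {
    	struct sockaddr_storage ss;
    	int len = demo_getname((struct sockaddr *)&ss, 1);

    	if (len < 0)
    		fprintf(stderr, "getname failed: %d\n", len);
    	else
    		printf("address length %d\n", len);
    	return 0;
    }
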
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 944ec3c9282c..08b85215c2be 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,9 +149,9 @@ config MACVTAP
config IPVLAN
tristate "IP-VLAN support"
depends on INET
- depends on IPV6
+ depends on IPV6 || !IPV6
depends on NETFILTER
- depends on NET_L3_MASTER_DEV
+ select NET_L3_MASTER_DEV
---help---
This allows one to create virtual devices off of a main interface
and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 11fe71278f40..3afda6561434 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -114,12 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
{mvme147lance_probe, 0},
#endif
-#ifdef CONFIG_MAC8390 /* NuBus NS8390-based cards */
- {mac8390_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
- {mac89x0_probe, 0},
-#endif
{NULL, 0},
};
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c669554d70bb..4c19d23dd282 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
.exit = bond_net_exit,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
+ .async = true,
};
static int __init bonding_init(void)
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 63e02a54d537..78616787f2a3 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -852,7 +852,7 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
}
EXPORT_SYMBOL(b53_get_ethtool_stats);
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index d954cf36ecd8..1187ebd79287 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -288,7 +288,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
int b53_configure_vlan(struct dsa_switch *ds);
void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds);
+int b53_get_sset_count(struct dsa_switch *ds, int port);
int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index 7aa84ee4e771..f77be9f85cb3 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -86,7 +86,7 @@ static int dsa_loop_setup(struct dsa_switch *ds)
return 0;
}
-static int dsa_loop_get_sset_count(struct dsa_switch *ds)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
{
return __DSA_LOOP_CNT_MAX;
}
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 6171c0853ff1..fefa454f3e56 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1007,7 +1007,7 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
}
}
-static int lan9303_get_sset_count(struct dsa_switch *ds)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(lan9303_mib);
}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 663b0d5b982b..bcb3e6c734f2 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -439,7 +439,7 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
}
-static int ksz_sset_count(struct dsa_switch *ds)
+static int ksz_sset_count(struct dsa_switch *ds, int port)
{
return TOTAL_SWITCH_COUNTER_NUM;
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 8a0bb000d056..511ca134f13f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -604,7 +604,7 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
}
static int
-mt7530_get_sset_count(struct dsa_switch *ds)
+mt7530_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(mt7530_mib);
}
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 1aaa7a95ebc4..ae9e7f7cb31c 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2
It is required on most chips. If the chip you compile the support for
doesn't have such registers set, say N here. In doubt, say Y.
+
+config NET_DSA_MV88E6XXX_PTP
+ bool "PTP support for Marvell 88E6xxx"
+ default n
+ depends on NET_DSA_MV88E6XXX_GLOBAL2
+ imply NETWORK_PHY_TIMESTAMPING
+ imply PTP_1588_CLOCK
+ help
+ Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+ chips that support it.
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 58a4a0014e59..50de304abe2f 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -5,6 +5,10 @@ mv88e6xxx-objs += global1.o
mv88e6xxx-objs += global1_atu.o
mv88e6xxx-objs += global1_vtu.o
mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
mv88e6xxx-objs += phy.o
mv88e6xxx-objs += port.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
mv88e6xxx-objs += serdes.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index eb328bade225..fd78378ad6b1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -36,8 +36,10 @@
#include "chip.h"
#include "global1.h"
#include "global2.h"
+#include "hwtstamp.h"
#include "phy.h"
#include "port.h"
+#include "ptp.h"
#include "serdes.h"
static void assert_reg_lock(struct mv88e6xxx_chip *chip)
@@ -251,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
chip->g1_irq.masked &= ~(1 << n);
}
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
{
- struct mv88e6xxx_chip *chip = dev_id;
unsigned int nhandled = 0;
unsigned int sub_irq;
unsigned int n;
@@ -278,6 +279,13 @@ out:
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
}
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+ struct mv88e6xxx_chip *chip = dev_id;
+
+ return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
{
struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -333,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
.xlate = irq_domain_xlate_twocell,
};
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
{
int irq, virq;
u16 mask;
@@ -342,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
- free_irq(chip->irq, chip);
-
for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
irq_dispose_mapping(virq);
@@ -352,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
irq_domain_remove(chip->g1_irq.domain);
}
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
{
int err, irq, virq;
u16 reg, mask;
@@ -385,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
if (err)
goto out_disable;
- err = request_threaded_irq(chip->irq, NULL,
- mv88e6xxx_g1_irq_thread_fn,
- IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
- dev_name(chip->dev), chip);
- if (err)
- goto out_disable;
-
return 0;
out_disable:
@@ -409,6 +415,62 @@ out_mapping:
return err;
}
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ err = request_threaded_irq(chip->irq, NULL,
+ mv88e6xxx_g1_irq_thread_fn,
+ IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+ dev_name(chip->dev), chip);
+ if (err)
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+ struct mv88e6xxx_chip *chip = container_of(work,
+ struct mv88e6xxx_chip,
+ irq_poll_work.work);
+ mv88e6xxx_g1_irq_thread_work(chip);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ kthread_init_delayed_work(&chip->irq_poll_work,
+ mv88e6xxx_irq_poll);
+
+ chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+ if (IS_ERR(chip->kworker))
+ return PTR_ERR(chip->kworker);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+
+ return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+ kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+ kthread_destroy_worker(chip->kworker);
+}
+
int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
{
int i;
@@ -604,7 +666,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
return UINT64_MAX;
low = reg;
- if (s->sizeof_stat == 4) {
+ if (s->size == 4) {
err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
if (err)
return UINT64_MAX;
@@ -617,7 +679,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
case STATS_TYPE_BANK0:
reg |= s->reg | histogram;
mv88e6xxx_g1_stats_read(chip, reg, &low);
- if (s->sizeof_stat == 8)
+ if (s->size == 8)
mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
break;
default:
@@ -627,8 +689,8 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
return value;
}
-static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data, int types)
+static int mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data, int types)
{
struct mv88e6xxx_hw_stat *stat;
int i, j;
@@ -641,29 +703,41 @@ static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
j++;
}
}
+
+ return j;
}
-static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data)
+static int mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data)
{
- mv88e6xxx_stats_get_strings(chip, data,
- STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+ return mv88e6xxx_stats_get_strings(chip, data,
+ STATS_TYPE_BANK0 | STATS_TYPE_PORT);
}
-static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
- uint8_t *data)
+static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+ uint8_t *data)
{
- mv88e6xxx_stats_get_strings(chip, data,
- STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+ return mv88e6xxx_stats_get_strings(chip, data,
+ STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
}
static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
uint8_t *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int count = 0;
+
+ mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_strings)
- chip->info->ops->stats_get_strings(chip, data);
+ count = chip->info->ops->stats_get_strings(chip, data);
+
+ if (chip->info->ops->serdes_get_strings) {
+ data += count * ETH_GSTRING_LEN;
+ chip->info->ops->serdes_get_strings(chip, port, data);
+ }
+
+ mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
@@ -692,19 +766,34 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
STATS_TYPE_BANK1);
}
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int serdes_count = 0;
+ int count = 0;
+ mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_sset_count)
- return chip->info->ops->stats_get_sset_count(chip);
+ count = chip->info->ops->stats_get_sset_count(chip);
+ if (count < 0)
+ goto out;
- return 0;
+ if (chip->info->ops->serdes_get_sset_count)
+ serdes_count = chip->info->ops->serdes_get_sset_count(chip,
+ port);
+ if (serdes_count < 0)
+ count = serdes_count;
+ else
+ count += serdes_count;
+out:
+ mutex_unlock(&chip->reg_lock);
+
+ return count;
}
-static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data, int types,
- u16 bank1_select, u16 histogram)
+static int mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data, int types,
+ u16 bank1_select, u16 histogram)
{
struct mv88e6xxx_hw_stat *stat;
int i, j;
@@ -712,24 +801,28 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
stat = &mv88e6xxx_hw_stats[i];
if (stat->type & types) {
+ mutex_lock(&chip->reg_lock);
data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
bank1_select,
histogram);
+ mutex_unlock(&chip->reg_lock);
+
j++;
}
}
+ return j;
}
-static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_PORT,
0, MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
}
-static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -737,8 +830,8 @@ static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
}
-static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data)
+static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
{
return mv88e6xxx_stats_get_stats(chip, port, data,
STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -749,8 +842,17 @@ static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
uint64_t *data)
{
+ int count = 0;
+
if (chip->info->ops->stats_get_stats)
- chip->info->ops->stats_get_stats(chip, port, data);
+ count = chip->info->ops->stats_get_stats(chip, port, data);
+
+ if (chip->info->ops->serdes_get_stats) {
+ data += count;
+ mutex_lock(&chip->reg_lock);
+ chip->info->ops->serdes_get_stats(chip, port, data);
+ mutex_unlock(&chip->reg_lock);
+ }
}
static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
@@ -762,14 +864,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
mutex_lock(&chip->reg_lock);
ret = mv88e6xxx_stats_snapshot(chip, port);
- if (ret < 0) {
- mutex_unlock(&chip->reg_lock);
+ mutex_unlock(&chip->reg_lock);
+
+ if (ret < 0)
return;
- }
mv88e6xxx_get_stats(chip, port, data);
- mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
@@ -1433,7 +1534,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
eth_broadcast_addr(addr.mac);
do {
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
+ mutex_unlock(&chip->reg_lock);
if (err)
return err;
@@ -1466,7 +1569,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
int err;
/* Dump port's default Filtering Information Database (VLAN ID 0) */
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_port_get_fid(chip, port, &fid);
+ mutex_unlock(&chip->reg_lock);
+
if (err)
return err;
@@ -1476,7 +1582,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
/* Dump VLANs' Filtering Information Databases */
do {
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_vtu_getnext(chip, &vlan);
+ mutex_unlock(&chip->reg_lock);
if (err)
return err;
@@ -1496,13 +1604,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_port_db_dump(chip, port, cb, data);
- mutex_unlock(&chip->reg_lock);
- return err;
+ return mv88e6xxx_port_db_dump(chip, port, cb, data);
}
static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,
@@ -2092,6 +2195,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
if (err)
goto unlock;
+ /* Setup PTP Hardware Clock and timestamping */
+ if (chip->info->ptp_support) {
+ err = mv88e6xxx_ptp_setup(chip);
+ if (err)
+ goto unlock;
+
+ err = mv88e6xxx_hwtstamp_setup(chip);
+ if (err)
+ goto unlock;
+ }
+
unlock:
mutex_unlock(&chip->reg_lock);
@@ -2148,6 +2262,15 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
struct mii_bus *bus;
int err;
+ if (external) {
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
+ mutex_unlock(&chip->reg_lock);
+
+ if (err)
+ return err;
+ }
+
bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
if (!bus)
return -ENOMEM;
@@ -2170,12 +2293,19 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
bus->write = mv88e6xxx_mdio_write;
bus->parent = chip->dev;
+ if (!external) {
+ err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
+ if (err)
+ return err;
+ }
+
if (np)
err = of_mdiobus_register(bus, np);
else
err = mdiobus_register(bus);
if (err) {
dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
+ mv88e6xxx_g2_irq_mdio_free(chip, bus);
return err;
}
@@ -2202,6 +2332,9 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
list_for_each_entry(mdio_bus, &chip->mdios, list) {
bus = mdio_bus->bus;
+ if (!mdio_bus->external)
+ mv88e6xxx_g2_irq_mdio_free(chip, bus);
+
mdiobus_unregister(bus);
}
}
@@ -2472,6 +2605,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -2602,6 +2736,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -2673,6 +2808,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -2736,6 +2872,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -2771,6 +2908,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
};
static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -2843,6 +2981,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -2879,6 +3019,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -2913,6 +3055,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -2945,6 +3089,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -2981,6 +3127,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -3049,6 +3197,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+ .avb_ops = &mv88e6352_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -3086,6 +3235,11 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.serdes_power = mv88e6352_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6352_avb_ops,
+ .serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
+ .serdes_get_strings = mv88e6352_serdes_get_strings,
+ .serdes_get_stats = mv88e6352_serdes_get_stats,
};
static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -3124,6 +3278,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -3162,6 +3318,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
.serdes_power = mv88e6390_serdes_power,
+ .gpio_ops = &mv88e6352_gpio_ops,
+ .avb_ops = &mv88e6390_avb_ops,
};
static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -3171,6 +3329,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6085",
.num_databases = 4096,
.num_ports = 10,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3191,6 +3350,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6095/88E6095F",
.num_databases = 256,
.num_ports = 11,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3209,6 +3369,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6097/88E6097F",
.num_databases = 4096,
.num_ports = 11,
+ .num_internal_phys = 8,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3229,6 +3390,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6123",
.num_databases = 4096,
.num_ports = 3,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3249,6 +3411,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6131",
.num_databases = 256,
.num_ports = 8,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3264,15 +3427,18 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
[MV88E6141] = {
.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141,
.family = MV88E6XXX_FAMILY_6341,
- .name = "Marvell 88E6341",
+ .name = "Marvell 88E6141",
.num_databases = 4096,
.num_ports = 6,
+ .num_internal_phys = 5,
+ .num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
.atu_move_port_mask = 0x1f,
+ .g1_irqs = 9,
.g2_irqs = 10,
.pvt = true,
.multi_chip = true,
@@ -3286,6 +3452,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6161",
.num_databases = 4096,
.num_ports = 6,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3306,6 +3473,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6165",
.num_databases = 4096,
.num_ports = 6,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3326,6 +3494,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6171",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3346,6 +3515,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6172",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3366,6 +3537,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6175",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3386,6 +3558,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6176",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3406,6 +3580,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6185",
.num_databases = 256,
.num_ports = 10,
+ .num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3424,6 +3599,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6190",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3444,6 +3621,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6190X",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3464,6 +3643,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6191",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3475,6 +3655,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6191_ops,
},
@@ -3484,6 +3665,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6240",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3495,6 +3678,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6240_ops,
},
@@ -3504,6 +3688,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6290",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3515,6 +3701,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6290_ops,
},
@@ -3524,16 +3711,20 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6320",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6320_ops,
},
@@ -3543,15 +3734,19 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6321",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6321_ops,
},
@@ -3560,17 +3755,21 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.family = MV88E6XXX_FAMILY_6341,
.name = "Marvell 88E6341",
.num_databases = 4096,
+ .num_internal_phys = 5,
.num_ports = 6,
+ .num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
.atu_move_port_mask = 0x1f,
+ .g1_irqs = 9,
.g2_irqs = 10,
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6341_ops,
},
@@ -3580,6 +3779,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6350",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3600,6 +3800,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6351",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3620,6 +3821,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6352",
.num_databases = 4096,
.num_ports = 7,
+ .num_internal_phys = 5,
+ .num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
@@ -3631,6 +3834,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
+ .ptp_support = true,
.ops = &mv88e6352_ops,
},
[MV88E6390] = {
@@ -3639,6 +3843,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6390",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3650,6 +3856,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6390_ops,
},
[MV88E6390X] = {
@@ -3658,6 +3865,8 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.name = "Marvell 88E6390X",
.num_databases = 4096,
.num_ports = 11, /* 10 + Z80 */
+ .num_internal_phys = 11,
+ .num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
@@ -3669,6 +3878,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.pvt = true,
.multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6390x_ops,
},
};
@@ -3880,6 +4090,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.port_mdb_del = mv88e6xxx_port_mdb_del,
.crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join,
.crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
+ .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set,
+ .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get,
+ .port_txtstamp = mv88e6xxx_port_txtstamp,
+ .port_rxtstamp = mv88e6xxx_port_rxtstamp,
+ .get_ts_info = mv88e6xxx_get_ts_info,
};
static struct dsa_switch_driver mv88e6xxx_switch_drv = {
@@ -3959,33 +4174,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
goto out;
}
- if (chip->irq > 0) {
- /* Has to be performed before the MDIO bus is created,
- * because the PHYs will link there interrupts to these
- * interrupt controllers
- */
- mutex_lock(&chip->reg_lock);
+ /* Has to be performed before the MDIO bus is created, because
+ * the PHYs will link their interrupts to these interrupt
+ * controllers
+ */
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
err = mv88e6xxx_g1_irq_setup(chip);
- mutex_unlock(&chip->reg_lock);
-
- if (err)
- goto out;
-
- if (chip->info->g2_irqs > 0) {
- err = mv88e6xxx_g2_irq_setup(chip);
- if (err)
- goto out_g1_irq;
- }
+ else
+ err = mv88e6xxx_irq_poll_setup(chip);
+ mutex_unlock(&chip->reg_lock);
- err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
- if (err)
- goto out_g2_irq;
+ if (err)
+ goto out;
- err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (chip->info->g2_irqs > 0) {
+ err = mv88e6xxx_g2_irq_setup(chip);
if (err)
- goto out_g1_atu_prob_irq;
+ goto out_g1_irq;
}
+ err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+ if (err)
+ goto out_g2_irq;
+
+ err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (err)
+ goto out_g1_atu_prob_irq;
+
err = mv88e6xxx_mdios_register(chip, np);
if (err)
goto out_g1_vtu_prob_irq;
@@ -3999,20 +4215,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
out_mdio:
mv88e6xxx_mdios_unregister(chip);
out_g1_vtu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
out_g1_atu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_atu_prob_irq_free(chip);
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
out_g2_irq:
- if (chip->info->g2_irqs > 0 && chip->irq > 0)
+ if (chip->info->g2_irqs > 0)
mv88e6xxx_g2_irq_free(chip);
out_g1_irq:
- if (chip->irq > 0) {
- mutex_lock(&chip->reg_lock);
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
mv88e6xxx_g1_irq_free(chip);
- mutex_unlock(&chip->reg_lock);
- }
+ else
+ mv88e6xxx_irq_poll_free(chip);
+ mutex_unlock(&chip->reg_lock);
out:
return err;
}
@@ -4022,19 +4237,27 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
struct mv88e6xxx_chip *chip = ds->priv;
+ if (chip->info->ptp_support) {
+ mv88e6xxx_hwtstamp_free(chip);
+ mv88e6xxx_ptp_free(chip);
+ }
+
mv88e6xxx_phy_destroy(chip);
mv88e6xxx_unregister_switch(chip);
mv88e6xxx_mdios_unregister(chip);
- if (chip->irq > 0) {
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
- mv88e6xxx_g1_atu_prob_irq_free(chip);
- if (chip->info->g2_irqs > 0)
- mv88e6xxx_g2_irq_free(chip);
- mutex_lock(&chip->reg_lock);
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
+
+ if (chip->info->g2_irqs > 0)
+ mv88e6xxx_g2_irq_free(chip);
+
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
mv88e6xxx_g1_irq_free(chip);
- mutex_unlock(&chip->reg_lock);
- }
+ else
+ mv88e6xxx_irq_poll_free(chip);
+ mutex_unlock(&chip->reg_lock);
}
static const struct of_device_id mv88e6xxx_of_match[] = {
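
The chip.c changes above split interrupt setup into a common part plus either a real threaded IRQ or, when no interrupt line is wired up, a self-rescheduling delayed work that polls the global1 status every 100 ms and is cancelled synchronously on teardown. The sketch below is a userspace analogue of that polling fallback using a plain pthread instead of a kthread_worker/kthread_delayed_work pair; the shape (poll, re-arm, cancel on teardown) is the same, and all demo_* names are invented.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_int demo_stop;

    static void demo_poll_events(void)
    {
    	/* stands in for reading and dispatching the interrupt status */
    	printf("polled device status\n");
    }

    static void *demo_poll_worker(void *arg)
    {
    	(void)arg;
    	while (!atomic_load(&demo_stop)) {
    		demo_poll_events();
    		usleep(100 * 1000);     /* re-arm after 100 ms */
    	}
    	return NULL;
    }

    int main(void)
    {
    	pthread_t poller;

    	pthread_create(&poller, NULL, demo_poll_worker, NULL);
    	sleep(1);                       /* let a few polls run */

    	atomic_store(&demo_stop, 1);    /* analogue of cancelling the work */
    	pthread_join(poller, NULL);
    	return 0;
    }
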
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 3dba6e90adcf..bad211014e91 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -15,7 +15,10 @@
#include <linux/if_vlan.h>
#include <linux/irq.h>
#include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
#include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
#include <net/dsa.h>
#ifndef UINT64_MAX
@@ -39,6 +42,8 @@
#define MV88E6XXX_MAX_PVT_SWITCHES 32
#define MV88E6XXX_MAX_PVT_PORTS 16
+#define MV88E6XXX_MAX_GPIO 16
+
enum mv88e6xxx_egress_mode {
MV88E6XXX_EGRESS_MODE_UNMODIFIED,
MV88E6XXX_EGRESS_MODE_UNTAGGED,
@@ -105,6 +110,8 @@ struct mv88e6xxx_info {
const char *name;
unsigned int num_databases;
unsigned int num_ports;
+ unsigned int num_internal_phys;
+ unsigned int num_gpio;
unsigned int max_vid;
unsigned int port_base_addr;
unsigned int global1_addr;
@@ -126,6 +133,9 @@ struct mv88e6xxx_info {
*/
u8 atu_move_port_mask;
const struct mv88e6xxx_ops *ops;
+
+ /* Supports PTP */
+ bool ptp_support;
};
struct mv88e6xxx_atu_entry {
@@ -146,6 +156,8 @@ struct mv88e6xxx_vtu_entry {
struct mv88e6xxx_bus_ops;
struct mv88e6xxx_irq_ops;
+struct mv88e6xxx_gpio_ops;
+struct mv88e6xxx_avb_ops;
struct mv88e6xxx_irq {
u16 masked;
@@ -154,6 +166,36 @@ struct mv88e6xxx_irq {
unsigned int nirqs;
};
+/* state flags for mv88e6xxx_port_hwtstamp::state */
+enum {
+ MV88E6XXX_HWTSTAMP_ENABLED,
+ MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+};
+
+struct mv88e6xxx_port_hwtstamp {
+ /* Port index */
+ int port_id;
+
+ /* Timestamping state */
+ unsigned long state;
+
+ /* Resources for receive timestamping */
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head rx_queue2;
+
+ /* Resources for transmit timestamping */
+ unsigned long tx_tstamp_start;
+ struct sk_buff *tx_skb;
+ u16 tx_seq_id;
+
+ /* Current timestamp configuration */
+ struct hwtstamp_config tstamp_config;
+};
+
+struct mv88e6xxx_port {
+ u64 serdes_stats[2];
+};
+
struct mv88e6xxx_chip {
const struct mv88e6xxx_info *info;
@@ -207,8 +249,34 @@ struct mv88e6xxx_chip {
int irq;
int device_irq;
int watchdog_irq;
+
int atu_prob_irq;
int vtu_prob_irq;
+ struct kthread_worker *kworker;
+ struct kthread_delayed_work irq_poll_work;
+
+ /* GPIO resources */
+ u8 gpio_data[2];
+
+ /* This cyclecounter abstracts the switch PTP time.
+ * reg_lock must be held for any operation that read()s.
+ */
+ struct cyclecounter tstamp_cc;
+ struct timecounter tstamp_tc;
+ struct delayed_work overflow_work;
+
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_clock_info;
+ struct delayed_work tai_event_work;
+ struct ptp_pin_desc pin_config[MV88E6XXX_MAX_GPIO];
+ u16 trig_config;
+ u16 evcap_config;
+
+ /* Per-port timestamping resources. */
+ struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
+
+ /* Array of port structures. */
+ struct mv88e6xxx_port ports[DSA_MAX_PORTS];
};
struct mv88e6xxx_bus_ops {
@@ -327,9 +395,9 @@ struct mv88e6xxx_ops {
/* Return the number of strings describing statistics */
int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
- void (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
- void (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
- uint64_t *data);
+ int (*stats_get_strings)(struct mv88e6xxx_chip *chip, uint8_t *data);
+ int (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
const struct mv88e6xxx_irq_ops *watchdog_ops;
@@ -339,11 +407,24 @@ struct mv88e6xxx_ops {
/* Power on/off a SERDES interface */
int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
+ /* Statistics from the SERDES interface */
+ int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
+ void (*serdes_get_strings)(struct mv88e6xxx_chip *chip, int port,
+ uint8_t *data);
+ void (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
+
/* VLAN Translation Unit operations */
int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry);
int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry);
+
+ /* GPIO operations */
+ const struct mv88e6xxx_gpio_ops *gpio_ops;
+
+ /* Interface to the AVB/PTP registers */
+ const struct mv88e6xxx_avb_ops *avb_ops;
};
struct mv88e6xxx_irq_ops {
@@ -355,13 +436,49 @@ struct mv88e6xxx_irq_ops {
void (*irq_free)(struct mv88e6xxx_chip *chip);
};
+struct mv88e6xxx_gpio_ops {
+ /* Get/set data on GPIO pin */
+ int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin);
+ int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int value);
+
+ /* get/set GPIO direction */
+ int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin);
+ int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ bool input);
+
+ /* get/set GPIO pin control */
+ int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int *func);
+ int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+ int func);
+};
+
+struct mv88e6xxx_avb_ops {
+ /* Access port-scoped Precision Time Protocol registers */
+ int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
+ u16 *data, int len);
+ int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr,
+ u16 data);
+
+ /* Access global Precision Time Protocol registers */
+ int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+ int len);
+ int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+
+ /* Access global Time Application Interface registers */
+ int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+ int len);
+ int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+};
+
#define STATS_TYPE_PORT BIT(0)
#define STATS_TYPE_BANK0 BIT(1)
#define STATS_TYPE_BANK1 BIT(2)
struct mv88e6xxx_hw_stat {
char string[ETH_GSTRING_LEN];
- int sizeof_stat;
+ size_t size;
int reg;
int type;
};
@@ -386,6 +503,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip)
return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
}
+static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
+{
+ return chip->info->num_gpio;
+}
+
int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index af0727877825..0ce627fded48 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -20,22 +20,22 @@
#include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */
#include "global2.h"
-static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
{
return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
}
-static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
{
return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
}
-static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
{
return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
}
-static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
{
return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
}
@@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus,
val);
}
+/* Offset 0x1B: Watchdog Control */
static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
{
u16 reg;
@@ -1089,7 +1090,7 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
err = request_threaded_irq(chip->device_irq, NULL,
mv88e6xxx_g2_irq_thread_fn,
- IRQF_ONESHOT, "mv88e6xxx-g1", chip);
+ IRQF_ONESHOT, "mv88e6xxx-g2", chip);
if (err)
goto out;
@@ -1106,6 +1107,38 @@ out:
return err;
}
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+ int phy, irq, err, err_phy;
+
+ for (phy = 0; phy < chip->info->num_internal_phys; phy++) {
+ irq = irq_find_mapping(chip->g2_irq.domain, phy);
+ if (irq < 0) {
+ err = irq;
+ goto out;
+ }
+ bus->irq[chip->info->port_base_addr + phy] = irq;
+ }
+ return 0;
+out:
+ err_phy = phy;
+
+ for (phy = 0; phy < err_phy; phy++)
+ irq_dispose_mapping(bus->irq[phy]);
+
+ return err;
+}
+
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+ int phy;
+
+ for (phy = 0; phy < chip->info->num_internal_phys; phy++)
+ irq_dispose_mapping(bus->irq[phy]);
+}
+
int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
{
u16 reg;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 669f59017b12..520ec70d32e8 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -149,7 +149,26 @@
#define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff
/* Offset 0x16: AVB Command Register */
-#define MV88E6352_G2_AVB_CMD 0x16
+#define MV88E6352_G2_AVB_CMD 0x16
+#define MV88E6352_G2_AVB_CMD_BUSY 0x8000
+#define MV88E6352_G2_AVB_CMD_OP_READ 0x4000
+#define MV88E6352_G2_AVB_CMD_OP_READ_INCR 0x6000
+#define MV88E6352_G2_AVB_CMD_OP_WRITE 0x3000
+#define MV88E6390_G2_AVB_CMD_OP_READ 0x0000
+#define MV88E6390_G2_AVB_CMD_OP_READ_INCR 0x4000
+#define MV88E6390_G2_AVB_CMD_OP_WRITE 0x6000
+#define MV88E6352_G2_AVB_CMD_PORT_MASK 0x0f00
+#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL 0xe
+#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL 0xf
+#define MV88E6390_G2_AVB_CMD_PORT_MASK 0x1f00
+#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL 0x1e
+#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL 0x1f
+#define MV88E6352_G2_AVB_CMD_BLOCK_PTP 0
+#define MV88E6352_G2_AVB_CMD_BLOCK_AVB 1
+#define MV88E6352_G2_AVB_CMD_BLOCK_QAV 2
+#define MV88E6352_G2_AVB_CMD_BLOCK_QVB 3
+#define MV88E6352_G2_AVB_CMD_BLOCK_MASK 0x00e0
+#define MV88E6352_G2_AVB_CMD_ADDR_MASK 0x001f
/* Offset 0x17: AVB Data Register */
#define MV88E6352_G2_AVB_DATA 0x17
@@ -223,6 +242,40 @@
#define MV88E6352_G2_NOEGR_POLICY 0x2000
#define MV88E6390_G2_LAG_ID_4 0x2000
+/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */
+/* Offset 0x02: Misc Configuration */
+#define MV88E6352_G2_SCRATCH_MISC_CFG 0x02
+#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI 0x80
+/* Offset 0x60-0x61: GPIO Configuration */
+#define MV88E6352_G2_SCRATCH_GPIO_CFG0 0x60
+#define MV88E6352_G2_SCRATCH_GPIO_CFG1 0x61
+/* Offset 0x62-0x63: GPIO Direction */
+#define MV88E6352_G2_SCRATCH_GPIO_DIR0 0x62
+#define MV88E6352_G2_SCRATCH_GPIO_DIR1 0x63
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT 0
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN 1
+/* Offset 0x64-0x65: GPIO Data */
+#define MV88E6352_G2_SCRATCH_GPIO_DATA0 0x64
+#define MV88E6352_G2_SCRATCH_GPIO_DATA1 0x65
+/* Offset 0x68-0x6F: GPIO Pin Control */
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL0 0x68
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL1 0x69
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL2 0x6A
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL3 0x6B
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL4 0x6C
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL5 0x6D
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL6 0x6E
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL7 0x6F
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA0 0x70
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1 0x71
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU BIT(2)
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2 0x72
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3
+
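+/* Pin control function values, assuming they are programmed per pin via
+ * mv88e6352_g2_scratch_gpio_set_pctl().
+ */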
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2
+
#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -230,6 +283,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
return 0;
}
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+
int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
@@ -259,6 +317,11 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus);
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus);
+
int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
@@ -267,6 +330,14 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
+
+extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
+
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external);
+
#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -279,6 +350,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
return 0;
}
+static inline int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
int port)
{
@@ -364,6 +455,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
{
}
+static inline int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+ struct mii_bus *bus)
+{
+}
+
static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
{
return -EOPNOTSUPP;
@@ -382,6 +484,17 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
+
+static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
+
+static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
#endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c
new file mode 100644
index 000000000000..2e398ccb88ca
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c
@@ -0,0 +1,193 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * Copyright (c) 2017 National Instruments
+ * Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "global2.h"
+
+/* Offset 0x16: AVB Command Register
+ * Offset 0x17: AVB Data Register
+ *
+ * There are two different versions of this register interface:
+ * "6352": 3-bit "op" field, 4-bit "port" field.
+ * "6390": 2-bit "op" field, 5-bit "port" field.
+ *
+ * The "op" codes are different between the two, as well as the special
+ * port fields for global PTP and TAI configuration.
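+ *
+ * In both layouts a request word is op | (port << 8) | (block << 5) | addr,
+ * as composed by the helpers below.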
+ */
+
+/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
+ * The hardware supports snapshotting up to four contiguous registers.
+ */
+static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
+ u16 *data, int len)
+{
+ int err;
+ int i;
+
+ /* Hardware can only snapshot four words. */
+ if (len > 4)
+ return -E2BIG;
+
+ err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
+ if (err)
+ return err;
+
+ for (i = 0; i < len; ++i) {
+ err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA,
+ &data[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */
+static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
+ u16 data)
+{
+ int err;
+
+ err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
+}
+
+static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 *data,
+ int len)
+{
+ u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ :
+ MV88E6352_G2_AVB_CMD_OP_READ_INCR) |
+ (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+ addr;
+
+ return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 data)
+{
+ u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) |
+ (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+ return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6352_g2_avb_port_ptp_read(chip,
+ MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6352_g2_avb_port_ptp_write(chip,
+ MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data);
+}
+
+static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6352_g2_avb_port_ptp_read(chip,
+ MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6352_g2_avb_port_ptp_write(chip,
+ MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
+ .port_ptp_read = mv88e6352_g2_avb_port_ptp_read,
+ .port_ptp_write = mv88e6352_g2_avb_port_ptp_write,
+ .ptp_read = mv88e6352_g2_avb_ptp_read,
+ .ptp_write = mv88e6352_g2_avb_ptp_write,
+ .tai_read = mv88e6352_g2_avb_tai_read,
+ .tai_write = mv88e6352_g2_avb_tai_write,
+};
+
+static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 *data,
+ int len)
+{
+ u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ :
+ MV88E6390_G2_AVB_CMD_OP_READ_INCR) |
+ (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+ addr;
+
+ return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+ int port, int addr, u16 data)
+{
+ u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) |
+ (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+ return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6390_g2_avb_port_ptp_read(chip,
+ MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6390_g2_avb_port_ptp_write(chip,
+ MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+ addr, data);
+}
+
+static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ return mv88e6390_g2_avb_port_ptp_read(chip,
+ MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data, len);
+}
+
+static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ return mv88e6390_g2_avb_port_ptp_write(chip,
+ MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+ addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {
+ .port_ptp_read = mv88e6390_g2_avb_port_ptp_read,
+ .port_ptp_write = mv88e6390_g2_avb_port_ptp_write,
+ .ptp_read = mv88e6390_g2_avb_ptp_read,
+ .ptp_write = mv88e6390_g2_avb_ptp_write,
+ .tai_read = mv88e6390_g2_avb_tai_read,
+ .tai_write = mv88e6390_g2_avb_tai_write,
+};
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
new file mode 100644
index 000000000000..3f92b8892dc7
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -0,0 +1,291 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+
+/* Offset 0x1A: Scratch and Misc. Register */
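+/* The scratch registers are reached indirectly: the register number goes in
+ * the upper byte of MV88E6XXX_G2_SCRATCH_MISC_MISC and the 8-bit payload in
+ * the lower byte, as done by the two helpers below.
+ */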
+static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg,
+ u8 *data)
+{
+ u16 value;
+ int err;
+
+ err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
+ reg << 8);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value);
+ if (err)
+ return err;
+
+ *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK);
+
+ return 0;
+}
+
+static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
+ u8 data)
+{
+ u16 value = (reg << 8) | data;
+
+ return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
+}
+
+/**
+ * mv88e6xxx_g2_scratch_get_bit - get a bit
+ * @chip: chip private data
+ * @base_reg: base of scratch bits
+ * @offset: index of bit within the register set
+ * @set: is bit set?
+ */
+static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
+ int base_reg, unsigned int offset,
+ int *set)
+{
+ int reg = base_reg + (offset / 8);
+ u8 mask = (1 << (offset & 0x7));
+ u8 val;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ *set = !!(mask & val);
+
+ return 0;
+}
+
+/**
+ * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit
+ * @chip: chip private data
+ * @base_reg: base of scratch bits
+ * @offset: index of bit within the register set
+ * @set: set if true, clear if false
+ *
+ * Helper function for dealing with the direction and data registers.
+ */
+static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
+ int base_reg, unsigned int offset,
+ int set)
+{
+ int reg = base_reg + (offset / 8);
+ u8 mask = (1 << (offset & 0x7));
+ u8 val;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ if (set)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for low, 1 for high, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
+ unsigned int pin)
+{
+ int val = 0;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_get_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DATA0,
+ pin, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @value: value to set
+ */
+static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int value)
+{
+ u8 mask = (1 << (pin & 0x7));
+ int offset = (pin / 8);
+ int reg;
+
+ reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
+
+ if (value)
+ chip->gpio_data[offset] |= mask;
+ else
+ chip->gpio_data[offset] &= ~mask;
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
+ */
+static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip,
+ unsigned int pin)
+{
+ int val = 0;
+ int err;
+
+ err = mv88e6xxx_g2_scratch_get_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DIR0,
+ pin, &val);
+ if (err)
+ return err;
+
+ return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @input: true for input, false for output
+ */
+static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip,
+ unsigned int pin, bool input)
+{
+ int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN :
+ MV88E6352_G2_SCRATCH_GPIO_DIR_OUT);
+
+ return mv88e6xxx_g2_scratch_set_bit(chip,
+ MV88E6352_G2_SCRATCH_GPIO_DIR0,
+ pin, value);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ *
+ * Note that the function numbers themselves may vary by chipset.
+ */
+static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int *func)
+{
+ int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+ int offset = (pin & 0x1) ? 4 : 0;
+ u8 mask = (0x7 << offset);
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ *func = (val & mask) >> offset;
+
+ return 0;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ */
+static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip,
+ unsigned int pin, int func)
+{
+ int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+ int offset = (pin & 0x1) ? 4 : 0;
+ u8 mask = (0x7 << offset);
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+ if (err)
+ return err;
+
+ val = (val & ~mask) | ((func << offset) & mask);
+
+ return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
+ .get_data = mv88e6352_g2_scratch_gpio_get_data,
+ .set_data = mv88e6352_g2_scratch_gpio_set_data,
+ .get_dir = mv88e6352_g2_scratch_gpio_get_dir,
+ .set_dir = mv88e6352_g2_scratch_gpio_set_dir,
+ .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
+ .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
+};
+
+/**
+ * mv88e6xxx_g2_scratch_gpio_set_smi - set gpio muxing for external smi
+ * @chip: chip private data
+ * @external: set mux for external smi, or free for gpio usage
+ *
+ * Some mv88e6xxx models have GPIO pins that may be configured as
+ * an external SMI interface, or they may be made free for other
+ * GPIO uses.
+ */
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+ bool external)
+{
+ int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
+ int config_data1 = MV88E6352_G2_SCRATCH_CONFIG_DATA1;
+ int config_data2 = MV88E6352_G2_SCRATCH_CONFIG_DATA2;
+ bool no_cpu;
+ u8 p0_mode;
+ int err;
+ u8 val;
+
+ err = mv88e6xxx_g2_scratch_read(chip, config_data2, &val);
+ if (err)
+ return err;
+
+ p0_mode = val & MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK;
+
+ if (p0_mode == 0x01 || p0_mode == 0x02)
+ return -EBUSY;
+
+ err = mv88e6xxx_g2_scratch_read(chip, config_data1, &val);
+ if (err)
+ return err;
+
+ no_cpu = !!(val & MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU);
+
+ err = mv88e6xxx_g2_scratch_read(chip, misc_cfg, &val);
+ if (err)
+ return err;
+
+ /* NO_CPU being 0 inverts the meaning of the bit */
+ if (!no_cpu)
+ external = !external;
+
+ if (external)
+ val |= MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+ else
+ val &= ~MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+
+ return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
new file mode 100644
index 000000000000..ac7694c71266
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -0,0 +1,576 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "hwtstamp.h"
+#include "ptp.h"
+#include <linux/ptp_classify.h>
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
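+
+/* The ptp_classify type is stashed in skb->cb by the RX hook so that the
+ * deferred worker can find the PTP header again when matching sequence IDs
+ * (see seq_match() below).
+ */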
+
+static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
+ int addr, u16 *data, int len)
+{
+ if (!chip->info->ops->avb_ops->port_ptp_read)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
+ data, len);
+}
+
+static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
+ int addr, u16 data)
+{
+ if (!chip->info->ops->avb_ops->port_ptp_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
+ data);
+}
+
+static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+ u16 data)
+{
+ if (!chip->info->ops->avb_ops->ptp_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
+}
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may encounter delays due to MDIO contention, so
+ * the timeout is set accordingly.
+ */
+#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(20)
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->phc_index = ptp_clock_index(chip->ptp_clock);
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+ info->rx_filters =
+ (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+
+ return 0;
+}
+
+static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
+ struct hwtstamp_config *config)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ bool tstamp_enable = false;
+ u16 port_config0;
+ int err;
+
+ /* Prevent the TX/RX paths from trying to interact with the
+ * timestamp hardware while we reconfigure it.
+ */
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+ /* reserved for future extensions */
+ if (config->flags)
+ return -EINVAL;
+
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ tstamp_enable = false;
+ break;
+ case HWTSTAMP_TX_ON:
+ tstamp_enable = true;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* The switch supports timestamping both L2 and L4; one cannot be
+ * disabled independently of the other.
+ */
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ tstamp_enable = false;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ default:
+ config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+
+ if (tstamp_enable) {
+ /* Disable transportSpecific value matching, so that packets
+ * with either 1588 (0) or 802.1AS (1) will be timestamped.
+ */
+ port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+ } else {
+ /* Disable PTP. This disables both RX and TX timestamping. */
+ port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+ }
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+ port_config0);
+ mutex_unlock(&chip->reg_lock);
+
+ if (err < 0)
+ return err;
+
+ /* Once hardware has been configured, enable timestamp checks
+ * in the RX/TX paths.
+ */
+ if (tstamp_enable)
+ set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+ return 0;
+}
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+ struct ifreq *ifr)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ struct hwtstamp_config config;
+ int err;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
+ if (err)
+ return err;
+
+ /* Save the chosen configuration to be returned later. */
+ memcpy(&ps->tstamp_config, &config, sizeof(config));
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+ struct ifreq *ifr)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ struct hwtstamp_config *config = &ps->tstamp_config;
+
+ if (!chip->info->ptp_support)
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+ -EFAULT : 0;
+}
+
+/* Get the start of the PTP header in this skb */
+static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
+{
+ u8 *data = skb_mac_header(skb);
+ unsigned int offset = 0;
+
+ if (type & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (type & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return NULL;
+ }
+
+ /* Ensure that the entire header (34 bytes for PTPv2) is present in
+ * this packet.
+ */
+ if (skb->len + ETH_HLEN < offset + 34)
+ return NULL;
+
+ return data + offset;
+}
+
+/* Returns a pointer to the PTP header if the caller should time stamp,
+ * or NULL if the caller should not.
+ */
+static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
+ struct sk_buff *skb, unsigned int type)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ u8 *hdr;
+
+ if (!chip->info->ptp_support)
+ return NULL;
+
+ hdr = parse_ptp_header(skb, type);
+ if (!hdr)
+ return NULL;
+
+ if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
+ return NULL;
+
+ return hdr;
+}
+
+static int mv88e6xxx_ts_valid(u16 status)
+{
+ if (!(status & MV88E6XXX_PTP_TS_VALID))
+ return 0;
+ if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
+ return 0;
+ return 1;
+}
+
+static int seq_match(struct sk_buff *skb, u16 ts_seqid)
+{
+ unsigned int type = SKB_PTP_TYPE(skb);
+ u8 *hdr = parse_ptp_header(skb, type);
+ __be16 *seqid;
+
+ seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+ return ts_seqid == ntohs(*seqid);
+}
+
+static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps,
+ struct sk_buff *skb, u16 reg,
+ struct sk_buff_head *rxq)
+{
+ u16 buf[4] = { 0 }, status, seq_id;
+ u64 ns, timelo, timehi;
+ struct skb_shared_hwtstamps *shwt;
+ int err;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+ reg, buf, ARRAY_SIZE(buf));
+ mutex_unlock(&chip->reg_lock);
+ if (err)
+ pr_err("failed to get the receive time stamp\n");
+
+ status = buf[0];
+ timelo = buf[1];
+ timehi = buf[2];
+ seq_id = buf[3];
+
+ if (status & MV88E6XXX_PTP_TS_VALID) {
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
+ mutex_unlock(&chip->reg_lock);
+ if (err)
+ pr_err("failed to clear the receive status\n");
+ }
+ /* Since the device can only handle one time stamp at a time,
+ * we purge any extra frames from the queue.
+ */
+ for ( ; skb; skb = skb_dequeue(rxq)) {
+ if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
+ ns = timehi << 16 | timelo;
+
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
+ mutex_unlock(&chip->reg_lock);
+ shwt = skb_hwtstamps(skb);
+ memset(shwt, 0, sizeof(*shwt));
+ shwt->hwtstamp = ns_to_ktime(ns);
+ status &= ~MV88E6XXX_PTP_TS_VALID;
+ }
+ netif_rx_ni(skb);
+ }
+}
+
+static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps)
+{
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&ps->rx_queue);
+
+ if (skb)
+ mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+ &ps->rx_queue);
+
+ skb = skb_dequeue(&ps->rx_queue2);
+ if (skb)
+ mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+ &ps->rx_queue2);
+}
+
+static int is_pdelay_resp(u8 *msgtype)
+{
+ return (*msgtype & 0xf) == 3;
+}
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type)
+{
+ struct mv88e6xxx_port_hwtstamp *ps;
+ struct mv88e6xxx_chip *chip;
+ u8 *hdr;
+
+ chip = ds->priv;
+ ps = &chip->port_hwtstamp[port];
+
+ if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+ return false;
+
+ hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
+ if (!hdr)
+ return false;
+
+ SKB_PTP_TYPE(skb) = type;
+
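+ /* Pdelay_Resp events are captured in the ARRIVAL1 register bank (see
+ * mv88e6xxx_hwtstamp_setup()), all other events in ARRIVAL0, so keep
+ * them on separate queues.
+ */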
+ if (is_pdelay_resp(hdr))
+ skb_queue_tail(&ps->rx_queue2, skb);
+ else
+ skb_queue_tail(&ps->rx_queue, skb);
+
+ ptp_schedule_worker(chip->ptp_clock, 0);
+
+ return true;
+}
+
+static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_port_hwtstamp *ps)
+{
+ struct skb_shared_hwtstamps shhwtstamps;
+ u16 departure_block[4], status;
+ struct sk_buff *tmp_skb;
+ u32 time_raw;
+ int err;
+ u64 ns;
+
+ if (!ps->tx_skb)
+ return 0;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+ MV88E6XXX_PORT_PTP_DEP_STS,
+ departure_block,
+ ARRAY_SIZE(departure_block));
+ mutex_unlock(&chip->reg_lock);
+
+ if (err)
+ goto free_and_clear_skb;
+
+ if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
+ if (time_is_before_jiffies(ps->tx_tstamp_start +
+ TX_TSTAMP_TIMEOUT)) {
+ dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
+ ps->port_id);
+ goto free_and_clear_skb;
+ }
+ /* The timestamp should be available quickly, and getting it is
+ * high priority, so a poll is warranted: restart the work.
+ */
+ return 1;
+ }
+
+ /* We have the timestamp; go ahead and clear valid now */
+ mutex_lock(&chip->reg_lock);
+ mv88e6xxx_port_ptp_write(chip, ps->port_id,
+ MV88E6XXX_PORT_PTP_DEP_STS, 0);
+ mutex_unlock(&chip->reg_lock);
+
+ status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
+ if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
+ dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
+ goto free_and_clear_skb;
+ }
+
+ if (departure_block[3] != ps->tx_seq_id) {
+ dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
+ goto free_and_clear_skb;
+ }
+
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
+ mutex_unlock(&chip->reg_lock);
+ shhwtstamps.hwtstamp = ns_to_ktime(ns);
+
+ dev_dbg(chip->dev,
+ "p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
+ ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
+ departure_block[0], ps->tx_seq_id, departure_block[3]);
+
+ /* skb_complete_tx_timestamp() will free up the client to make
+ * another timestamp-able transmit. We have to be ready for it
+ * -- by clearing the ps->tx_skb "flag" -- beforehand.
+ */
+
+ tmp_skb = ps->tx_skb;
+ ps->tx_skb = NULL;
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+ skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+ return 0;
+
+free_and_clear_skb:
+ dev_kfree_skb_any(ps->tx_skb);
+ ps->tx_skb = NULL;
+ clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+ return 0;
+}
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ struct dsa_switch *ds = chip->ds;
+ struct mv88e6xxx_port_hwtstamp *ps;
+ int i, restart = 0;
+
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_user_port(ds, i))
+ continue;
+
+ ps = &chip->port_hwtstamp[i];
+ if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+ restart |= mv88e6xxx_txtstamp_work(chip, ps);
+
+ mv88e6xxx_rxtstamp_work(chip, ps);
+ }
+
+ return restart ? 1 : -1;
+}
+
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type)
+{
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+ __be16 *seq_ptr;
+ u8 *hdr;
+
+ if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+ if (!hdr)
+ return false;
+
+ seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+ if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+ &ps->state))
+ return false;
+
+ ps->tx_skb = clone;
+ ps->tx_tstamp_start = jiffies;
+ ps->tx_seq_id = be16_to_cpup(seq_ptr);
+
+ ptp_schedule_worker(chip->ptp_clock, 0);
+ return true;
+}
+
+static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
+{
+ struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+
+ ps->port_id = port;
+
+ skb_queue_head_init(&ps->rx_queue);
+ skb_queue_head_init(&ps->rx_queue2);
+
+ return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+ MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+ int i;
+
+ /* Disable timestamping on all ports. */
+ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+ err = mv88e6xxx_hwtstamp_port_setup(chip, i);
+ if (err)
+ return err;
+ }
+
+ /* MV88E6XXX_PTP_MSGTYPE is a mask of PTP message types to
+ * timestamp. This affects all ports that have timestamping enabled,
+ * but the timestamp config is per-port; thus we configure all events
+ * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
+ */
+ err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
+ MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
+ if (err)
+ return err;
+
+ /* Use ARRIVAL1 for peer delay response messages. */
+ err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
+ MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
+ if (err)
+ return err;
+
+ /* 88E6341 devices default to timestamping at the PHY, but this has
+ * a hardware issue that results in unreliable timestamps. Force
+ * these devices to timestamp at the MAC.
+ */
+ if (chip->info->family == MV88E6XXX_FAMILY_6341) {
+ u16 val = MV88E6341_PTP_CFG_UPDATE |
+ MV88E6341_PTP_CFG_MODE_IDX |
+ MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
+ err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
new file mode 100644
index 000000000000..bc71c9212a08
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -0,0 +1,172 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_HWTSTAMP_H
+#define _MV88E6XXX_HWTSTAMP_H
+
+#include "chip.h"
+
+/* Global PTP registers */
+/* Offset 0x00: PTP EtherType */
+#define MV88E6XXX_PTP_ETHERTYPE 0x00
+
+/* Offset 0x01: Message Type Timestamp Enables */
+#define MV88E6XXX_PTP_MSGTYPE 0x01
+#define MV88E6XXX_PTP_MSGTYPE_SYNC 0x0001
+#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ 0x0002
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ 0x0004
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES 0x0008
+#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT 0x000f
+
+/* Offset 0x02: Timestamp Arrival Capture Pointers */
+#define MV88E6XXX_PTP_TS_ARRIVAL_PTR 0x02
+
+/* Offset 0x07: PTP Global Configuration */
+#define MV88E6341_PTP_CFG 0x07
+#define MV88E6341_PTP_CFG_UPDATE 0x8000
+#define MV88E6341_PTP_CFG_IDX_MASK 0x7f00
+#define MV88E6341_PTP_CFG_DATA_MASK 0x00ff
+#define MV88E6341_PTP_CFG_MODE_IDX 0x0
+#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY 0x00
+#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC 0x80
+
+/* Offset 0x08: PTP Interrupt Status */
+#define MV88E6XXX_PTP_IRQ_STATUS 0x08
+
+/* Per-Port PTP Registers */
+/* Offset 0x00: PTP Configuration 0 */
+#define MV88E6XXX_PORT_PTP_CFG0 0x00
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT 12
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK 0xf000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588 0x0000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS 0x1000
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH 0x0800
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE 0x0002
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP 0x0001
+
+/* Offset 0x01: PTP Configuration 1 */
+#define MV88E6XXX_PORT_PTP_CFG1 0x01
+
+/* Offset 0x02: PTP Configuration 2 */
+#define MV88E6XXX_PORT_PTP_CFG2 0x02
+#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL 0x1000
+#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN 0x0002
+#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN 0x0001
+
+/* Offset 0x03: PTP LED Configuration */
+#define MV88E6XXX_PORT_PTP_LED_CFG 0x03
+
+/* Offset 0x08: PTP Arrival 0 Status */
+#define MV88E6XXX_PORT_PTP_ARR0_STS 0x08
+
+/* Offset 0x09/0x0A: PTP Arrival 0 Time */
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO 0x09
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI 0x0a
+
+/* Offset 0x0B: PTP Arrival 0 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR0_SEQID 0x0b
+
+/* Offset 0x0C: PTP Arrival 1 Status */
+#define MV88E6XXX_PORT_PTP_ARR1_STS 0x0c
+
+/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO 0x0d
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI 0x0e
+
+/* Offset 0x0F: PTP Arrival 1 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR1_SEQID 0x0f
+
+/* Offset 0x10: PTP Departure Status */
+#define MV88E6XXX_PORT_PTP_DEP_STS 0x10
+
+/* Offset 0x11/0x12: PTP Departure Time */
+#define MV88E6XXX_PORT_PTP_DEP_TIME_LO 0x11
+#define MV88E6XXX_PORT_PTP_DEP_TIME_HI 0x12
+
+/* Offset 0x13: PTP Departure Sequence ID */
+#define MV88E6XXX_PORT_PTP_DEP_SEQID 0x13
+
+/* Status fields for arrival and departure timestamp status registers */
+#define MV88E6XXX_PTP_TS_STATUS_MASK 0x0006
+#define MV88E6XXX_PTP_TS_STATUS_NORMAL 0x0000
+#define MV88E6XXX_PTP_TS_STATUS_OVERWITTEN 0x0002
+#define MV88E6XXX_PTP_TS_STATUS_DISCARDED 0x0004
+#define MV88E6XXX_PTP_TS_VALID 0x0001
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info);
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
+ int port, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
+ int port, struct ifreq *ifr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone,
+ unsigned int type)
+{
+ return false;
+}
+
+static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *clone,
+ unsigned int type)
+{
+ return false;
+}
+
+static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *info)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_HWTSTAMP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
new file mode 100644
index 000000000000..bd85e2c390e1
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -0,0 +1,381 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "ptp.h"
+
+/* Raw timestamps are in units of 8-ns clock periods. */
+#define CC_SHIFT 28
+#define CC_MULT (8 << CC_SHIFT)
+#define CC_MULT_NUM (1 << 9)
+#define CC_MULT_DEM 15625ULL
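+
+/* adjfine scales the multiplier by scaled_ppm / (10^6 * 2^16):
+ *   diff = CC_MULT * scaled_ppm / (10^6 * 2^16)
+ *        = (8 << 28) * scaled_ppm / (10^6 * 2^16)
+ *        = scaled_ppm * (1 << 9) / 15625
+ * which is where CC_MULT_NUM and CC_MULT_DEM come from.
+ */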
+
+#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
+
+#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
+#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+ overflow_work)
+#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+ tai_event_work)
+
+static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
+ u16 *data, int len)
+{
+ if (!chip->info->ops->avb_ops->tai_read)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
+}
+
+static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
+{
+ if (!chip->info->ops->avb_ops->tai_write)
+ return -EOPNOTSUPP;
+
+ return chip->info->ops->avb_ops->tai_write(chip, addr, data);
+}
+
+/* TODO: places where this is called should be using pinctrl */
+static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
+ int func, int input)
+{
+ int err;
+
+ if (!chip->info->ops->gpio_ops)
+ return -EOPNOTSUPP;
+
+ err = chip->info->ops->gpio_ops->set_dir(chip, pin, input);
+ if (err)
+ return err;
+
+ return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
+}
+
+static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+{
+ struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+ u16 phc_time[2];
+ int err;
+
+ err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time,
+ ARRAY_SIZE(phc_time));
+ if (err)
+ return 0;
+ else
+ return ((u32)phc_time[1] << 16) | phc_time[0];
+}
+
+/* mv88e6xxx_config_eventcap - configure TAI event capture
+ * @chip: chip private data
+ * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
+ * @rising: zero for falling-edge trigger, else rising-edge trigger
+ *
+ * This will also reset the capture sequence counter.
+ */
+static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
+ int rising)
+{
+ u16 global_config;
+ u16 cap_config;
+ int err;
+
+ chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE |
+ MV88E6XXX_TAI_CFG_CAP_CTR_START;
+ if (!rising)
+ chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING;
+
+ global_config = (chip->evcap_config | chip->trig_config);
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config);
+ if (err)
+ return err;
+
+ if (event == PTP_CLOCK_PPS) {
+ cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG;
+ } else if (event == PTP_CLOCK_EXTTS) {
+ /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */
+ cap_config = 0;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Write the capture config; this also clears the capture counter */
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS,
+ cap_config);
+
+ return err;
+}
+
+static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
+{
+ struct delayed_work *dw = to_delayed_work(ugly);
+ struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
+ struct ptp_clock_event ev;
+ u16 status[4];
+ u32 raw_ts;
+ int err;
+
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS,
+ status, ARRAY_SIZE(status));
+ mutex_unlock(&chip->reg_lock);
+
+ if (err) {
+ dev_err(chip->dev, "failed to read TAI status register\n");
+ return;
+ }
+ if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) {
+ dev_warn(chip->dev, "missed event capture\n");
+ return;
+ }
+ if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID))
+ goto out;
+
+ raw_ts = ((u32)status[2] << 16) | status[1];
+
+ /* Clear the valid bit so the next timestamp can come in */
+ status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID;
+ mutex_lock(&chip->reg_lock);
+ err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]);
+ mutex_unlock(&chip->reg_lock);
+
+ /* This is an external timestamp */
+ ev.type = PTP_CLOCK_EXTTS;
+
+ /* We only have one timestamping channel. */
+ ev.index = 0;
+ mutex_lock(&chip->reg_lock);
+ ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts);
+ mutex_unlock(&chip->reg_lock);
+
+ ptp_clock_event(chip->ptp_clock, &ev);
+out:
+ schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL);
+}
+
+static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ int neg_adj = 0;
+ u32 diff, mult;
+ u64 adj;
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+ mult = CC_MULT;
+ adj = CC_MULT_NUM;
+ adj *= scaled_ppm;
+ diff = div_u64(adj, CC_MULT_DEM);
+
+ mutex_lock(&chip->reg_lock);
+
+ timecounter_read(&chip->tstamp_tc);
+ chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff;
+
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+ mutex_lock(&chip->reg_lock);
+ timecounter_adjtime(&chip->tstamp_tc, delta);
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
+ struct timespec64 *ts)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ u64 ns;
+
+ mutex_lock(&chip->reg_lock);
+ ns = timecounter_read(&chip->tstamp_tc);
+ mutex_unlock(&chip->reg_lock);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ u64 ns;
+
+ ns = timespec64_to_ns(ts);
+
+ mutex_lock(&chip->reg_lock);
+ timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns);
+ mutex_unlock(&chip->reg_lock);
+
+ return 0;
+}
+
+static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
+ struct ptp_clock_request *rq, int on)
+{
+ int rising = (rq->extts.flags & PTP_RISING_EDGE);
+ int func;
+ int pin;
+ int err;
+
+ pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
+
+ if (pin < 0)
+ return -EBUSY;
+
+ mutex_lock(&chip->reg_lock);
+
+ if (on) {
+ func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
+
+ err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+ if (err)
+ goto out;
+
+ schedule_delayed_work(&chip->tai_event_work,
+ TAI_EVENT_WORK_INTERVAL);
+
+ err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
+ } else {
+ func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
+
+ err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+
+ cancel_delayed_work_sync(&chip->tai_event_work);
+ }
+
+out:
+ mutex_unlock(&chip->reg_lock);
+
+ return err;
+}
+
+static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mv88e6xxx_ptp_enable_extts(chip, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ switch (func) {
+ case PTP_PF_NONE:
+ case PTP_PF_EXTTS:
+ break;
+ case PTP_PF_PEROUT:
+ case PTP_PF_PHYSYNC:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3
+ * seconds; this task forces periodic reads so that we don't miss any.
+ */
+#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
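+/* Reading every 16 seconds gives at least two reads per ~34.3s counter wrap. */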
+static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
+{
+ struct delayed_work *dw = to_delayed_work(work);
+ struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
+ struct timespec64 ts;
+
+ mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
+
+ schedule_delayed_work(&chip->overflow_work,
+ MV88E6XXX_TAI_OVERFLOW_PERIOD);
+}
+
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+ int i;
+
+ /* Set up the cycle counter */
+ memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
+ chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read;
+ chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32);
+ chip->tstamp_cc.mult = CC_MULT;
+ chip->tstamp_cc.shift = CC_SHIFT;
+
+ timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
+ ktime_to_ns(ktime_get_real()));
+
+ INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+ INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
+
+ chip->ptp_clock_info.owner = THIS_MODULE;
+ snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
+ "%s", dev_name(chip->dev));
+ chip->ptp_clock_info.max_adj = 1000000;
+
+ chip->ptp_clock_info.n_ext_ts = 1;
+ chip->ptp_clock_info.n_per_out = 0;
+ chip->ptp_clock_info.n_pins = mv88e6xxx_num_gpio(chip);
+ chip->ptp_clock_info.pps = 0;
+
+ for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
+ struct ptp_pin_desc *ppd = &chip->pin_config[i];
+
+ snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
+ ppd->index = i;
+ ppd->func = PTP_PF_NONE;
+ }
+ chip->ptp_clock_info.pin_config = chip->pin_config;
+
+ chip->ptp_clock_info.adjfine = mv88e6xxx_ptp_adjfine;
+ chip->ptp_clock_info.adjtime = mv88e6xxx_ptp_adjtime;
+ chip->ptp_clock_info.gettime64 = mv88e6xxx_ptp_gettime;
+ chip->ptp_clock_info.settime64 = mv88e6xxx_ptp_settime;
+ chip->ptp_clock_info.enable = mv88e6xxx_ptp_enable;
+ chip->ptp_clock_info.verify = mv88e6xxx_ptp_verify;
+ chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
+
+ chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
+ if (IS_ERR(chip->ptp_clock))
+ return PTR_ERR(chip->ptp_clock);
+
+ schedule_delayed_work(&chip->overflow_work,
+ MV88E6XXX_TAI_OVERFLOW_PERIOD);
+
+ return 0;
+}
+
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+ if (chip->ptp_clock) {
+ cancel_delayed_work_sync(&chip->overflow_work);
+ cancel_delayed_work_sync(&chip->tai_event_work);
+
+ ptp_clock_unregister(chip->ptp_clock);
+ chip->ptp_clock = NULL;
+ }
+}
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
new file mode 100644
index 000000000000..10f271ab650d
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -0,0 +1,108 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ * Erik Hons <erik.hons@ni.com>
+ * Brandon Streiff <brandon.streiff@ni.com>
+ * Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PTP_H
+#define _MV88E6XXX_PTP_H
+
+#include "chip.h"
+
+/* Offset 0x00: TAI Global Config */
+#define MV88E6XXX_TAI_CFG 0x00
+#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE 0x8000
+#define MV88E6XXX_TAI_CFG_CAP_CTR_START 0x4000
+#define MV88E6XXX_TAI_CFG_EVREQ_FALLING 0x2000
+#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO 0x1000
+#define MV88E6XXX_TAI_CFG_IRL_ENABLE 0x0400
+#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN 0x0200
+#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN 0x0100
+#define MV88E6XXX_TAI_CFG_TRIG_LOCK 0x0080
+#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE 0x0008
+#define MV88E6XXX_TAI_CFG_MULTI_PTP 0x0004
+#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT 0x0002
+#define MV88E6XXX_TAI_CFG_TRIG_ENABLE 0x0001
+
+/* Offset 0x01: Timestamp Clock Period (ps) */
+#define MV88E6XXX_TAI_CLOCK_PERIOD 0x01
+
+/* Offset 0x02/0x03: Trigger Generation Amount */
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO 0x02
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI 0x03
+
+/* Offset 0x04: Clock Compensation */
+#define MV88E6XXX_TAI_TRIG_CLOCK_COMP 0x04
+
+/* Offset 0x05: Trigger Configuration */
+#define MV88E6XXX_TAI_TRIG_CFG 0x05
+
+/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
+#define MV88E6XXX_TAI_IRL_AMOUNT 0x06
+
+/* Offset 0x07: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP 0x07
+
+/* Offset 0x08: Ingress Rate Limiter Compensation (ps) */
+#define MV88E6XXX_TAI_IRL_COMP_PS 0x08
+
+/* Offset 0x09: Event Status */
+#define MV88E6XXX_TAI_EVENT_STATUS 0x09
+#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG 0x4000
+#define MV88E6XXX_TAI_EVENT_STATUS_ERROR 0x0200
+#define MV88E6XXX_TAI_EVENT_STATUS_VALID 0x0100
+#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK 0x00ff
+
+/* Offset 0x0A/0x0B: Event Time */
+#define MV88E6XXX_TAI_EVENT_TIME_LO 0x0a
+#define MV88E6XXX_TAI_EVENT_TYPE_HI 0x0b
+
+/* Offset 0x0E/0x0F: PTP Global Time */
+#define MV88E6XXX_TAI_TIME_LO 0x0e
+#define MV88E6XXX_TAI_TIME_HI 0x0f
+
+/* Offset 0x10/0x11: Trig Generation Time */
+#define MV88E6XXX_TAI_TRIG_TIME_LO 0x10
+#define MV88E6XXX_TAI_TRIG_TIME_HI 0x11
+
+/* Offset 0x12: Lock Status */
+#define MV88E6XXX_TAI_LOCK_STATUS 0x12
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
+
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
+ ptp_clock_info)
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+ return -1;
+}
+
+static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_PTP_H */
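[Editorial note, not part of the patch: the ptp_to_chip() macro above is the usual container_of() idiom for recovering driver-private state inside a ptp_clock_info callback. A minimal sketch of how a callback would use it; the function name and empty body are illustrative only, the driver's real callbacks (mv88e6xxx_ptp_adjtime and friends) live in ptp.c.]

	/* Illustrative only: recover the enclosing chip from the
	 * ptp_clock_info pointer handed to a PTP callback, exactly as
	 * ptp_to_chip() does above.  Hardware access is omitted.
	 */
	#include <linux/ptp_clock_kernel.h>

	static int example_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
	{
		struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);

		/* ... apply delta to the chip's TAI counter here ... */
		(void)chip;
		return 0;
	}
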
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index f3c01119b3d1..b6166424216a 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -55,18 +55,30 @@ static int mv88e6352_serdes_power_set(struct mv88e6xxx_chip *chip, bool on)
return err;
}
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
{
- int err;
u8 cmode;
+ int err;
err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
- if (err)
- return err;
+ if (err) {
+ dev_err(chip->dev, "failed to read cmode\n");
+ return false;
+ }
if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
(cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
- (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII)) {
+ (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
+ return true;
+
+ return false;
+}
+
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+ int err;
+
+ if (mv88e6352_port_has_serdes(chip, port)) {
err = mv88e6352_serdes_power_set(chip, on);
if (err < 0)
return err;
@@ -75,6 +87,90 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
return 0;
}
+struct mv88e6352_serdes_hw_stat {
+ char string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int reg;
+};
+
+static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
+ { "serdes_fibre_rx_error", 16, 21 },
+ { "serdes_PRBS_error", 32, 24 },
+};
+
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+ if (mv88e6352_port_has_serdes(chip, port))
+ return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+
+ return 0;
+}
+
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data)
+{
+ struct mv88e6352_serdes_hw_stat *stat;
+ int i;
+
+ if (!mv88e6352_port_has_serdes(chip, port))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+ stat = &mv88e6352_serdes_hw_stats[i];
+ memcpy(data + i * ETH_GSTRING_LEN, stat->string,
+ ETH_GSTRING_LEN);
+ }
+}
+
+static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
+ struct mv88e6352_serdes_hw_stat *stat)
+{
+ u64 val = 0;
+ u16 reg;
+ int err;
+
+ err = mv88e6352_serdes_read(chip, stat->reg, &reg);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+
+ val = reg;
+
+ if (stat->sizeof_stat == 32) {
+ err = mv88e6352_serdes_read(chip, stat->reg + 1, &reg);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+ val = val << 16 | reg;
+ }
+
+ return val;
+}
+
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
+{
+ struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
+ struct mv88e6352_serdes_hw_stat *stat;
+ u64 value;
+ int i;
+
+ if (!mv88e6352_port_has_serdes(chip, port))
+ return;
+
+ BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
+ ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+ stat = &mv88e6352_serdes_hw_stats[i];
+ value = mv88e6352_serdes_get_stat(chip, stat);
+ mv88e6xxx_port->serdes_stats[i] += value;
+ data[i] = mv88e6xxx_port->serdes_stats[i];
+ }
+}
+
/* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
static int mv88e6390_serdes_10g(struct mv88e6xxx_chip *chip, int addr, bool on)
{
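[Editorial note, not part of the patch: mv88e6352_serdes_get_stat() above builds a 32-bit counter from two consecutive 16-bit reads, with the first register landing in the upper half via "val = val << 16 | reg". A standalone, plain-C illustration of that bit manipulation; the helper name is made up for this sketch.]

	#include <stdint.h>

	/* Combine two 16-bit register halves into one 32-bit counter,
	 * mirroring the "val = val << 16 | reg" expression in the patch:
	 * the first read supplies the upper half, the next register the
	 * lower half. */
	static uint32_t combine_halves(uint16_t first, uint16_t second)
	{
		return ((uint32_t)first << 16) | second;
	}
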
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 5c1cd6d8e9a5..641baa75f910 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -44,5 +44,9 @@
int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data);
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
#endif
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 9df22ebee822..600d5ad1fbde 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -631,7 +631,7 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
}
static int
-qca8k_get_sset_count(struct dsa_switch *ds)
+qca8k_get_sset_count(struct dsa_switch *ds, int port)
{
return ARRAY_SIZE(ar8327_mib);
}
diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index f975c2fc88a3..1d650e66cc6e 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -7,8 +7,8 @@ obj-$(CONFIG_MAC8390) += mac8390.o
obj-$(CONFIG_APNE) += apne.o 8390.o
obj-$(CONFIG_ARM_ETHERH) += etherh.o
obj-$(CONFIG_AX88796) += ax88796.o
-obj-$(CONFIG_HYDRA) += hydra.o 8390.o
-obj-$(CONFIG_MCF8390) += mcf8390.o 8390.o
+obj-$(CONFIG_HYDRA) += hydra.o
+obj-$(CONFIG_MCF8390) += mcf8390.o
obj-$(CONFIG_NE2000) += ne.o 8390p.o
obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o
obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o
@@ -16,4 +16,4 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
obj-$(CONFIG_STNIC) += stnic.o 8390.o
obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
obj-$(CONFIG_WD80x3) += wd.o 8390.o
-obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o
+obj-$(CONFIG_ZORRO8390) += zorro8390.o
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 245554707163..da61cf3cb3a9 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -77,8 +77,6 @@ static unsigned char version[] = "ax88796.c: Copyright 2005,2007 Simtec Electron
#define AX_GPOC_PPDSET BIT(6)
-static u32 ax_msg_enable;
-
/* device private data */
struct ax_device {
@@ -747,7 +745,6 @@ static int ax_init_dev(struct net_device *dev)
ei_local->block_output = &ax_block_output;
ei_local->get_8390_hdr = &ax_get_8390_hdr;
ei_local->priv = 0;
- ei_local->msg_enable = ax_msg_enable;
dev->netdev_ops = &ax_netdev_ops;
dev->ethtool_ops = &ax_ethtool_ops;
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 7bddb8efb6d5..d422a124cd7c 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -104,7 +104,6 @@ static void AX88190_init(struct net_device *dev, int startp);
static int ax_open(struct net_device *dev);
static int ax_close(struct net_device *dev);
static irqreturn_t ax_interrupt(int irq, void *dev_id);
-static u32 axnet_msg_enable;
/*====================================================================*/
@@ -151,7 +150,6 @@ static int axnet_probe(struct pcmcia_device *link)
return -ENOMEM;
ei_local = netdev_priv(dev);
- ei_local->msg_enable = axnet_msg_enable;
spin_lock_init(&ei_local->page_lock);
info = PRIV(dev);
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index 11cbf22ad201..32e9627e3880 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -64,8 +64,6 @@ static char version[] =
#include "lib8390.c"
-static u32 etherh_msg_enable;
-
struct etherh_priv {
void __iomem *ioc_fast;
void __iomem *memc;
@@ -502,18 +500,6 @@ etherh_close(struct net_device *dev)
}
/*
- * Initialisation
- */
-
-static void __init etherh_banner(void)
-{
- static int version_printed;
-
- if ((etherh_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0))
- pr_info("%s", version);
-}
-
-/*
* Read the ethernet address string from the on board rom.
* This is an ascii string...
*/
@@ -671,8 +657,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
struct etherh_priv *eh;
int ret;
- etherh_banner();
-
ret = ecard_request_resources(ec);
if (ret)
goto out;
@@ -757,7 +741,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
ei_local->block_output = etherh_block_output;
ei_local->get_8390_hdr = etherh_get_header;
ei_local->interface_num = 0;
- ei_local->msg_enable = etherh_msg_enable;
etherh_reset(dev);
__NS8390_init(dev, 0);
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 8ae249195301..941754ea78ec 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -66,7 +66,6 @@ static void hydra_block_input(struct net_device *dev, int count,
static void hydra_block_output(struct net_device *dev, int count,
const unsigned char *buf, int start_page);
static void hydra_remove_one(struct zorro_dev *z);
-static u32 hydra_msg_enable;
static struct zorro_device_id hydra_zorro_tbl[] = {
{ ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
@@ -119,7 +118,6 @@ static int hydra_init(struct zorro_dev *z)
int start_page, stop_page;
int j;
int err;
- struct ei_device *ei_local;
static u32 hydra_offsets[16] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
@@ -138,8 +136,6 @@ static int hydra_init(struct zorro_dev *z)
start_page = NESM_START_PG;
stop_page = NESM_STOP_PG;
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = hydra_msg_enable;
dev->base_addr = ioaddr;
dev->irq = IRQ_AMIGA_PORTS;
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index 60f8e2c8e726..5d9bbde9fe68 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -975,6 +975,8 @@ static void ethdev_setup(struct net_device *dev)
ether_setup(dev);
spin_lock_init(&ei_local->page_lock);
+
+ ei_local->msg_enable = msg_enable;
}
/**
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 2f91ce8dc614..b6d735bf8011 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -123,8 +123,7 @@ enum mac8390_access {
};
extern int mac8390_memtest(struct net_device *dev);
-static int mac8390_initdev(struct net_device *dev,
- struct nubus_rsrc *ndev,
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
enum mac8390_type type);
static int mac8390_open(struct net_device *dev);
@@ -168,9 +167,8 @@ static void slow_sane_block_output(struct net_device *dev, int count,
const unsigned char *buf, int start_page);
static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
-static u32 mac8390_msg_enable;
-static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
+static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
{
switch (fres->dr_sw) {
case NUBUS_DRSW_3COM:
@@ -236,7 +234,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
return MAC8390_NONE;
}
-static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
+static enum mac8390_access mac8390_testio(unsigned long membase)
{
unsigned long outdata = 0xA5A0B5B0;
unsigned long indata = 0x00000000;
@@ -254,7 +252,7 @@ static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
return ACCESS_UNKNOWN;
}
-static int __init mac8390_memsize(unsigned long membase)
+static int mac8390_memsize(unsigned long membase)
{
unsigned long flags;
int i, j;
@@ -290,36 +288,34 @@ static int __init mac8390_memsize(unsigned long membase)
return i * 0x1000;
}
-static bool __init mac8390_init(struct net_device *dev,
- struct nubus_rsrc *ndev,
- enum mac8390_type cardtype)
+static bool mac8390_rsrc_init(struct net_device *dev,
+ struct nubus_rsrc *fres,
+ enum mac8390_type cardtype)
{
+ struct nubus_board *board = fres->board;
struct nubus_dir dir;
struct nubus_dirent ent;
int offset;
volatile unsigned short *i;
- printk_once(KERN_INFO pr_fmt("%s"), version);
-
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
/* This is getting to be a habit */
- dev->base_addr = (ndev->board->slot_addr |
- ((ndev->board->slot & 0xf) << 20));
+ dev->base_addr = board->slot_addr | ((board->slot & 0xf) << 20);
/*
* Get some Nubus info - we will trust the card's idea
* of where its memory and registers are.
*/
- if (nubus_get_func_dir(ndev, &dir) == -1) {
- pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
- dev->name, ndev->board->slot);
+ if (nubus_get_func_dir(fres, &dir) == -1) {
+ dev_err(&board->dev,
+ "Unable to get Nubus functional directory\n");
return false;
}
/* Get the MAC address */
if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) {
- pr_info("%s: Couldn't get MAC address!\n", dev->name);
+ dev_info(&board->dev, "MAC address resource not found\n");
return false;
}
@@ -329,8 +325,8 @@ static bool __init mac8390_init(struct net_device *dev,
nubus_rewinddir(&dir);
if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
&ent) == -1) {
- pr_err("%s: Memory offset resource for slot %X not found!\n",
- dev->name, ndev->board->slot);
+ dev_err(&board->dev,
+ "Memory offset resource not found\n");
return false;
}
nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -340,8 +336,8 @@ static bool __init mac8390_init(struct net_device *dev,
nubus_rewinddir(&dir);
if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
&ent) == -1) {
- pr_info("%s: Memory length resource for slot %X not found, probing\n",
- dev->name, ndev->board->slot);
+ dev_info(&board->dev,
+ "Memory length resource not found, probing\n");
offset = mac8390_memsize(dev->mem_start);
} else {
nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -351,25 +347,25 @@ static bool __init mac8390_init(struct net_device *dev,
switch (cardtype) {
case MAC8390_KINETICS:
case MAC8390_DAYNA: /* it's the same */
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
DAYNA_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
DAYNA_8390_MEM);
dev->mem_end = dev->mem_start +
mac8390_memsize(dev->mem_start);
break;
case MAC8390_INTERLAN:
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
INTERLAN_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
INTERLAN_8390_MEM);
dev->mem_end = dev->mem_start +
mac8390_memsize(dev->mem_start);
break;
case MAC8390_CABLETRON:
- dev->base_addr = (int)(ndev->board->slot_addr +
+ dev->base_addr = (int)(board->slot_addr +
CABLETRON_8390_BASE);
- dev->mem_start = (int)(ndev->board->slot_addr +
+ dev->mem_start = (int)(board->slot_addr +
CABLETRON_8390_MEM);
/* The base address is unreadable if 0x00
* has been written to the command register
@@ -384,8 +380,8 @@ static bool __init mac8390_init(struct net_device *dev,
break;
default:
- pr_err("Card type %s is unsupported, sorry\n",
- ndev->board->name);
+ dev_err(&board->dev,
+ "No known base address for card type\n");
return false;
}
}
@@ -393,91 +389,83 @@ static bool __init mac8390_init(struct net_device *dev,
return true;
}
-struct net_device * __init mac8390_probe(int unit)
+static int mac8390_device_probe(struct nubus_board *board)
{
struct net_device *dev;
- struct nubus_rsrc *ndev = NULL;
int err = -ENODEV;
- struct ei_device *ei_local;
-
- static unsigned int slots;
-
- enum mac8390_type cardtype;
-
- /* probably should check for Nubus instead */
-
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
+ struct nubus_rsrc *fres;
+ enum mac8390_type cardtype = MAC8390_NONE;
dev = ____alloc_ei_netdev(0);
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0)
- sprintf(dev->name, "eth%d", unit);
+ return -ENOMEM;
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
+ SET_NETDEV_DEV(dev, &board->dev);
- /* Have we seen it already? */
- if (slots & (1 << ndev->board->slot))
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
continue;
- slots |= 1 << ndev->board->slot;
- cardtype = mac8390_ident(ndev);
+ cardtype = mac8390_ident(fres);
if (cardtype == MAC8390_NONE)
continue;
- if (!mac8390_init(dev, ndev, cardtype))
- continue;
-
- /* Do the nasty 8390 stuff */
- if (!mac8390_initdev(dev, ndev, cardtype))
+ if (mac8390_rsrc_init(dev, fres, cardtype))
break;
}
-
- if (!ndev)
+ if (!fres)
goto out;
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = mac8390_msg_enable;
+ err = mac8390_initdev(dev, board, cardtype);
+ if (err)
+ goto out;
err = register_netdev(dev);
if (err)
goto out;
- return dev;
+
+ nubus_set_drvdata(board, dev);
+ return 0;
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
+}
+
+static int mac8390_device_remove(struct nubus_board *board)
+{
+ struct net_device *dev = nubus_get_drvdata(board);
+
+ unregister_netdev(dev);
+ free_netdev(dev);
+ return 0;
}
-#ifdef MODULE
+static struct nubus_driver mac8390_driver = {
+ .probe = mac8390_device_probe,
+ .remove = mac8390_device_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .owner = THIS_MODULE,
+ }
+};
+
MODULE_AUTHOR("David Huggins-Daines <dhd@debian.org> and others");
MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver");
MODULE_LICENSE("GPL");
-static struct net_device *dev_mac8390;
-
-int __init init_module(void)
+static int __init mac8390_init(void)
{
- dev_mac8390 = mac8390_probe(-1);
- if (IS_ERR(dev_mac8390)) {
- pr_warn("mac8390: No card found\n");
- return PTR_ERR(dev_mac8390);
- }
- return 0;
+ return nubus_driver_register(&mac8390_driver);
}
+module_init(mac8390_init);
-void __exit cleanup_module(void)
+static void __exit mac8390_exit(void)
{
- unregister_netdev(dev_mac8390);
- free_netdev(dev_mac8390);
+ nubus_driver_unregister(&mac8390_driver);
}
-
-#endif /* MODULE */
+module_exit(mac8390_exit);
static const struct net_device_ops mac8390_netdev_ops = {
.ndo_open = mac8390_open,
@@ -493,9 +481,8 @@ static const struct net_device_ops mac8390_netdev_ops = {
#endif
};
-static int __init mac8390_initdev(struct net_device *dev,
- struct nubus_rsrc *ndev,
- enum mac8390_type type)
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
+ enum mac8390_type type)
{
static u32 fwrd4_offsets[16] = {
0, 4, 8, 12,
@@ -546,7 +533,8 @@ static int __init mac8390_initdev(struct net_device *dev,
case MAC8390_APPLE:
switch (mac8390_testio(dev->mem_start)) {
case ACCESS_UNKNOWN:
- pr_err("Don't know how to access card memory!\n");
+ dev_err(&board->dev,
+ "Don't know how to access card memory\n");
return -ENODEV;
case ACCESS_16:
@@ -612,21 +600,18 @@ static int __init mac8390_initdev(struct net_device *dev,
break;
default:
- pr_err("Card type %s is unsupported, sorry\n",
- ndev->board->name);
+ dev_err(&board->dev, "Unsupported card type\n");
return -ENODEV;
}
__NS8390_init(dev, 0);
/* Good, done, now spit out some messages */
- pr_info("%s: %s in slot %X (type %s)\n",
- dev->name, ndev->board->name, ndev->board->slot,
- cardname[type]);
- pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
- dev->dev_addr, dev->irq,
- (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
- dev->mem_start, access_bitmode ? 32 : 16);
+ dev_info(&board->dev, "%s (type %s)\n", board->name, cardname[type]);
+ dev_info(&board->dev, "MAC %pM, IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
+ dev->dev_addr, dev->irq,
+ (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
+ dev->mem_start, access_bitmode ? 32 : 16);
return 0;
}
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index 4bb967bc879e..4ad8031ab669 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -38,7 +38,6 @@ static const char version[] =
#define NESM_START_PG 0x40 /* First page of TX buffer */
#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
-static u32 mcf8390_msg_enable;
#ifdef NE2000_ODDOFFSET
/*
@@ -407,7 +406,6 @@ static int mcf8390_init(struct net_device *dev)
static int mcf8390_probe(struct platform_device *pdev)
{
struct net_device *dev;
- struct ei_device *ei_local;
struct resource *mem, *irq;
resource_size_t msize;
int ret;
@@ -435,8 +433,6 @@ static int mcf8390_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = mcf8390_msg_enable;
dev->irq = irq->start;
dev->base_addr = mem->start;
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 66f47987e2a2..4cdff6e6af89 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -485,7 +485,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr)
mdelay(10); /* wait 10ms for interrupt to propagate */
outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */
dev->irq = probe_irq_off(cookie);
- if (netif_msg_probe(ei_local))
+ if (ne_msg_enable & NETIF_MSG_PROBE)
pr_cont(" autoirq is %d", dev->irq);
} else if (dev->irq == 2)
/* Fixup for users that don't know that IRQ 2 is really IRQ 9,
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index bcad4a7fac9f..61e43802b9a5 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -66,7 +66,6 @@
#define PCNET_RDC_TIMEOUT (2*HZ/100) /* Max wait in jiffies for Tx RDC */
static const char *if_names[] = { "auto", "10baseT", "10base2"};
-static u32 pcnet_msg_enable;
/*====================================================================*/
@@ -556,7 +555,6 @@ static int pcnet_config(struct pcmcia_device *link)
int start_pg, stop_pg, cm_offset;
int has_shmem = 0;
struct hw_info *local_hw_info;
- struct ei_device *ei_local;
dev_dbg(&link->dev, "pcnet_config\n");
@@ -606,8 +604,6 @@ static int pcnet_config(struct pcmcia_device *link)
mii_phy_probe(dev);
SET_NETDEV_DEV(dev, &link->dev);
- ei_local = netdev_priv(dev);
- ei_local->msg_enable = pcnet_msg_enable;
if (register_netdev(dev) != 0) {
pr_notice("register_netdev() failed\n");
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index 6efa2722f850..fb17c2c7e1dd 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -299,7 +299,7 @@ static int __init wd_probe1(struct net_device *dev, int ioaddr)
outb_p(0x00, nic_addr+EN0_IMR); /* Mask all intrs. again. */
- if (netif_msg_drv(ei_local))
+ if (wd_msg_enable & NETIF_MSG_PROBE)
pr_cont(" autoirq is %d", dev->irq);
if (dev->irq < 2)
dev->irq = word16 ? 10 : 5;
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index 6d93956b293b..35a500a21521 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -44,8 +44,6 @@
static const char version[] =
"8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
-static u32 zorro8390_msg_enable;
-
#include "lib8390.c"
#define DRV_NAME "zorro8390"
@@ -296,7 +294,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
int err;
unsigned char SA_prom[32];
int start_page, stop_page;
- struct ei_device *ei_local = netdev_priv(dev);
static u32 zorro8390_offsets[16] = {
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
@@ -388,8 +385,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
dev->netdev_ops = &zorro8390_netdev_ops;
__NS8390_init(dev, 0);
- ei_local->msg_enable = zorro8390_msg_enable;
-
err = register_netdev(dev);
if (err) {
free_irq(IRQ_AMIGA_PORTS, dev);
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 358f7ab77c70..c99e3e845ac0 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -649,7 +649,7 @@ static void amd8111e_free_ring(struct amd8111e_priv *lp)
static int amd8111e_tx(struct net_device *dev)
{
struct amd8111e_priv *lp = netdev_priv(dev);
- int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
+ int tx_index;
int status;
/* Complete all the transmit packet */
while (lp->tx_complete_idx != lp->tx_idx){
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index f17a160dbff2..137cbb470af2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
dev->netdev_ops = &mace_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
- dev->name, dev->dev_addr);
+ pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+ dev->dev_addr, mp->chipid);
err = register_netdev(dev);
if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
else if (fs & (UFLO|LCOL|RTRY)) {
++dev->stats.tx_aborted_errors;
if (mb->xmtfs & UFLO) {
- printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
dev->stats.tx_fifo_errors++;
mace_txdma_reset(dev);
}
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
dev->stats.rx_errors++;
- if (frame_status & RS_OFLO) {
- printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+ if (frame_status & RS_OFLO)
dev->stats.rx_fifo_errors++;
- }
if (frame_status & RS_CLSN)
dev->stats.collisions++;
if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
},
};
-static int __init mac_mace_init_module(void)
-{
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
- platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
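[Editorial note, not part of the patch: module_platform_driver() removes the hand-rolled init/exit boilerplate deleted above. It expands to code roughly equivalent to the following (names simplified for illustration):]

	static int __init mac_mace_driver_init(void)
	{
		return platform_driver_register(&mac_mace_driver);
	}
	module_init(mac_mace_driver_init);

	static void __exit mac_mace_driver_exit(void)
	{
		platform_driver_unregister(&mac_mace_driver);
	}
	module_exit(mac_mace_driver_exit);
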
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index e84afcf1ecb5..d09bd43680b3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -472,8 +472,44 @@ static int macb_mii_probe(struct net_device *dev)
struct macb *bp = netdev_priv(dev);
struct macb_platform_data *pdata;
struct phy_device *phydev;
- int phy_irq;
- int ret;
+ struct device_node *np;
+ int phy_irq, ret, i;
+
+ pdata = dev_get_platdata(&bp->pdev->dev);
+ np = bp->pdev->dev.of_node;
+ ret = 0;
+
+ if (np) {
+ if (of_phy_is_fixed_link(np)) {
+ if (of_phy_register_fixed_link(np) < 0) {
+ dev_err(&bp->pdev->dev,
+ "broken fixed-link specification\n");
+ return -ENODEV;
+ }
+ bp->phy_node = of_node_get(np);
+ } else {
+ bp->phy_node = of_parse_phandle(np, "phy-handle", 0);
+ /* fall back to standard PHY registration if no
+ * phy-handle was found and no PHY was found during
+ * DT PHY registration
+ */
+ if (!bp->phy_node && !phy_find_first(bp->mii_bus)) {
+ for (i = 0; i < PHY_MAX_ADDR; i++) {
+ struct phy_device *phydev;
+
+ phydev = mdiobus_scan(bp->mii_bus, i);
+ if (IS_ERR(phydev) &&
+ PTR_ERR(phydev) != -ENODEV) {
+ ret = PTR_ERR(phydev);
+ break;
+ }
+ }
+
+ if (ret)
+ return -ENODEV;
+ }
+ }
+ }
if (bp->phy_node) {
phydev = of_phy_connect(dev, bp->phy_node,
@@ -488,7 +524,6 @@ static int macb_mii_probe(struct net_device *dev)
return -ENXIO;
}
- pdata = dev_get_platdata(&bp->pdev->dev);
if (pdata) {
if (gpio_is_valid(pdata->phy_irq_pin)) {
ret = devm_gpio_request(&bp->pdev->dev,
@@ -533,7 +568,7 @@ static int macb_mii_init(struct macb *bp)
{
struct macb_platform_data *pdata;
struct device_node *np;
- int err = -ENXIO, i;
+ int err;
/* Enable management port */
macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -556,43 +591,10 @@ static int macb_mii_init(struct macb *bp)
dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
np = bp->pdev->dev.of_node;
- if (np) {
- if (of_phy_is_fixed_link(np)) {
- if (of_phy_register_fixed_link(np) < 0) {
- dev_err(&bp->pdev->dev,
- "broken fixed-link specification\n");
- goto err_out_unregister_bus;
- }
- bp->phy_node = of_node_get(np);
-
- err = mdiobus_register(bp->mii_bus);
- } else {
- /* try dt phy registration */
- err = of_mdiobus_register(bp->mii_bus, np);
-
- /* fallback to standard phy registration if no phy were
- * found during dt phy registration
- */
- if (!err && !phy_find_first(bp->mii_bus)) {
- for (i = 0; i < PHY_MAX_ADDR; i++) {
- struct phy_device *phydev;
-
- phydev = mdiobus_scan(bp->mii_bus, i);
- if (IS_ERR(phydev) &&
- PTR_ERR(phydev) != -ENODEV) {
- err = PTR_ERR(phydev);
- break;
- }
- }
- if (err)
- goto err_out_unregister_bus;
- }
- }
+ if (np) {
+ err = of_mdiobus_register(bp->mii_bus, np);
} else {
- for (i = 0; i < PHY_MAX_ADDR; i++)
- bp->mii_bus->irq[i] = PHY_POLL;
-
if (pdata)
bp->mii_bus->phy_mask = pdata->phy_mask;
@@ -610,10 +612,10 @@ static int macb_mii_init(struct macb *bp)
err_out_unregister_bus:
mdiobus_unregister(bp->mii_bus);
-err_out_free_mdiobus:
- of_node_put(bp->phy_node);
if (np && of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
+err_out_free_mdiobus:
+ of_node_put(bp->phy_node);
mdiobus_free(bp->mii_bus);
err_out:
return err;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 32ae63b6f20e..666cf7e9cd09 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -164,15 +164,6 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
}
break;
- case OCTNET_CMD_CHANGE_MTU:
- /* If command is successful, change the MTU. */
- netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
- netdev->mtu, nctrl->ncmd.s.param1);
- netdev->mtu = nctrl->ncmd.s.param1;
- queue_delayed_work(lio->link_status_wq.wq,
- &lio->link_status_wq.wk.work, 0);
- break;
-
case OCTNET_CMD_GPIO_ACCESS:
netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
@@ -571,7 +562,8 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)),
napi_gro_receive(napi, skb);
- droq->stats.rx_bytes_received += len;
+ droq->stats.rx_bytes_received += len -
+ rh->r_dh.len * BYTES_PER_DHLEN_UNIT;
droq->stats.rx_pkts_received++;
} else {
recv_buffer_free(skb);
@@ -635,9 +627,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
iq_no = droq->q_no;
/* Handle Droq descriptors */
- work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
- POLL_EVENT_PROCESS_PKTS,
- budget);
+ work_done = octeon_droq_process_poll_pkts(oct, droq, budget);
/* Flush the instruction queue */
iq = oct->instr_queue[iq_no];
@@ -668,8 +658,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
tx_done = 1;
napi_complete_done(napi, work_done);
- octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
- POLL_EVENT_ENABLE_INTR, 0);
+ octeon_enable_irq(droq->oct_dev, droq->q_no);
return 0;
}
@@ -1080,3 +1069,88 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
}
return 0;
}
+
+static void liquidio_change_mtu_completion(struct octeon_device *oct,
+ u32 status, void *buf)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+ struct liquidio_if_cfg_context *ctx;
+
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+ if (status) {
+ dev_err(&oct->pci_dev->dev, "MTU change failed. Status: %llx\n",
+ CVM_CAST64(status));
+ WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_FAIL);
+ } else {
+ WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_SUCCESS);
+ }
+
+ /* This barrier is required to be sure that the response has been
+ * written fully before waking up the handler
+ */
+ wmb();
+
+ wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ * @param new_mtu new MTU value to set
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct liquidio_if_cfg_context *ctx;
+ struct octeon_soft_command *sc;
+ union octnet_cmd *ncmd;
+ int ctx_size;
+ int ret = 0;
+
+ ctx_size = sizeof(struct liquidio_if_cfg_context);
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, ctx_size);
+
+ ncmd = (union octnet_cmd *)sc->virtdptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+ WRITE_ONCE(ctx->cond, 0);
+ ctx->octeon_id = lio_get_device_id(oct);
+ init_waitqueue_head(&ctx->wc);
+
+ ncmd->u64 = 0;
+ ncmd->s.cmd = OCTNET_CMD_CHANGE_MTU;
+ ncmd->s.param1 = new_mtu;
+
+ octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_CMD, 0, 0, 0);
+
+ sc->callback = liquidio_change_mtu_completion;
+ sc->callback_arg = sc;
+ sc->wait_time = 100;
+
+ ret = octeon_send_soft_command(oct, sc);
+ if (ret == IQ_SEND_FAILED) {
+ netif_info(lio, rx_err, lio->netdev, "Failed to change MTU\n");
+ return -EINVAL;
+ }
+ /* Sleep on a wait queue until the cond flag indicates that the
+ * response has arrived or the request has timed out.
+ */
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR ||
+ ctx->cond == LIO_CHANGE_MTU_FAIL) {
+ octeon_free_soft_command(oct, sc);
+ return -EINVAL;
+ }
+
+ netdev->mtu = new_mtu;
+ lio->mtu = new_mtu;
+
+ octeon_free_soft_command(oct, sc);
+ return 0;
+}
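[Editorial note, not part of the patch: liquidio_change_mtu() above follows a common fire-and-wait pattern — post a soft command whose completion callback sets a condition flag and wakes a waiter. A generic sketch of that pattern with the standard wait-queue API; the demo_* names are illustrative, not liquidio helpers.]

	#include <linux/wait.h>

	struct demo_mtu_ctx {
		wait_queue_head_t wc;
		int cond;		/* 0 = pending, 1 = success, 2 = failure */
	};

	/* Completion side: record the result and wake the waiter. */
	static void demo_mtu_complete(struct demo_mtu_ctx *ctx, bool ok)
	{
		WRITE_ONCE(ctx->cond, ok ? 1 : 2);
		wmb();		/* publish cond before the wake-up, as above */
		wake_up_interruptible(&ctx->wc);
	}

	/* Submitting side: post the command, then sleep until cond changes. */
	static int demo_mtu_wait(struct demo_mtu_ctx *ctx)
	{
		init_waitqueue_head(&ctx->wc);
		WRITE_ONCE(ctx->cond, 0);
		/* ... send the command whose callback calls demo_mtu_complete() ... */
		if (wait_event_interruptible(ctx->wc, READ_ONCE(ctx->cond)))
			return -EINTR;
		return READ_ONCE(ctx->cond) == 1 ? 0 : -EINVAL;
	}
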
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index a63ddf07f168..550ac29682a5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -232,10 +232,16 @@ static int lio_get_link_ksettings(struct net_device *netdev,
linfo = &lio->linfo;
- if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
- linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
- linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
- linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
+ switch (linfo->link.s.phy_type) {
+ case LIO_PHY_PORT_TP:
+ ecmd->base.port = PORT_TP;
+ supported = (SUPPORTED_10000baseT_Full |
+ SUPPORTED_TP | SUPPORTED_Pause);
+ advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
+ ecmd->base.autoneg = AUTONEG_DISABLE;
+ break;
+
+ case LIO_PHY_PORT_FIBRE:
ecmd->base.port = PORT_FIBRE;
if (linfo->link.s.speed == SPEED_10000) {
@@ -245,12 +251,18 @@ static int lio_get_link_ksettings(struct net_device *netdev,
supported |= SUPPORTED_FIBRE | SUPPORTED_Pause;
advertising |= ADVERTISED_Pause;
+ ecmd->base.autoneg = AUTONEG_DISABLE;
+ break;
+ }
+
+ if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
+ linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
+ linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
+ linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
ethtool_convert_legacy_u32_to_link_mode(
ecmd->link_modes.supported, supported);
ethtool_convert_legacy_u32_to_link_mode(
ecmd->link_modes.advertising, advertising);
- ecmd->base.autoneg = AUTONEG_DISABLE;
-
} else {
dev_err(&oct->pci_dev->dev, "Unknown link interface reported %d\n",
linfo->link.s.if_mode);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index a5eecd895a82..21280cb66550 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -91,18 +91,9 @@ static int octeon_console_debug_enabled(u32 console)
*/
#define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000
-struct liquidio_if_cfg_context {
- int octeon_id;
-
- wait_queue_head_t wc;
-
- int cond;
-};
-
-struct liquidio_if_cfg_resp {
- u64 rh;
- struct liquidio_if_cfg_info cfg_info;
- u64 status;
+struct lio_trusted_vf_ctx {
+ struct completion complete;
+ int status;
};
struct liquidio_rx_ctl_context {
@@ -841,8 +832,12 @@ static void octnet_link_status_change(struct work_struct *work)
struct cavium_wk *wk = (struct cavium_wk *)work;
struct lio *lio = (struct lio *)wk->ctxptr;
+ /* lio->linfo.link.s.mtu always contains the max MTU of the lio
+ * interface. This work function is invoked only when the new max MTU
+ * of the interface is less than the current MTU.
+ */
rtnl_lock();
- call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+ dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
rtnl_unlock();
}
@@ -891,7 +886,11 @@ static inline void update_link_status(struct net_device *netdev,
{
struct lio *lio = GET_LIO(netdev);
int changed = (lio->linfo.link.u64 != ls->u64);
+ int current_max_mtu = lio->linfo.link.s.mtu;
+ struct octeon_device *oct = lio->oct_dev;
+ dev_dbg(&oct->pci_dev->dev, "%s: lio->linfo.link.u64=%llx, ls->u64=%llx\n",
+ __func__, lio->linfo.link.u64, ls->u64);
lio->linfo.link.u64 = ls->u64;
if ((lio->intf_open) && (changed)) {
@@ -899,12 +898,26 @@ static inline void update_link_status(struct net_device *netdev,
lio->link_changes++;
if (lio->linfo.link.s.link_up) {
+ dev_dbg(&oct->pci_dev->dev, "%s: link_up", __func__);
netif_carrier_on(netdev);
txqs_wake(netdev);
} else {
+ dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
netif_carrier_off(netdev);
stop_txq(netdev);
}
+ if (lio->linfo.link.s.mtu != current_max_mtu) {
+ netif_info(lio, probe, lio->netdev, "Max MTU changed from %d to %d\n",
+ current_max_mtu, lio->linfo.link.s.mtu);
+ netdev->max_mtu = lio->linfo.link.s.mtu;
+ }
+ if (lio->linfo.link.s.mtu < netdev->mtu) {
+ dev_warn(&oct->pci_dev->dev,
+ "Current MTU is higher than new max MTU; Reducing the current mtu from %d to %d\n",
+ netdev->mtu, lio->linfo.link.s.mtu);
+ queue_delayed_work(lio->link_status_wq.wq,
+ &lio->link_status_wq.wk.work, 0);
+ }
}
}
@@ -2449,38 +2462,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
}
/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct lio *lio = GET_LIO(netdev);
- struct octeon_device *oct = lio->oct_dev;
- struct octnic_ctrl_pkt nctrl;
- int ret = 0;
-
- memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
- nctrl.ncmd.u64 = 0;
- nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
- nctrl.ncmd.s.param1 = new_mtu;
- nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
- nctrl.wait_time = 100;
- nctrl.netpndev = (u64)netdev;
- nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
- if (ret < 0) {
- dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
- return -1;
- }
-
- lio->mtu = new_mtu;
-
- return 0;
-}
-
-/**
* \brief Handler for SIOCSHWTSTAMP ioctl
* @param netdev network device
* @param ifr interface request
@@ -3289,10 +3270,120 @@ static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
ether_addr_copy(&ivi->mac[0], macaddr);
ivi->vlan = oct->sriov_info.vf_vlantci[vfidx] & VLAN_VID_MASK;
ivi->qos = oct->sriov_info.vf_vlantci[vfidx] >> VLAN_PRIO_SHIFT;
+ if (oct->sriov_info.trusted_vf.active &&
+ oct->sriov_info.trusted_vf.id == vfidx)
+ ivi->trusted = true;
+ else
+ ivi->trusted = false;
ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
return 0;
}
+static void trusted_vf_callback(struct octeon_device *oct_dev,
+ u32 status, void *ptr)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+ struct lio_trusted_vf_ctx *ctx;
+
+ ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+ ctx->status = status;
+
+ complete(&ctx->complete);
+}
+
+static int liquidio_send_vf_trust_cmd(struct lio *lio, int vfidx, bool trusted)
+{
+ struct octeon_device *oct = lio->oct_dev;
+ struct lio_trusted_vf_ctx *ctx;
+ struct octeon_soft_command *sc;
+ int ctx_size, retval;
+
+ ctx_size = sizeof(struct lio_trusted_vf_ctx);
+ sc = octeon_alloc_soft_command(oct, 0, 0, ctx_size);
+
+ ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+ init_completion(&ctx->complete);
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ /* vfidx is 0 based, but vf_num (param1) is 1 based */
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_SET_TRUSTED_VF, 0, vfidx + 1,
+ trusted);
+
+ sc->callback = trusted_vf_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = 1000;
+
+ retval = octeon_send_soft_command(oct, sc);
+ if (retval == IQ_SEND_FAILED) {
+ retval = -1;
+ } else {
+ /* Wait for response or timeout */
+ if (wait_for_completion_timeout(&ctx->complete,
+ msecs_to_jiffies(2000)))
+ retval = ctx->status;
+ else
+ retval = -1;
+ }
+
+ octeon_free_soft_command(oct, sc);
+
+ return retval;
+}
+
+static int liquidio_set_vf_trust(struct net_device *netdev, int vfidx,
+ bool setting)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ if (strcmp(oct->fw_info.liquidio_firmware_version, "1.7.1") < 0) {
+ /* trusted vf is not supported by firmware older than 1.7.1 */
+ return -EOPNOTSUPP;
+ }
+
+ if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) {
+ netif_info(lio, drv, lio->netdev, "Invalid vfidx %d\n", vfidx);
+ return -EINVAL;
+ }
+
+ if (setting) {
+ /* Set */
+
+ if (oct->sriov_info.trusted_vf.active &&
+ oct->sriov_info.trusted_vf.id == vfidx)
+ return 0;
+
+ if (oct->sriov_info.trusted_vf.active) {
+ netif_info(lio, drv, lio->netdev, "More than one trusted VF is not allowed\n");
+ return -EPERM;
+ }
+ } else {
+ /* Clear */
+
+ if (!oct->sriov_info.trusted_vf.active)
+ return 0;
+ }
+
+ if (!liquidio_send_vf_trust_cmd(lio, vfidx, setting)) {
+ if (setting) {
+ oct->sriov_info.trusted_vf.id = vfidx;
+ oct->sriov_info.trusted_vf.active = true;
+ } else {
+ oct->sriov_info.trusted_vf.active = false;
+ }
+
+ netif_info(lio, drv, lio->netdev, "VF %u is %strusted\n", vfidx,
+ setting ? "" : "not ");
+ } else {
+ netif_info(lio, drv, lio->netdev, "Failed to set VF trusted\n");
+ return -1;
+ }
+
+ return 0;
+}
+
static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
int linkstate)
{
@@ -3423,6 +3514,7 @@ static const struct net_device_ops lionetdevops = {
.ndo_set_vf_mac = liquidio_set_vf_mac,
.ndo_set_vf_vlan = liquidio_set_vf_vlan,
.ndo_get_vf_config = liquidio_get_vf_config,
+ .ndo_set_vf_trust = liquidio_set_vf_trust,
.ndo_set_vf_link_state = liquidio_set_vf_link_state,
};
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index fd70a4844e2d..3342d64b7081 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -40,20 +40,6 @@ MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
-struct liquidio_if_cfg_context {
- int octeon_id;
-
- wait_queue_head_t wc;
-
- int cond;
-};
-
-struct liquidio_if_cfg_resp {
- u64 rh;
- struct liquidio_if_cfg_info cfg_info;
- u64 status;
-};
-
struct liquidio_rx_ctl_context {
int octeon_id;
@@ -564,8 +550,12 @@ static void octnet_link_status_change(struct work_struct *work)
struct cavium_wk *wk = (struct cavium_wk *)work;
struct lio *lio = (struct lio *)wk->ctxptr;
+ /* lio->linfo.link.s.mtu always contains the max MTU of the lio
+ * interface. This work function is invoked only when the new max MTU
+ * of the interface is less than the current MTU.
+ */
rtnl_lock();
- call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+ dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
rtnl_unlock();
}
@@ -613,6 +603,7 @@ static void update_link_status(struct net_device *netdev,
union oct_link_status *ls)
{
struct lio *lio = GET_LIO(netdev);
+ int current_max_mtu = lio->linfo.link.s.mtu;
struct octeon_device *oct = lio->oct_dev;
if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
@@ -629,18 +620,17 @@ static void update_link_status(struct net_device *netdev,
txqs_stop(netdev);
}
- if (lio->linfo.link.s.mtu != netdev->max_mtu) {
- dev_info(&oct->pci_dev->dev, "Max MTU Changed from %d to %d\n",
- netdev->max_mtu, lio->linfo.link.s.mtu);
+ if (lio->linfo.link.s.mtu != current_max_mtu) {
+ dev_info(&oct->pci_dev->dev,
+ "Max MTU Changed from %d to %d\n",
+ current_max_mtu, lio->linfo.link.s.mtu);
netdev->max_mtu = lio->linfo.link.s.mtu;
}
if (lio->linfo.link.s.mtu < netdev->mtu) {
dev_warn(&oct->pci_dev->dev,
- "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+ "Current MTU is higher than new max MTU; Reducing the current mtu from %d to %d\n",
netdev->mtu, lio->linfo.link.s.mtu);
- lio->mtu = lio->linfo.link.s.mtu;
- netdev->mtu = lio->linfo.link.s.mtu;
queue_delayed_work(lio->link_status_wq.wq,
&lio->link_status_wq.wk.work, 0);
}
@@ -1538,41 +1528,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
}
/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct octnic_ctrl_pkt nctrl;
- struct octeon_device *oct;
- struct lio *lio;
- int ret = 0;
-
- lio = GET_LIO(netdev);
- oct = lio->oct_dev;
-
- memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
- nctrl.ncmd.u64 = 0;
- nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
- nctrl.ncmd.s.param1 = new_mtu;
- nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
- nctrl.wait_time = LIO_CMD_WAIT_TM;
- nctrl.netpndev = (u64)netdev;
- nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
- if (ret < 0) {
- dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
- return -EIO;
- }
-
- lio->mtu = new_mtu;
-
- return 0;
-}
-
-/**
* \brief Handler for SIOCSHWTSTAMP ioctl
* @param netdev network device
* @param ifr interface request
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 522dcc4dcff7..82a783db5baf 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -84,6 +84,7 @@ enum octeon_tag_type {
#define OPCODE_NIC_IF_CFG 0x09
#define OPCODE_NIC_VF_DRV_NOTICE 0x0A
#define OPCODE_NIC_INTRMOD_PARAMS 0x0B
+#define OPCODE_NIC_SET_TRUSTED_VF 0x13
#define OPCODE_NIC_SYNC_OCTEON_TIME 0x14
#define VF_DRV_LOADED 1
#define VF_DRV_REMOVED -1
@@ -192,7 +193,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_MAX_FRM_SIZE (16000 + OCTNET_FRM_HEADER_SIZE)
-#define OCTNET_DEFAULT_FRM_SIZE (1500 + OCTNET_FRM_HEADER_SIZE)
+#define OCTNET_DEFAULT_MTU (1500)
+#define OCTNET_DEFAULT_FRM_SIZE (OCTNET_DEFAULT_MTU + OCTNET_FRM_HEADER_SIZE)
/** NIC Commands are sent using this Octeon Input Queue */
#define OCTNET_CMD_Q 0
@@ -675,9 +677,11 @@ union oct_link_status {
u64 if_mode:5;
u64 pause:1;
u64 flashing:1;
- u64 reserved:15;
+ u64 phy_type:5;
+ u64 reserved:10;
#else
- u64 reserved:15;
+ u64 reserved:10;
+ u64 phy_type:5;
u64 flashing:1;
u64 pause:1;
u64 if_mode:5;
@@ -690,6 +694,12 @@ union oct_link_status {
} s;
};
+enum lio_phy_type {
+ LIO_PHY_PORT_TP = 0x0,
+ LIO_PHY_PORT_FIBRE = 0x1,
+ LIO_PHY_PORT_UNKNOWN,
+};
+
/** The txpciq info passed to host from the firmware */
union oct_txpciq {
@@ -909,6 +919,12 @@ union oct_nic_if_cfg {
} s;
};
+struct lio_trusted_vf {
+ uint64_t active: 1;
+ uint64_t id : 8;
+ uint64_t reserved: 55;
+};
+
struct lio_time {
s64 sec; /* seconds */
s64 nsec; /* nanoseconds */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 63b0c758a0a6..91937cc5c1d7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -370,6 +370,8 @@ struct octeon_sriov_info {
u32 sriov_enabled;
+ struct lio_trusted_vf trusted_vf;
+
/*lookup table that maps DPI ring number to VF pci_dev struct pointer*/
struct pci_dev *dpiring_to_vfpcidev_lut[MAX_POSSIBLE_VFS];
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 3461d65ff4eb..f044718cea52 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -788,7 +788,7 @@ octeon_droq_process_packets(struct octeon_device *oct,
* called before calling this routine.
*/
-static int
+int
octeon_droq_process_poll_pkts(struct octeon_device *oct,
struct octeon_droq *droq, u32 budget)
{
@@ -835,71 +835,46 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
return total_pkts_processed;
}
+/* Enable Pkt Interrupt */
int
-octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
- u32 arg)
+octeon_enable_irq(struct octeon_device *oct, u32 q_no)
{
- struct octeon_droq *droq;
-
- droq = oct->droq[q_no];
+ switch (oct->chip_id) {
+ case OCTEON_CN66XX:
+ case OCTEON_CN68XX: {
+ struct octeon_cn6xxx *cn6xxx =
+ (struct octeon_cn6xxx *)oct->chip;
+ unsigned long flags;
+ u32 value;
- if (cmd == POLL_EVENT_PROCESS_PKTS)
- return octeon_droq_process_poll_pkts(oct, droq, arg);
+ spin_lock_irqsave
+ (&cn6xxx->lock_for_droq_int_enb_reg, flags);
+ value = octeon_read_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB);
+ value |= (1 << q_no);
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB, value);
+ value = octeon_read_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB);
+ value |= (1 << q_no);
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, value);
- if (cmd == POLL_EVENT_PENDING_PKTS) {
- u32 pkt_cnt = atomic_read(&droq->pkts_pending);
+ /* don't bother flushing the enables */
- return octeon_droq_process_packets(oct, droq, pkt_cnt);
+ spin_unlock_irqrestore
+ (&cn6xxx->lock_for_droq_int_enb_reg, flags);
}
-
- if (cmd == POLL_EVENT_ENABLE_INTR) {
- u32 value;
- unsigned long flags;
-
- /* Enable Pkt Interrupt */
- switch (oct->chip_id) {
- case OCTEON_CN66XX:
- case OCTEON_CN68XX: {
- struct octeon_cn6xxx *cn6xxx =
- (struct octeon_cn6xxx *)oct->chip;
- spin_lock_irqsave
- (&cn6xxx->lock_for_droq_int_enb_reg, flags);
- value =
- octeon_read_csr(oct,
- CN6XXX_SLI_PKT_TIME_INT_ENB);
- value |= (1 << q_no);
- octeon_write_csr(oct,
- CN6XXX_SLI_PKT_TIME_INT_ENB,
- value);
- value =
- octeon_read_csr(oct,
- CN6XXX_SLI_PKT_CNT_INT_ENB);
- value |= (1 << q_no);
- octeon_write_csr(oct,
- CN6XXX_SLI_PKT_CNT_INT_ENB,
- value);
-
- /* don't bother flushing the enables */
-
- spin_unlock_irqrestore
- (&cn6xxx->lock_for_droq_int_enb_reg, flags);
- return 0;
- }
break;
- case OCTEON_CN23XX_PF_VID: {
- lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
- }
+ case OCTEON_CN23XX_PF_VID:
+ lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
break;
- case OCTEON_CN23XX_VF_VID:
- lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+ case OCTEON_CN23XX_VF_VID:
+ lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
break;
- }
- return 0;
+ default:
+ dev_err(&oct->pci_dev->dev, "%s Unknown Chip\n", __func__);
+ return 1;
}
- dev_err(&oct->pci_dev->dev, "%s Unknown command: %d\n", __func__, cmd);
- return -EINVAL;
+ return 0;
}
int octeon_register_droq_ops(struct octeon_device *oct, u32 q_no,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 815a9f56fd59..f28f262d4ab6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -123,11 +123,6 @@ struct oct_droq_stats {
};
-#define POLL_EVENT_INTR_ARRIVED 1
-#define POLL_EVENT_PROCESS_PKTS 2
-#define POLL_EVENT_PENDING_PKTS 3
-#define POLL_EVENT_ENABLE_INTR 4
-
/* The maximum number of buffers that can be dispatched from the
* output/dma queue. Set to 64 assuming 1K buffers in DROQ and the fact that
* max packet size from DROQ is 64K.
@@ -414,8 +409,10 @@ int octeon_droq_process_packets(struct octeon_device *oct,
struct octeon_droq *droq,
u32 budget);
-int octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no,
- int cmd, u32 arg);
+int octeon_droq_process_poll_pkts(struct octeon_device *oct,
+ struct octeon_droq *droq, u32 budget);
+
+int octeon_enable_irq(struct octeon_device *oct, u32 q_no);
void octeon_droq_check_oom(struct octeon_droq *droq);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
index 57af7df74ced..28e74ee23ff8 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -87,7 +87,7 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
}
if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
- if (mbox->mbox_req.recv_len < msg.s.len) {
+ if (mbox->mbox_req.recv_len < mbox->mbox_req.msg.s.len) {
ret = 0;
} else {
mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVING;
@@ -96,7 +96,8 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
}
} else {
if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
- if (mbox->mbox_resp.recv_len < msg.s.len) {
+ if (mbox->mbox_resp.recv_len <
+ mbox->mbox_resp.msg.s.len) {
ret = 0;
} else {
mbox->state &=
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index f2d1a076a038..76803a569794 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -35,6 +35,18 @@
#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
#define LIO_IFSTATE_RESETTING 0x10
+struct liquidio_if_cfg_context {
+ u32 octeon_id;
+ wait_queue_head_t wc;
+ int cond;
+};
+
+struct liquidio_if_cfg_resp {
+ u64 rh;
+ struct liquidio_if_cfg_info cfg_info;
+ u64 status;
+};
+
struct oct_nic_stats_resp {
u64 rh;
struct oct_link_stats stats;
@@ -184,6 +196,14 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
*/
void liquidio_set_ethtool_ops(struct net_device *netdev);
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ * @param new_mtu new MTU value to set
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu);
+#define LIO_CHANGE_MTU_SUCCESS 1
+#define LIO_CHANGE_MTU_FAIL 2
+
#define SKB_ADJ_MASK 0x3F
#define SKB_ADJ (SKB_ADJ_MASK + 1)
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index e07d2093b971..2766af05b89e 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -366,6 +366,7 @@ int
lio_process_iq_request_list(struct octeon_device *oct,
struct octeon_instr_queue *iq, u32 napi_budget)
{
+ struct cavium_wq *cwq = &oct->dma_comp_wq;
int reqtype;
void *buf;
u32 old = iq->flush_index;
@@ -450,6 +451,10 @@ lio_process_iq_request_list(struct octeon_device *oct,
bytes_compl);
iq->flush_index = old;
+ if (atomic_read(&oct->response_list
+ [OCTEON_ORDERED_SC_LIST].pending_req_count))
+ queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
+
return inst_count;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 3d691c69f74d..fe5b53700576 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -49,7 +49,6 @@ int octeon_setup_response_list(struct octeon_device *oct)
INIT_DELAYED_WORK(&cwq->wk.work, oct_poll_req_completion);
cwq->wk.ctxptr = oct;
oct->cmd_resp_state = OCT_DRV_ONLINE;
- queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
return ret;
}
@@ -164,5 +163,8 @@ static void oct_poll_req_completion(struct work_struct *work)
struct cavium_wq *cwq = &oct->dma_comp_wq;
lio_process_ordered_list(oct, 0);
- queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
+
+ if (atomic_read(&oct->response_list
+ [OCTEON_ORDERED_SC_LIST].pending_req_count))
+ queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
}
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index a89721fad633..080918af773c 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -681,18 +681,18 @@ int t3_seeprom_wp(struct adapter *adapter, int enable)
return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
}
-static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+static int vpdstrtouint(char *s, u8 len, unsigned int base, unsigned int *val)
{
- char tok[len + 1];
+ char tok[256];
memcpy(tok, s, len);
tok[len] = 0;
return kstrtouint(strim(tok), base, val);
}
-static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+static int vpdstrtou16(char *s, u8 len, unsigned int base, u16 *val)
{
- char tok[len + 1];
+ char tok[256];
memcpy(tok, s, len);
tok[len] = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 53b6a02c778e..bea6a059a8f1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -6,7 +6,7 @@
obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
- cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
+ cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
cudbg_common.o cudbg_lib.o cudbg_zlib.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 00a1d2d13169..9da6f57901a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
&payload->start, &payload->end);
}
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+ int mtype, u32 addr, u32 len, void *hbuf)
+{
+ u32 win_pf, memoffset, mem_aperture, mem_base;
+ struct adapter *adap = pdbg_init->adap;
+ u32 pos, offset, resid;
+ u32 *res_buf;
+ u64 *buf;
+ int ret;
+
+ /* Argument sanity checks ...
+ */
+ if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+ return -EINVAL;
+
+ buf = (u64 *)hbuf;
+
+ /* Try to do 64-bit reads. Residual will be handled later. */
+ resid = len & 0x7;
+ len -= resid;
+
+ ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+ &mem_aperture);
+ if (ret)
+ return ret;
+
+ addr = addr + memoffset;
+ win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
+
+ pos = addr & ~(mem_aperture - 1);
+ offset = addr - pos;
+
+ /* Set up initial PCI-E Memory Window to cover the start of our
+ * transfer.
+ */
+ t4_memory_update_win(adap, win, pos | win_pf);
+
+ /* Transfer data from the adapter */
+ while (len > 0) {
+ *buf++ = le64_to_cpu((__force __le64)
+ t4_read_reg64(adap, mem_base + offset));
+ offset += sizeof(u64);
+ len -= sizeof(u64);
+
+ /* If we've reached the end of our current window aperture,
+ * move the PCI-E Memory Window on to the next.
+ */
+ if (offset == mem_aperture) {
+ pos += mem_aperture;
+ offset = 0;
+ t4_memory_update_win(adap, win, pos | win_pf);
+ }
+ }
+
+ res_buf = (u32 *)buf;
+ /* Read residual in 32-bit multiples */
+ while (resid > sizeof(u32)) {
+ *res_buf++ = le32_to_cpu((__force __le32)
+ t4_read_reg(adap, mem_base + offset));
+ offset += sizeof(u32);
+ resid -= sizeof(u32);
+
+ /* If we've reached the end of our current window aperture,
+ * move the PCI-E Memory Window on to the next.
+ */
+ if (offset == mem_aperture) {
+ pos += mem_aperture;
+ offset = 0;
+ t4_memory_update_win(adap, win, pos | win_pf);
+ }
+ }
+
+ /* Transfer residual < 32-bits */
+ if (resid)
+ t4_memory_rw_residual(adap, resid, mem_base + offset,
+ (u8 *)res_buf, T4_MEMORY_READ);
+
+ return 0;
+}
+
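cudbg_memory_read() above follows the usual cxgb4 memory-window access pattern: position the PCI-E window on an aperture-aligned address, read 64-bit words through BAR0 at mem_base + offset, and slide the window forward whenever the offset reaches the aperture size. A stripped-down sketch of that loop, with hypothetical read_reg64()/move_window() helpers standing in for the register accessors:

#include <stdint.h>
#include <stddef.h>

/* Hypothetical helpers standing in for t4_read_reg64()/t4_memory_update_win() */
uint64_t read_reg64(uint32_t bar0_off);
void move_window(uint32_t addr);

static void windowed_read(uint32_t addr, uint32_t len, uint64_t *buf,
			  uint32_t mem_base, uint32_t aperture)
{
	/* window position must be aperture aligned; track our offset into it */
	uint32_t pos = addr & ~(aperture - 1);
	uint32_t offset = addr - pos;

	move_window(pos);
	while (len >= sizeof(uint64_t)) {
		*buf++ = read_reg64(mem_base + offset);
		offset += sizeof(uint64_t);
		len -= sizeof(uint64_t);

		/* ran off the end of the current window: slide it forward */
		if (offset == aperture) {
			pos += aperture;
			offset = 0;
			move_window(pos);
		}
	}
	/* the residual (< 8 bytes) is handled separately, as in the patch */
}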
#define CUDBG_YIELD_ITERATION 256
static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
goto skip_read;
spin_lock(&padap->win0_lock);
- rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
- bytes_read, bytes,
- (__be32 *)temp_buff.data,
- 1);
+ rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+ bytes_read, bytes, temp_buff.data);
spin_unlock(&padap->win0_lock);
if (rc) {
cudbg_err->sys_err = rc;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13ce4b7..688f95440af2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -390,6 +390,8 @@ struct adapter_params {
* used by the Port
*/
u8 mps_bg_map[MAX_NPORTS]; /* MPS Buffer Group Map */
+ bool write_w_imm_support; /* FW supports WRITE_WITH_IMMEDIATE */
+ bool write_cmpl_support; /* FW supports WRITE_CMPL */
};
/* State needed to monitor the forward progress of SGE Ingress DMA activities
@@ -831,6 +833,16 @@ struct vf_info {
u16 vlan;
};
+enum {
+ HMA_DMA_MAPPED_FLAG = 1
+};
+
+struct hma_data {
+ unsigned char flags;
+ struct sg_table *sgt;
+ dma_addr_t *phy_addr; /* physical address of the page */
+};
+
struct mbox_list {
struct list_head list;
};
@@ -907,6 +919,7 @@ struct adapter {
struct work_struct tid_release_task;
struct work_struct db_full_task;
struct work_struct db_drop_task;
+ struct work_struct fatal_err_notify_task;
bool tid_release_task_busy;
/* lock for mailbox cmd list */
@@ -946,6 +959,11 @@ struct adapter {
/* Ethtool Dump */
struct ethtool_dump eth_dump;
+
+ /* HMA */
+ struct hma_data hma;
+
+ struct srq_data *srq;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1488,6 +1506,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
u32 t4_get_util_window(struct adapter *adap);
void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+ u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+ int dir);
#define T4_MEMORY_WRITE 0
#define T4_MEMORY_READ 1
int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 2822bbff73e8..de2ba86eccfd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2617,7 +2617,7 @@ int mem_open(struct inode *inode, struct file *file)
file->private_data = inode->i_private;
- mem = (uintptr_t)file->private_data & 0x3;
+ mem = (uintptr_t)file->private_data & 0x7;
adap = file->private_data - mem;
(void)t4_fwcache(adap, FW_PARAM_DEV_FWCACHE_FLUSH);
@@ -2630,7 +2630,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
{
loff_t pos = *ppos;
loff_t avail = file_inode(file)->i_size;
- unsigned int mem = (uintptr_t)file->private_data & 3;
+ unsigned int mem = (uintptr_t)file->private_data & 0x7;
struct adapter *adap = file->private_data - mem;
__be32 *data;
int ret;
@@ -3042,6 +3042,12 @@ int t4_setup_debugfs(struct adapter *adap)
add_debugfs_mem(adap, "mc", MEM_MC,
EXT_MEM_SIZE_G(size));
}
+
+ if (i & HMA_MUX_F) {
+ size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ add_debugfs_mem(adap, "hma", MEM_HMA,
+ EXT_MEM1_SIZE_G(size));
+ }
}
de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
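The mask in mem_open()/mem_read() widens from 0x3 to 0x7 because these debugfs files encode the memory-type index in the low bits of the adapter pointer stored in i_private, and MEM_HMA (index 4, per the comment in t4_memory_rw_init()) no longer fits in two bits. A minimal illustration of the tagging scheme, with hypothetical helpers and assuming the base pointer is at least 8-byte aligned:

#include <stdint.h>

/* Tag a memory-type index (0..7) into the low bits of an aligned pointer,
 * and recover both later -- the trick mem_open()/mem_read() rely on.
 */
static inline void *tag_ptr(void *base, unsigned int memtype)
{
	return (char *)base + (memtype & 0x7);
}

static inline unsigned int tag_get_type(void *tagged)
{
	return (uintptr_t)tagged & 0x7;
}

static inline void *tag_get_base(void *tagged)
{
	return (char *)tagged - tag_get_type(tagged);
}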
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7852d98bad75..59d04d73c672 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -597,22 +597,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_KR:
SET_LMM(Backplane);
- SET_LMM(10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP4_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
- SET_LMM(10000baseKX4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
break;
case FW_PORT_TYPE_FIBER_XFI:
@@ -628,7 +628,9 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_BP40_BA:
case FW_PORT_TYPE_QSFP:
SET_LMM(FIBRE);
- SET_LMM(40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
break;
case FW_PORT_TYPE_CR_QSFP:
@@ -655,12 +657,14 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_CR2_QSFP:
SET_LMM(FIBRE);
- SET_LMM(50000baseSR2_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
break;
case FW_PORT_TYPE_KR4_100G:
case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
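The ethtool hunks above replace unconditional SET_LMM() calls with FW_CAPS_TO_LMM(), so a link mode is only advertised when the firmware port capability mask actually carries the matching speed bit. The real macro is defined earlier in this driver; in spirit it behaves like the sketch below, which assumes a local fw_caps capability mask in the enclosing function (illustrative names, not copied from the source):

#define DEMO_CAPS_TO_LMM(speed_cap, lmm_name)			\
	do {							\
		if (fw_caps & (speed_cap))			\
			SET_LMM(lmm_name);			\
	} while (0)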
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 3177b0c9bd2d..db92f1858060 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
return ret;
}
- /* Clear out any old resources being used by the filter before
- * we start constructing the new filter.
- */
- if (f->valid)
- clear_filter(adapter, f);
-
if (is_t6(adapter->params.chip) && fs->type &&
ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
IPV6_ADDR_ANY) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 61022b5f6743..e880be8e3c45 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -75,6 +75,7 @@
#include "t4fw_api.h"
#include "t4fw_version.h"
#include "cxgb4_dcb.h"
+#include "srq.h"
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"
@@ -210,6 +211,9 @@ static void link_report(struct net_device *dev)
case 40000:
s = "40Gbps";
break;
+ case 50000:
+ s = "50Gbps";
+ break;
case 100000:
s = "100Gbps";
break;
@@ -583,6 +587,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
const struct cpl_abort_rpl_rss *p = (void *)rsp;
hash_del_filter_rpl(q->adap, p);
+ } else if (opcode == CPL_SRQ_TABLE_RPL) {
+ const struct cpl_srq_table_rpl *p = (void *)rsp;
+
+ do_srq_table_rpl(q->adap, p);
} else
dev_err(q->adap->pdev_dev,
"unexpected CPL %#x on FW event queue\n", opcode);
@@ -1733,10 +1741,11 @@ EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
{
- struct adapter *adap;
- u32 offset, memtype, memaddr;
u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
u32 edc0_end, edc1_end, mc0_end, mc1_end;
+ u32 offset, memtype, memaddr;
+ struct adapter *adap;
+ u32 hma_size = 0;
int ret;
adap = netdev2adap(dev);
@@ -1756,6 +1765,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
mc0_size = EXT_MEM0_SIZE_G(size) << 20;
+ if (t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A) & HMA_MUX_F) {
+ size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ hma_size = EXT_MEM1_SIZE_G(size) << 20;
+ }
edc0_end = edc0_size;
edc1_end = edc0_end + edc1_size;
mc0_end = edc1_end + mc0_size;
@@ -1767,7 +1780,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
memtype = MEM_EDC1;
memaddr = offset - edc0_end;
} else {
- if (offset < mc0_end) {
+ if (hma_size && (offset < (edc1_end + hma_size))) {
+ memtype = MEM_HMA;
+ memaddr = offset - edc1_end;
+ } else if (offset < mc0_end) {
memtype = MEM_MC0;
memaddr = offset - edc1_end;
} else if (is_t5(adap->params.chip)) {
@@ -2870,11 +2886,11 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
/* Convert from Mbps to Kbps */
req_rate = rate << 10;
- /* Max rate is 10 Gbps */
+ /* Max rate is 100 Gbps */
if (req_rate >= SCHED_MAX_RATE_KBPS) {
dev_err(adap->pdev_dev,
- "Invalid rate %u Mbps, Max rate is %u Gbps\n",
- rate, SCHED_MAX_RATE_KBPS);
+ "Invalid rate %u Mbps, Max rate is %u Mbps\n",
+ rate, SCHED_MAX_RATE_KBPS >> 10);
return -ERANGE;
}
@@ -3244,6 +3260,14 @@ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
.get_drvinfo = cxgb4_mgmt_get_drvinfo,
};
+static void notify_fatal_err(struct work_struct *work)
+{
+ struct adapter *adap;
+
+ adap = container_of(work, struct adapter, fatal_err_notify_task);
+ notify_ulds(adap, CXGB4_STATE_FATAL_ERROR);
+}
+
void t4_fatal_err(struct adapter *adap)
{
int port;
@@ -3268,6 +3292,7 @@ void t4_fatal_err(struct adapter *adap)
netif_carrier_off(dev);
}
dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
+ queue_work(adap->workq, &adap->fatal_err_notify_task);
}
static void setup_memwin(struct adapter *adap)
@@ -3298,6 +3323,206 @@ static void setup_memwin_rdma(struct adapter *adap)
}
}
+/* HMA Definitions */
+
+/* The maximum number of addresses that can be sent in a single FW cmd */
+#define HMA_MAX_ADDR_IN_CMD 5
+
+#define HMA_PAGE_SIZE PAGE_SIZE
+
+#define HMA_MAX_NO_FW_ADDRESS (16 << 10) /* FW supports 16K addresses */
+
+#define HMA_PAGE_ORDER \
+ ((HMA_PAGE_SIZE < HMA_MAX_NO_FW_ADDRESS) ? \
+ ilog2(HMA_MAX_NO_FW_ADDRESS / HMA_PAGE_SIZE) : 0)
+
+/* The minimum and maximum possible HMA sizes that can be specified in the FW
+ * configuration(in units of MB).
+ */
+#define HMA_MIN_TOTAL_SIZE 1
+#define HMA_MAX_TOTAL_SIZE \
+ (((HMA_PAGE_SIZE << HMA_PAGE_ORDER) * \
+ HMA_MAX_NO_FW_ADDRESS) >> 20)
+
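As a worked example of the macros above, assuming the common 4 KiB PAGE_SIZE: HMA_PAGE_ORDER = ilog2(16384 / 4096) = 2, so each scatterlist entry covers a 16 KiB compound page, and HMA_MAX_TOTAL_SIZE = (16 KiB * 16384) >> 20 = 256 MB, the largest region the 16K firmware address slots can describe at that chunk size. HMA_MIN_TOTAL_SIZE stays at 1 MB regardless of page size.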
+static void adap_free_hma_mem(struct adapter *adapter)
+{
+ struct scatterlist *iter;
+ struct page *page;
+ int i;
+
+ if (!adapter->hma.sgt)
+ return;
+
+ if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
+ dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
+ adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
+ adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
+ }
+
+ for_each_sg(adapter->hma.sgt->sgl, iter,
+ adapter->hma.sgt->orig_nents, i) {
+ page = sg_page(iter);
+ if (page)
+ __free_pages(page, HMA_PAGE_ORDER);
+ }
+
+ kfree(adapter->hma.phy_addr);
+ sg_free_table(adapter->hma.sgt);
+ kfree(adapter->hma.sgt);
+ adapter->hma.sgt = NULL;
+}
+
+static int adap_config_hma(struct adapter *adapter)
+{
+ struct scatterlist *sgl, *iter;
+ struct sg_table *sgt;
+ struct page *newpage;
+ unsigned int i, j, k;
+ u32 param, hma_size;
+ unsigned int ncmds;
+ size_t page_size;
+ u32 page_order;
+ int node, ret;
+
+ /* HMA is supported only for T6+ cards.
+ * Avoid initializing HMA in kdump kernels.
+ */
+ if (is_kdump_kernel() ||
+ CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
+ return 0;
+
+ /* Get the HMA region size required by fw */
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HMA_SIZE));
+ ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+ 1, &param, &hma_size);
+ /* An error means the card has its own memory or HMA is not supported by
+ * the firmware. Return without any errors.
+ */
+ if (ret || !hma_size)
+ return 0;
+
+ if (hma_size < HMA_MIN_TOTAL_SIZE ||
+ hma_size > HMA_MAX_TOTAL_SIZE) {
+ dev_err(adapter->pdev_dev,
+ "HMA size %uMB beyond bounds(%u-%lu)MB\n",
+ hma_size, HMA_MIN_TOTAL_SIZE, HMA_MAX_TOTAL_SIZE);
+ return -EINVAL;
+ }
+
+ page_size = HMA_PAGE_SIZE;
+ page_order = HMA_PAGE_ORDER;
+ adapter->hma.sgt = kzalloc(sizeof(*adapter->hma.sgt), GFP_KERNEL);
+ if (unlikely(!adapter->hma.sgt)) {
+ dev_err(adapter->pdev_dev, "HMA SG table allocation failed\n");
+ return -ENOMEM;
+ }
+ sgt = adapter->hma.sgt;
+ /* FW returned value will be in MBs */
+ sgt->orig_nents = (hma_size << 20) / (page_size << page_order);
+ if (sg_alloc_table(sgt, sgt->orig_nents, GFP_KERNEL)) {
+ dev_err(adapter->pdev_dev, "HMA SGL allocation failed\n");
+ kfree(adapter->hma.sgt);
+ adapter->hma.sgt = NULL;
+ return -ENOMEM;
+ }
+
+ sgl = adapter->hma.sgt->sgl;
+ node = dev_to_node(adapter->pdev_dev);
+ for_each_sg(sgl, iter, sgt->orig_nents, i) {
+ newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
+ page_order);
+ if (!newpage) {
+ dev_err(adapter->pdev_dev,
+ "Not enough memory for HMA page allocation\n");
+ ret = -ENOMEM;
+ goto free_hma;
+ }
+ sg_set_page(iter, newpage, page_size << page_order, 0);
+ }
+
+ sgt->nents = dma_map_sg(adapter->pdev_dev, sgl, sgt->orig_nents,
+ DMA_BIDIRECTIONAL);
+ if (!sgt->nents) {
+ dev_err(adapter->pdev_dev,
+ "Not enough memory for HMA DMA mapping");
+ ret = -ENOMEM;
+ goto free_hma;
+ }
+ adapter->hma.flags |= HMA_DMA_MAPPED_FLAG;
+
+ adapter->hma.phy_addr = kcalloc(sgt->nents, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (unlikely(!adapter->hma.phy_addr))
+ goto free_hma;
+
+ for_each_sg(sgl, iter, sgt->nents, i) {
+ newpage = sg_page(iter);
+ adapter->hma.phy_addr[i] = sg_dma_address(iter);
+ }
+
+ ncmds = DIV_ROUND_UP(sgt->nents, HMA_MAX_ADDR_IN_CMD);
+ /* Pass on the addresses to firmware */
+ for (i = 0, k = 0; i < ncmds; i++, k += HMA_MAX_ADDR_IN_CMD) {
+ struct fw_hma_cmd hma_cmd;
+ u8 naddr = HMA_MAX_ADDR_IN_CMD;
+ u8 soc = 0, eoc = 0;
+ u8 hma_mode = 1; /* Presently we support only Page table mode */
+
+ soc = (i == 0) ? 1 : 0;
+ eoc = (i == ncmds - 1) ? 1 : 0;
+
+ /* For last cmd, set naddr corresponding to remaining
+ * addresses
+ */
+ if (i == ncmds - 1) {
+ naddr = sgt->nents % HMA_MAX_ADDR_IN_CMD;
+ naddr = naddr ? naddr : HMA_MAX_ADDR_IN_CMD;
+ }
+ memset(&hma_cmd, 0, sizeof(hma_cmd));
+ hma_cmd.op_pkd = htonl(FW_CMD_OP_V(FW_HMA_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
+ hma_cmd.retval_len16 = htonl(FW_LEN16(hma_cmd));
+
+ hma_cmd.mode_to_pcie_params =
+ htonl(FW_HMA_CMD_MODE_V(hma_mode) |
+ FW_HMA_CMD_SOC_V(soc) | FW_HMA_CMD_EOC_V(eoc));
+
+ /* HMA cmd size specified in MBs */
+ hma_cmd.naddr_size =
+ htonl(FW_HMA_CMD_SIZE_V(hma_size) |
+ FW_HMA_CMD_NADDR_V(naddr));
+
+ /* Total Page size specified in units of 4K */
+ hma_cmd.addr_size_pkd =
+ htonl(FW_HMA_CMD_ADDR_SIZE_V
+ ((page_size << page_order) >> 12));
+
+ /* Fill the 5 addresses */
+ for (j = 0; j < naddr; j++) {
+ hma_cmd.phy_address[j] =
+ cpu_to_be64(adapter->hma.phy_addr[j + k]);
+ }
+ ret = t4_wr_mbox(adapter, adapter->mbox, &hma_cmd,
+ sizeof(hma_cmd), &hma_cmd);
+ if (ret) {
+ dev_err(adapter->pdev_dev,
+ "HMA FW command failed with err %d\n", ret);
+ goto free_hma;
+ }
+ }
+
+ if (!ret)
+ dev_info(adapter->pdev_dev,
+ "Reserved %uMB host memory for HMA\n", hma_size);
+ return ret;
+
+free_hma:
+ adap_free_hma_mem(adapter);
+ return ret;
+}
+
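adap_config_hma() above hands the page addresses to firmware in chunks of at most HMA_MAX_ADDR_IN_CMD (5), marking the first chunk with SOC and the last with EOC, and trimming naddr on the final command. The chunking logic in isolation, as a small self-contained sketch with no firmware involved:

#include <stdio.h>

#define MAX_ADDR_IN_CMD 5

static void emit_hma_cmds(unsigned int nents)
{
	unsigned int ncmds = (nents + MAX_ADDR_IN_CMD - 1) / MAX_ADDR_IN_CMD;
	unsigned int i, k;

	for (i = 0, k = 0; i < ncmds; i++, k += MAX_ADDR_IN_CMD) {
		unsigned int naddr = MAX_ADDR_IN_CMD;
		int soc = (i == 0);
		int eoc = (i == ncmds - 1);

		/* the last command carries only the leftover addresses */
		if (eoc && nents % MAX_ADDR_IN_CMD)
			naddr = nents % MAX_ADDR_IN_CMD;

		printf("cmd %u: addrs [%u..%u], soc=%d eoc=%d\n",
		       i, k, k + naddr - 1, soc, eoc);
	}
}

int main(void)
{
	emit_hma_cmds(12);	/* -> 3 commands: 5 + 5 + 2 addresses */
	return 0;
}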
static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
{
u32 v;
@@ -3751,6 +3976,12 @@ static int adap_init0_config(struct adapter *adapter, int reset)
if (ret < 0)
goto bye;
+ /* We will proceed even if HMA init fails. */
+ ret = adap_config_hma(adapter);
+ if (ret)
+ dev_err(adapter->pdev_dev,
+ "HMA configuration failed with error %d\n", ret);
+
/*
* And finally tell the firmware to initialize itself using the
* parameters from the Configuration File.
@@ -3957,6 +4188,11 @@ static int adap_init0(struct adapter *adap)
* effect. Otherwise, it's time to try initializing the adapter.
*/
if (state == DEV_STATE_INIT) {
+ ret = adap_config_hma(adap);
+ if (ret)
+ dev_err(adap->pdev_dev,
+ "HMA configuration failed with error %d\n",
+ ret);
dev_info(adap->pdev_dev, "Coming up as %s: "\
"Adapter already initialized\n",
adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
@@ -4236,6 +4472,20 @@ static int adap_init0(struct adapter *adap)
adap->vres.pbl.start = val[4];
adap->vres.pbl.size = val[5] - val[4] + 1;
+ params[0] = FW_PARAM_PFVF(SRQ_START);
+ params[1] = FW_PARAM_PFVF(SRQ_END);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+ params, val);
+ if (!ret) {
+ adap->vres.srq.start = val[0];
+ adap->vres.srq.size = val[1] - val[0] + 1;
+ }
+ if (adap->vres.srq.size) {
+ adap->srq = t4_init_srq(adap->vres.srq.size);
+ if (!adap->srq)
+ dev_warn(&adap->pdev->dev, "could not allocate SRQ, continuing\n");
+ }
+
params[0] = FW_PARAM_PFVF(SQRQ_START);
params[1] = FW_PARAM_PFVF(SQRQ_END);
params[2] = FW_PARAM_PFVF(CQ_START);
@@ -4269,6 +4519,18 @@ static int adap_init0(struct adapter *adap)
"max_ordird_qp %d max_ird_adapter %d\n",
adap->params.max_ordird_qp,
adap->params.max_ird_adapter);
+
+ /* Enable write_with_immediate if FW supports it */
+ params[0] = FW_PARAM_DEV(RDMA_WRITE_WITH_IMM);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+ val);
+ adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
+
+ /* Enable write_cmpl if FW supports it */
+ params[0] = FW_PARAM_DEV(RI_WRITE_CMPL_WR);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+ val);
+ adap->params.write_cmpl_support = (ret == 0 && val[0] != 0);
adap->num_ofld_uld += 2;
}
if (caps_cmd.iscsicaps) {
@@ -4346,6 +4608,7 @@ static int adap_init0(struct adapter *adap)
* happened to HW/FW, stop issuing commands.
*/
bye:
+ adap_free_hma_mem(adap);
kfree(adap->sge.egr_map);
kfree(adap->sge.ingr_map);
kfree(adap->sge.starving_fl);
@@ -4903,6 +5166,7 @@ static void free_some_resources(struct adapter *adapter)
kvfree(adapter->smt);
kvfree(adapter->l2t);
+ kvfree(adapter->srq);
t4_cleanup_sched(adapter);
kvfree(adapter->tids.tid_tab);
cxgb4_cleanup_tc_flower(adapter);
@@ -5257,6 +5521,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
INIT_WORK(&adapter->db_full_task, process_db_full);
INIT_WORK(&adapter->db_drop_task, process_db_drop);
+ INIT_WORK(&adapter->fatal_err_notify_task, notify_fatal_err);
err = t4_prep_adapter(adapter);
if (err)
@@ -5574,6 +5839,8 @@ static void remove_one(struct pci_dev *pdev)
t4_uld_clean_up(adapter);
}
+ adap_free_hma_mem(adapter);
+
disable_interrupts(adapter);
for_each_port(adapter, i)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 6b5fea4532f3..a95cde0fadf7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -342,6 +342,7 @@ static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
{
struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+ adap->sge.uld_rxq_info[uld_type] = NULL;
kfree(rxq_info->rspq_id);
kfree(rxq_info->uldrxq);
kfree(rxq_info);
@@ -665,6 +666,8 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
lld->nodeid = dev_to_node(adap->pdev_dev);
lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
+ lld->write_w_imm_support = adap->params.write_w_imm_support;
+ lld->write_cmpl_support = adap->params.write_cmpl_support;
}
static void uld_attach(struct adapter *adap, unsigned int uld)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index a14e8db51cdc..b0ca06edaa7c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -257,7 +257,8 @@ enum cxgb4_state {
CXGB4_STATE_UP,
CXGB4_STATE_START_RECOVERY,
CXGB4_STATE_DOWN,
- CXGB4_STATE_DETACH
+ CXGB4_STATE_DETACH,
+ CXGB4_STATE_FATAL_ERROR
};
enum cxgb4_control {
@@ -283,6 +284,7 @@ struct cxgb4_virt_res { /* virtualized HW resources */
struct cxgb4_range iscsi;
struct cxgb4_range stag;
struct cxgb4_range rq;
+ struct cxgb4_range srq;
struct cxgb4_range pbl;
struct cxgb4_range qp;
struct cxgb4_range cq;
@@ -352,6 +354,8 @@ struct cxgb4_lld_info {
void **iscsi_ppm; /* iscsi page pod manager */
int nodeid; /* device numa node id */
bool fr_nsmr_tpte_wr_support; /* FW supports FR_NSMR_TPTE_WR */
+ bool write_w_imm_support; /* FW supports WRITE_WITH_IMMEDIATE */
+ bool write_cmpl_support; /* FW supports WRITE_CMPL WR */
};
struct cxgb4_uld_info {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
index 77b2b3fd9021..3a49e00a38a1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -42,8 +42,8 @@
#define FW_SCHED_CLS_NONE 0xffffffff
-/* Max rate that can be set to a scheduling class is 10 Gbps */
-#define SCHED_MAX_RATE_KBPS 10000000U
+/* Max rate that can be set to a scheduling class is 100 Gbps */
+#define SCHED_MAX_RATE_KBPS 100000000U
enum {
SCHED_STATE_ACTIVE,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c
new file mode 100644
index 000000000000..6228a5708307
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.c
@@ -0,0 +1,138 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_msg.h"
+#include "srq.h"
+
+struct srq_data *t4_init_srq(int srq_size)
+{
+ struct srq_data *s;
+
+ s = kvzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return NULL;
+
+ s->srq_size = srq_size;
+ init_completion(&s->comp);
+ mutex_init(&s->lock);
+
+ return s;
+}
+
+/* cxgb4_get_srq_entry: read the SRQ table entry
+ * @dev: Pointer to the net_device
+ * @idx: Index to the srq
+ * @entryp: pointer to the srq entry
+ *
+ * Sends CPL_SRQ_TABLE_REQ message for the given index.
+ * Contents will be returned in CPL_SRQ_TABLE_RPL message.
+ *
+ * Returns zero if the read is successful, else an error
+ * number will be returned. Caller should not use the srq
+ * entry if the return value is non-zero.
+ */
+int cxgb4_get_srq_entry(struct net_device *dev,
+ int srq_idx, struct srq_entry *entryp)
+{
+ struct cpl_srq_table_req *req;
+ struct adapter *adap;
+ struct sk_buff *skb;
+ struct srq_data *s;
+ int rc = -ENODEV;
+
+ adap = netdev2adap(dev);
+ s = adap->srq;
+
+ if (!(adap->flags & FULL_INIT_DONE) || !s)
+ goto out;
+
+ skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ req = (struct cpl_srq_table_req *)
+ __skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ,
+ TID_TID_V(srq_idx) |
+ TID_QID_V(adap->sge.fw_evtq.abs_id)));
+ req->idx = srq_idx;
+
+ mutex_lock(&s->lock);
+
+ s->entryp = entryp;
+ t4_mgmt_tx(adap, skb);
+
+ rc = wait_for_completion_timeout(&s->comp, SRQ_WAIT_TO);
+ if (rc)
+ rc = 0;
+ else /* !rc means we timed out */
+ rc = -ETIMEDOUT;
+
+ WARN_ON_ONCE(entryp->idx != srq_idx);
+ mutex_unlock(&s->lock);
+out:
+ return rc;
+}
+EXPORT_SYMBOL(cxgb4_get_srq_entry);
+
+void do_srq_table_rpl(struct adapter *adap,
+ const struct cpl_srq_table_rpl *rpl)
+{
+ unsigned int idx = TID_TID_G(GET_TID(rpl));
+ struct srq_data *s = adap->srq;
+ struct srq_entry *e;
+
+ if (unlikely(rpl->status != CPL_CONTAINS_READ_RPL)) {
+ dev_err(adap->pdev_dev,
+ "Unexpected SRQ_TABLE_RPL status %u for entry %u\n",
+ rpl->status, idx);
+ goto out;
+ }
+
+ /* Store the read entry */
+ e = s->entryp;
+ e->valid = 1;
+ e->idx = idx;
+ e->pdid = SRQT_PDID_G(be64_to_cpu(rpl->rsvd_pdid));
+ e->qlen = SRQT_QLEN_G(be32_to_cpu(rpl->qlen_qbase));
+ e->qbase = SRQT_QBASE_G(be32_to_cpu(rpl->qlen_qbase));
+ e->cur_msn = be16_to_cpu(rpl->cur_msn);
+ e->max_msn = be16_to_cpu(rpl->max_msn);
+out:
+ complete(&s->comp);
+}
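cxgb4_get_srq_entry() and do_srq_table_rpl() above form a simple synchronous request/reply pair: the caller serializes on a mutex, publishes the result buffer through s->entryp, fires the CPL request, and blocks on a completion that the reply handler signals after filling the buffer. A condensed sketch of that handshake, with hypothetical names and assuming the lock and completion were set up with mutex_init()/init_completion():

#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

struct demo_reader {
	struct mutex lock;		/* one outstanding read at a time */
	struct completion done;
	void *result;			/* caller's buffer, filled by the reply */
};

/* caller side */
static int demo_read(struct demo_reader *r, void *buf)
{
	unsigned long rc;

	mutex_lock(&r->lock);
	r->result = buf;
	/* ... send the request message to the hardware here ... */
	rc = wait_for_completion_timeout(&r->done, 5 * HZ);
	mutex_unlock(&r->lock);

	return rc ? 0 : -ETIMEDOUT;	/* 0 from the wait means timeout */
}

/* reply path (e.g. firmware event queue handler) */
static void demo_reply(struct demo_reader *r, const void *data, size_t len)
{
	memcpy(r->result, data, len);	/* fill the caller's buffer first */
	complete(&r->done);		/* then wake the waiter */
}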
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.h b/drivers/net/ethernet/chelsio/cxgb4/srq.h
new file mode 100644
index 000000000000..ec85cf93865a
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.h
@@ -0,0 +1,65 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SRQ_H
+#define __CXGB4_SRQ_H
+
+struct adapter;
+struct cpl_srq_table_rpl;
+
+#define SRQ_WAIT_TO (HZ * 5)
+
+struct srq_entry {
+ u8 valid;
+ u8 idx;
+ u8 qlen;
+ u16 pdid;
+ u16 cur_msn;
+ u16 max_msn;
+ u32 qbase;
+};
+
+struct srq_data {
+ unsigned int srq_size;
+ struct srq_entry *entryp;
+ struct completion comp;
+ struct mutex lock; /* generic mutex for srq data */
+};
+
+struct srq_data *t4_init_srq(int srq_size);
+int cxgb4_get_srq_entry(struct net_device *dev,
+ int srq_idx, struct srq_entry *entryp);
+void do_srq_table_rpl(struct adapter *adap,
+ const struct cpl_srq_table_rpl *rpl);
+#endif /* __CXGB4_SRQ_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 920bccd6bc40..38e38dcfff91 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
}
/**
+ * t4_memory_rw_init - Get memory window relative offset, base, and size.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_HMA or MEM_MC
+ * @mem_off: memory relative offset with respect to @mtype.
+ * @mem_base: configured memory base address.
+ * @mem_aperture: configured memory window aperture.
+ *
+ * Get the configured memory window's relative offset, base, and size.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+ u32 *mem_base, u32 *mem_aperture)
+{
+ u32 edc_size, mc_size, mem_reg;
+
+ /* Offset into the region of memory which is being accessed
+ * MEM_EDC0 = 0
+ * MEM_EDC1 = 1
+ * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
+ * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
+ * MEM_HMA = 4
+ */
+ edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+ if (mtype == MEM_HMA) {
+ *mem_off = 2 * (edc_size * 1024 * 1024);
+ } else if (mtype != MEM_MC1) {
+ *mem_off = (mtype * (edc_size * 1024 * 1024));
+ } else {
+ mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+ MA_EXT_MEMORY0_BAR_A));
+ *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
+ }
+
+ /* Each PCI-E Memory Window is programmed with a window size -- or
+ * "aperture" -- which controls the granularity of its mapping onto
+ * adapter memory. We need to grab that aperture in order to know
+ * how to use the specified window. The window is also programmed
+ * with the base address of the Memory Window in BAR0's address
+ * space. For T4 this is an absolute PCI-E Bus Address. For T5
+ * the address is relative to BAR0.
+ */
+ mem_reg = t4_read_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+ win));
+ /* a dead adapter will return 0xffffffff for PIO reads */
+ if (mem_reg == 0xffffffff)
+ return -ENXIO;
+
+ *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
+ *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
+ if (is_t4(adap->params.chip))
+ *mem_base -= adap->t4_bar0;
+
+ return 0;
+}
+
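t4_memory_rw_init() above maps the memory-type index onto a flat byte offset: EDC0 at 0, EDC1 one EDC size up, MC0 two EDC sizes up, MC1 after MC0, and HMA sharing the MC0 slot. The offset computation in isolation, with illustrative constants mirroring the comment in the code:

#include <stdint.h>

enum { DEMO_EDC0, DEMO_EDC1, DEMO_MC0, DEMO_MC1, DEMO_HMA };

/* Flat byte offset of each memory target, given the EDC and MC0 sizes in MB. */
static uint64_t demo_mem_offset(int mtype, uint64_t edc_mb, uint64_t mc0_mb)
{
	switch (mtype) {
	case DEMO_HMA:
		return (2 * edc_mb) << 20;		/* same slot as MC0 */
	case DEMO_MC1:
		return (2 * edc_mb + mc0_mb) << 20;	/* after MC0 */
	default:
		return ((uint64_t)mtype * edc_mb) << 20; /* EDC0/EDC1/MC0 */
	}
}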
+/**
+ * t4_memory_update_win - Move memory window to specified address.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @addr: location to move.
+ *
+ * Move memory window to specified address.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+ t4_write_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
+ addr);
+ /* Read it back to ensure that changes propagate before we
+ * attempt to use the new value.
+ */
+ t4_read_reg(adap,
+ PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+}
+
+/**
+ * t4_memory_rw_residual - Read/Write residual data.
+ * @adap: the adapter
+ * @off: relative offset within residual to start read/write.
+ * @addr: address within indicated memory type.
+ * @buf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Read/Write residual data less than 32-bits.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+ int dir)
+{
+ union {
+ u32 word;
+ char byte[4];
+ } last;
+ unsigned char *bp;
+ int i;
+
+ if (dir == T4_MEMORY_READ) {
+ last.word = le32_to_cpu((__force __le32)
+ t4_read_reg(adap, addr));
+ for (bp = (unsigned char *)buf, i = off; i < 4; i++)
+ bp[i] = last.byte[i];
+ } else {
+ last.word = *buf;
+ for (i = off; i < 4; i++)
+ last.byte[i] = 0;
+ t4_write_reg(adap, addr,
+ (__force u32)cpu_to_le32(last.word));
+ }
+}
+
+/**
* t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
* @adap: the adapter
* @win: PCI-E Memory Window to use
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
u32 len, void *hbuf, int dir)
{
u32 pos, offset, resid, memoffset;
- u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+ u32 win_pf, mem_aperture, mem_base;
u32 *buf;
+ int ret;
/* Argument sanity checks ...
*/
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
resid = len & 0x3;
len -= resid;
- /* Offset into the region of memory which is being accessed
- * MEM_EDC0 = 0
- * MEM_EDC1 = 1
- * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
- * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
- * MEM_HMA = 4
- */
- edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
- if (mtype == MEM_HMA) {
- memoffset = 2 * (edc_size * 1024 * 1024);
- } else if (mtype != MEM_MC1) {
- memoffset = (mtype * (edc_size * 1024 * 1024));
- } else {
- mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
- MA_EXT_MEMORY0_BAR_A));
- memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
- }
+ ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+ &mem_aperture);
+ if (ret)
+ return ret;
/* Determine the PCIE_MEM_ACCESS_OFFSET */
addr = addr + memoffset;
- /* Each PCI-E Memory Window is programmed with a window size -- or
- * "aperture" -- which controls the granularity of its mapping onto
- * adapter memory. We need to grab that aperture in order to know
- * how to use the specified window. The window is also programmed
- * with the base address of the Memory Window in BAR0's address
- * space. For T4 this is an absolute PCI-E Bus Address. For T5
- * the address is relative to BAR0.
- */
- mem_reg = t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
- win));
- mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
- mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
- if (is_t4(adap->params.chip))
- mem_base -= adap->t4_bar0;
win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
/* Calculate our initial PCI-E Memory Window Position and Offset into
* that Window.
*/
- pos = addr & ~(mem_aperture-1);
+ pos = addr & ~(mem_aperture - 1);
offset = addr - pos;
/* Set up initial PCI-E Memory Window to cover the start of our
- * transfer. (Read it back to ensure that changes propagate before we
- * attempt to use the new value.)
+ * transfer.
*/
- t4_write_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
- pos | win_pf);
- t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+ t4_memory_update_win(adap, win, pos | win_pf);
/* Transfer data to/from the adapter as long as there's an integral
* number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
if (offset == mem_aperture) {
pos += mem_aperture;
offset = 0;
- t4_write_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
- win), pos | win_pf);
- t4_read_reg(adap,
- PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
- win));
+ t4_memory_update_win(adap, win, pos | win_pf);
}
}
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
* residual amount. The PCI-E Memory Window has already been moved
* above (if necessary) to cover this final transfer.
*/
- if (resid) {
- union {
- u32 word;
- char byte[4];
- } last;
- unsigned char *bp;
- int i;
-
- if (dir == T4_MEMORY_READ) {
- last.word = le32_to_cpu(
- (__force __le32)t4_read_reg(adap,
- mem_base + offset));
- for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
- bp[i] = last.byte[i];
- } else {
- last.word = *buf;
- for (i = resid; i < 4; i++)
- last.byte[i] = 0;
- t4_write_reg(adap, mem_base + offset,
- (__force u32)cpu_to_le32(last.word));
- }
- }
+ if (resid)
+ t4_memory_rw_residual(adap, resid, mem_base + offset,
+ (u8 *)buf, dir);
return 0;
}
@@ -6036,6 +6091,7 @@ unsigned int t4_get_tp_ch_map(struct adapter *adap, int pidx)
case CHELSIO_T6:
switch (nports) {
+ case 1:
case 2: return 1 << pidx;
}
break;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index d0db4427b77e..5e8f5ca8e3ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -52,6 +52,7 @@ enum {
CPL_L2T_WRITE_REQ = 0x12,
CPL_SMT_WRITE_REQ = 0x14,
CPL_TID_RELEASE = 0x1A,
+ CPL_SRQ_TABLE_REQ = 0x1C,
CPL_TX_DATA_ISO = 0x1F,
CPL_CLOSE_LISTSRV_RPL = 0x20,
@@ -102,6 +103,7 @@ enum {
CPL_FW4_MSG = 0xC0,
CPL_FW4_PLD = 0xC1,
CPL_FW4_ACK = 0xC3,
+ CPL_SRQ_TABLE_RPL = 0xCC,
CPL_RX_PHYS_DSGL = 0xD0,
@@ -136,6 +138,8 @@ enum CPL_error {
CPL_ERR_KEEPALV_NEG_ADVICE = 37,
CPL_ERR_ABORT_FAILED = 42,
CPL_ERR_IWARP_FLM = 50,
+ CPL_CONTAINS_READ_RPL = 60,
+ CPL_CONTAINS_WRITE_RPL = 61,
};
enum {
@@ -198,6 +202,7 @@ union opcode_tid {
/* partitioning of TID fields that also carry a queue id */
#define TID_TID_S 0
#define TID_TID_M 0x3fff
+#define TID_TID_V(x) ((x) << TID_TID_S)
#define TID_TID_G(x) (((x) >> TID_TID_S) & TID_TID_M)
#define TID_QID_S 14
@@ -743,6 +748,22 @@ struct cpl_abort_req_rss {
u8 status;
};
+struct cpl_abort_req_rss6 {
+ WR_HDR;
+ union opcode_tid ot;
+ __u32 srqidx_status;
+};
+
+#define ABORT_RSS_STATUS_S 0
+#define ABORT_RSS_STATUS_M 0xff
+#define ABORT_RSS_STATUS_V(x) ((x) << ABORT_RSS_STATUS_S)
+#define ABORT_RSS_STATUS_G(x) (((x) >> ABORT_RSS_STATUS_S) & ABORT_RSS_STATUS_M)
+
+#define ABORT_RSS_SRQIDX_S 8
+#define ABORT_RSS_SRQIDX_M 0xffffff
+#define ABORT_RSS_SRQIDX_V(x) ((x) << ABORT_RSS_SRQIDX_S)
+#define ABORT_RSS_SRQIDX_G(x) (((x) >> ABORT_RSS_SRQIDX_S) & ABORT_RSS_SRQIDX_M)
+
struct cpl_abort_req {
WR_HDR;
union opcode_tid ot;
@@ -758,6 +779,11 @@ struct cpl_abort_rpl_rss {
u8 status;
};
+struct cpl_abort_rpl_rss6 {
+ union opcode_tid ot;
+ __u32 srqidx_status;
+};
+
struct cpl_abort_rpl {
WR_HDR;
union opcode_tid ot;
@@ -2112,4 +2138,49 @@ enum {
X_CPL_RX_MPS_PKT_TYPE_QFC = 1 << 2,
X_CPL_RX_MPS_PKT_TYPE_PTP = 1 << 3
};
+
+struct cpl_srq_table_req {
+ WR_HDR;
+ union opcode_tid ot;
+ __u8 status;
+ __u8 rsvd[2];
+ __u8 idx;
+ __be64 rsvd_pdid;
+ __be32 qlen_qbase;
+ __be16 cur_msn;
+ __be16 max_msn;
+};
+
+struct cpl_srq_table_rpl {
+ union opcode_tid ot;
+ __u8 status;
+ __u8 rsvd[2];
+ __u8 idx;
+ __be64 rsvd_pdid;
+ __be32 qlen_qbase;
+ __be16 cur_msn;
+ __be16 max_msn;
+};
+
+/* cpl_srq_table_{req,rpl}.params fields */
+#define SRQT_QLEN_S 28
+#define SRQT_QLEN_M 0xF
+#define SRQT_QLEN_V(x) ((x) << SRQT_QLEN_S)
+#define SRQT_QLEN_G(x) (((x) >> SRQT_QLEN_S) & SRQT_QLEN_M)
+
+#define SRQT_QBASE_S 0
+#define SRQT_QBASE_M 0x3FFFFFF
+#define SRQT_QBASE_V(x) ((x) << SRQT_QBASE_S)
+#define SRQT_QBASE_G(x) (((x) >> SRQT_QBASE_S) & SRQT_QBASE_M)
+
+#define SRQT_PDID_S 0
+#define SRQT_PDID_M 0xFF
+#define SRQT_PDID_V(x) ((x) << SRQT_PDID_S)
+#define SRQT_PDID_G(x) (((x) >> SRQT_PDID_S) & SRQT_PDID_M)
+
+#define SRQT_IDX_S 0
+#define SRQT_IDX_M 0xF
+#define SRQT_IDX_V(x) ((x) << SRQT_IDX_S)
+#define SRQT_IDX_G(x) (((x) >> SRQT_IDX_S) & SRQT_IDX_M)
+
#endif /* __T4_MSG_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 0d83b4064a78..544757f6ab3a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -101,6 +101,7 @@ enum fw_wr_opcodes {
FW_RI_BIND_MW_WR = 0x18,
FW_RI_FR_NSMR_WR = 0x19,
FW_RI_FR_NSMR_TPTE_WR = 0x20,
+ FW_RI_RDMA_WRITE_CMPL_WR = 0x21,
FW_RI_INV_LSTAG_WR = 0x1a,
FW_ISCSI_TX_DATA_WR = 0x45,
FW_PTP_TX_PKT_WR = 0x46,
@@ -766,6 +767,7 @@ enum fw_cmd_opcodes {
FW_DEVLOG_CMD = 0x25,
FW_CLIP_CMD = 0x28,
FW_PTP_CMD = 0x3e,
+ FW_HMA_CMD = 0x3f,
FW_LASTC2E_CMD = 0x40,
FW_ERROR_CMD = 0x80,
FW_DEBUG_CMD = 0x81,
@@ -1132,6 +1134,7 @@ enum fw_memtype_cf {
FW_MEMTYPE_CF_FLASH = 0x4,
FW_MEMTYPE_CF_INTERNAL = 0x5,
FW_MEMTYPE_CF_EXTMEM1 = 0x6,
+ FW_MEMTYPE_CF_HMA = 0x7,
};
struct fw_caps_config_cmd {
@@ -1210,6 +1213,9 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C,
FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D,
FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E,
+ FW_PARAMS_PARAM_DEV_HMA_SIZE = 0x20,
+ FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
+ FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
};
/*
@@ -1241,6 +1247,8 @@ enum fw_params_param_pfvf {
FW_PARAMS_PARAM_PFVF_SQRQ_END = 0x16,
FW_PARAMS_PARAM_PFVF_CQ_START = 0x17,
FW_PARAMS_PARAM_PFVF_CQ_END = 0x18,
+ FW_PARAMS_PARAM_PFVF_SRQ_START = 0x19,
+ FW_PARAMS_PARAM_PFVF_SRQ_END = 0x1A,
FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH = 0x20,
FW_PARAMS_PARAM_PFVF_VIID = 0x24,
FW_PARAMS_PARAM_PFVF_CPMASK = 0x25,
@@ -3435,6 +3443,59 @@ struct fw_debug_cmd {
#define FW_DEBUG_CMD_TYPE_G(x) \
(((x) >> FW_DEBUG_CMD_TYPE_S) & FW_DEBUG_CMD_TYPE_M)
+struct fw_hma_cmd {
+ __be32 op_pkd;
+ __be32 retval_len16;
+ __be32 mode_to_pcie_params;
+ __be32 naddr_size;
+ __be32 addr_size_pkd;
+ __be32 r6;
+ __be64 phy_address[5];
+};
+
+#define FW_HMA_CMD_MODE_S 31
+#define FW_HMA_CMD_MODE_M 0x1
+#define FW_HMA_CMD_MODE_V(x) ((x) << FW_HMA_CMD_MODE_S)
+#define FW_HMA_CMD_MODE_G(x) \
+ (((x) >> FW_HMA_CMD_MODE_S) & FW_HMA_CMD_MODE_M)
+#define FW_HMA_CMD_MODE_F FW_HMA_CMD_MODE_V(1U)
+
+#define FW_HMA_CMD_SOC_S 30
+#define FW_HMA_CMD_SOC_M 0x1
+#define FW_HMA_CMD_SOC_V(x) ((x) << FW_HMA_CMD_SOC_S)
+#define FW_HMA_CMD_SOC_G(x) (((x) >> FW_HMA_CMD_SOC_S) & FW_HMA_CMD_SOC_M)
+#define FW_HMA_CMD_SOC_F FW_HMA_CMD_SOC_V(1U)
+
+#define FW_HMA_CMD_EOC_S 29
+#define FW_HMA_CMD_EOC_M 0x1
+#define FW_HMA_CMD_EOC_V(x) ((x) << FW_HMA_CMD_EOC_S)
+#define FW_HMA_CMD_EOC_G(x) (((x) >> FW_HMA_CMD_EOC_S) & FW_HMA_CMD_EOC_M)
+#define FW_HMA_CMD_EOC_F FW_HMA_CMD_EOC_V(1U)
+
+#define FW_HMA_CMD_PCIE_PARAMS_S 0
+#define FW_HMA_CMD_PCIE_PARAMS_M 0x7ffffff
+#define FW_HMA_CMD_PCIE_PARAMS_V(x) ((x) << FW_HMA_CMD_PCIE_PARAMS_S)
+#define FW_HMA_CMD_PCIE_PARAMS_G(x) \
+ (((x) >> FW_HMA_CMD_PCIE_PARAMS_S) & FW_HMA_CMD_PCIE_PARAMS_M)
+
+#define FW_HMA_CMD_NADDR_S 12
+#define FW_HMA_CMD_NADDR_M 0x3f
+#define FW_HMA_CMD_NADDR_V(x) ((x) << FW_HMA_CMD_NADDR_S)
+#define FW_HMA_CMD_NADDR_G(x) \
+ (((x) >> FW_HMA_CMD_NADDR_S) & FW_HMA_CMD_NADDR_M)
+
+#define FW_HMA_CMD_SIZE_S 0
+#define FW_HMA_CMD_SIZE_M 0xfff
+#define FW_HMA_CMD_SIZE_V(x) ((x) << FW_HMA_CMD_SIZE_S)
+#define FW_HMA_CMD_SIZE_G(x) \
+ (((x) >> FW_HMA_CMD_SIZE_S) & FW_HMA_CMD_SIZE_M)
+
+#define FW_HMA_CMD_ADDR_SIZE_S 11
+#define FW_HMA_CMD_ADDR_SIZE_M 0x1fffff
+#define FW_HMA_CMD_ADDR_SIZE_V(x) ((x) << FW_HMA_CMD_ADDR_SIZE_S)
+#define FW_HMA_CMD_ADDR_SIZE_G(x) \
+ (((x) >> FW_HMA_CMD_ADDR_SIZE_S) & FW_HMA_CMD_ADDR_SIZE_M)
+
enum pcie_fw_eval {
PCIE_FW_EVAL_CRASH = 0,
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index b7e79e64d2ed..7bd8497fd9be 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -155,8 +155,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
const char *fc;
const struct port_info *pi = netdev_priv(dev);
- netif_carrier_on(dev);
-
switch (pi->link_cfg.speed) {
case 100:
s = "100Mbps";
@@ -202,7 +200,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
} else {
- netif_carrier_off(dev);
netdev_info(dev, "link down\n");
}
}
@@ -278,6 +275,17 @@ static int link_start(struct net_device *dev)
*/
if (ret == 0)
ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+
+ /* The Virtual Interfaces are connected to an internal switch on the
+ * chip which allows VIs attached to the same port to talk to each
+ * other even when the port link is down. As a result, we generally
+ * want to always report a VI's link as being "up", provided there are
+ * no errors in enabling vi.
+ */
+
+ if (ret == 0)
+ netif_carrier_on(dev);
+
return ret;
}
@@ -1281,22 +1289,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_KR:
SET_LMM(Backplane);
- SET_LMM(10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
break;
case FW_PORT_TYPE_BP4_AP:
SET_LMM(Backplane);
- SET_LMM(10000baseR_FEC);
- SET_LMM(10000baseKR_Full);
- SET_LMM(1000baseKX_Full);
- SET_LMM(10000baseKX4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
break;
case FW_PORT_TYPE_FIBER_XFI:
@@ -1312,18 +1320,24 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_BP40_BA:
case FW_PORT_TYPE_QSFP:
SET_LMM(FIBRE);
- SET_LMM(40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
break;
case FW_PORT_TYPE_CR_QSFP:
case FW_PORT_TYPE_SFP28:
SET_LMM(FIBRE);
- SET_LMM(25000baseCR_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
break;
case FW_PORT_TYPE_KR_SFP28:
SET_LMM(Backplane);
- SET_LMM(25000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
break;
case FW_PORT_TYPE_KR_XLAUI:
@@ -1335,13 +1349,18 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_CR2_QSFP:
SET_LMM(FIBRE);
- SET_LMM(50000baseSR2_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
break;
case FW_PORT_TYPE_KR4_100G:
case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
- SET_LMM(100000baseCR4_Full);
+ FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+ FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
+ FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
break;
default:
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 977d4c2c759d..3f8fe8fd79cc 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -56,21 +56,11 @@
local_irq_{dis,en}able()
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
static const char version[] =
"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
- or override something. */
#include <linux/module.h>
/*
@@ -93,6 +83,7 @@ static const char version[] =
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/platform_device.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
#include "cs89x0.h"
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
/* Information that need to be kept for each board. */
struct net_local {
+ int msg_enable;
int chip_type; /* one of: CS8900, CS8920, CS8920M */
char chip_revision; /* revision letter of the chip ('A'...) */
int send_cmd; /* the proper command used to send a packet. */
int rx_mode;
int curr_rx_cfg;
int send_underrun; /* keep track of how many underruns in a row we get */
- struct sk_buff *skb;
};
/* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
static int net_open(struct net_device *dev);
static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
static struct net_device_stats *net_get_stats(struct net_device *dev);
static int set_mac_address(struct net_device *dev, void *addr);
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
/* For reading/writing registers ISA-style */
static inline int
readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
/* Probe for the CS8900 card in slot E. We won't bother looking
anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
{
struct net_device *dev;
- static int once_is_enough;
struct net_local *lp;
- static unsigned version_printed;
int i, slot;
unsigned rev_type = 0;
unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
int err = -ENODEV;
struct nubus_rsrc *fres;
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
-
dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
- if (once_is_enough)
- goto out;
- once_is_enough = 1;
+ return -ENOMEM;
/* We might have to parameterize this later */
slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
if (sig != swab16(CHIP_EISA_ID_SIG))
goto out;
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
/* Initialize the net_device structure. */
lp = netdev_priv(dev);
+ lp->msg_enable = netif_msg_init(debug, 0);
+
/* Fill in the 'dev' fields. */
dev->base_addr = ioaddr;
dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
lp->send_cmd = TX_NOW;
- if (net_debug && version_printed++ == 0)
- printk(version);
+ netif_dbg(lp, drv, dev, "%s", version);
- printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
- dev->name,
- lp->chip_type==CS8900?'0':'2',
- lp->chip_type==CS8920M?"M":"",
- lp->chip_revision,
- dev->base_addr);
+ pr_info("cs89%c0%s rev %c found at %#8lx\n",
+ lp->chip_type == CS8900 ? '0' : '2',
+ lp->chip_type == CS8920M ? "M" : "",
+ lp->chip_revision, dev->base_addr);
/* Try to read the MAC address */
if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
- printk("\nmac89x0: No EEPROM, giving up now.\n");
+ pr_info("No EEPROM, giving up now.\n");
goto out1;
} else {
for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
/* print the IRQ and ethernet address. */
- printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+ pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
dev->netdev_ops = &mac89x0_netdev_ops;
err = register_netdev(dev);
if (err)
goto out1;
- return NULL;
+
+ platform_set_drvdata(pdev, dev);
+ return 0;
out1:
nubus_writew(0, dev->base_addr + ADD_PORT);
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
}
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
- int reset_start_time;
-
- writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
- /* wait 30 ms */
- msleep_interruptible(30);
-
- /* Wait until the chip is reset */
- reset_start_time = jiffies;
- while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
- ;
-}
-#endif
-
/* Open/initialize the board. This is called (in the current kernel)
sometime after booting when the 'ifconfig' program is run.
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
- if (net_debug > 3)
- printk("%s: sent %d byte packet of type %x\n",
- dev->name, skb->len,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+ skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
/* keep the upload from being interrupted, since we
ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
struct net_local *lp;
int ioaddr, status;
- if (dev == NULL) {
- printk ("net_interrupt(): irq %d for unknown device.\n", irq);
- return IRQ_NONE;
- }
-
ioaddr = dev->base_addr;
lp = netdev_priv(dev);
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
faster than you can read them off, you're screwed. Hasta la
vista, baby! */
while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
- if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+ netif_dbg(lp, intr, dev, "status=%04x\n", status);
switch(status & ISQ_EVENT_MASK) {
case ISQ_RECEIVER_EVENT:
/* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
netif_wake_queue(dev);
}
if (status & TX_UNDERRUN) {
- if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+ netif_dbg(lp, tx_err, dev, "transmit underrun\n");
lp->send_underrun++;
if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
static void
net_rx(struct net_device *dev)
{
+ struct net_local *lp = netdev_priv(dev);
struct sk_buff *skb;
int status, length;
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
/* Malloc up new buffer. */
skb = alloc_skb(length, GFP_ATOMIC);
if (skb == NULL) {
- printk("%s: Memory squeeze, dropping packet.\n", dev->name);
dev->stats.rx_dropped++;
return;
}
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
length);
- if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
- dev->name, length,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+ length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
skb->protocol=eth_type_trans(skb,dev);
netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
return -EADDRNOTAVAIL;
memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
- printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+ netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
/* set the Ethernet address */
for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
return 0;
}
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
MODULE_LICENSE("GPL");
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
{
- net_debug = debug;
- dev_cs89x0 = mac89x0_probe(-1);
- if (IS_ERR(dev_cs89x0)) {
- printk(KERN_WARNING "mac89x0.c: No card found\n");
- return PTR_ERR(dev_cs89x0);
- }
+ struct net_device *dev = platform_get_drvdata(pdev);
+
+ unregister_netdev(dev);
+ nubus_writew(0, dev->base_addr + ADD_PORT);
+ free_netdev(dev);
return 0;
}
-void
-cleanup_module(void)
-{
- unregister_netdev(dev_cs89x0);
- nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
- free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+ .probe = mac89x0_device_probe,
+ .remove = mac89x0_device_remove,
+ .driver = {
+ .name = "mac89x0",
+ },
+};
+
+module_platform_driver(mac89x0_platform_driver);
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 9b218f0e5a4c..0dd64acd2a3f 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -33,7 +33,7 @@
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION "2.3.0.45"
+#define DRV_VERSION "2.3.0.53"
#define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc"
#define ENIC_BARS_MAX 6
@@ -140,6 +140,7 @@ struct enic_rfs_flw_tbl {
struct vxlan_offload {
u16 vxlan_udp_port_number;
u8 patch_level;
+ u8 flags;
};
/* Per-instance private data structure */
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index efb9333c7cf8..869006c2002d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -474,6 +474,39 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
return 0;
}
+static int enic_get_rx_flow_hash(struct enic *enic, struct ethtool_rxnfc *cmd)
+{
+ cmd->data = 0;
+
+ switch (cmd->flow_type) {
+ case TCP_V6_FLOW:
+ case TCP_V4_FLOW:
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case UDP_V6_FLOW:
+ case UDP_V4_FLOW:
+ if (vnic_dev_capable_udp_rss(enic->vdev))
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case SCTP_V4_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
@@ -500,6 +533,9 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
ret = enic_grxclsrule(enic, cmd);
spin_unlock_bh(&enic->rfs_h.lock);
break;
+ case ETHTOOL_GRXFH:
+ ret = enic_get_rx_flow_hash(enic, cmd);
+ break;
default:
ret = -EOPNOTSUPP;
break;
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f202ba72a811..81684acf52af 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -191,8 +191,16 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
goto error;
}
- if (ti->sa_family != AF_INET) {
- netdev_info(netdev, "vxlan: only IPv4 offload supported");
+ switch (ti->sa_family) {
+ case AF_INET6:
+ if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
+ netdev_info(netdev, "vxlan: only IPv4 offload supported");
+ goto error;
+ }
+ /* Fall through */
+ case AF_INET:
+ break;
+ default:
goto error;
}
@@ -204,6 +212,11 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
goto error;
}
+ if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
+ !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
+ netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
+ goto error;
+ }
err = vnic_dev_overlay_offload_cfg(enic->vdev,
OVERLAY_CFG_VXLAN_PORT_UPDATE,
@@ -238,9 +251,8 @@ static void enic_udp_tunnel_del(struct net_device *netdev,
spin_lock_bh(&enic->devcmd_lock);
- if ((ti->sa_family != AF_INET) ||
- ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) ||
- (ti->type != UDP_TUNNEL_TYPE_VXLAN)) {
+ if ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number) ||
+ ti->type != UDP_TUNNEL_TYPE_VXLAN) {
netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded",
ntohs(ti->port), ti->sa_family, ti->type);
goto unlock;
@@ -271,22 +283,37 @@ static netdev_features_t enic_features_check(struct sk_buff *skb,
struct enic *enic = netdev_priv(dev);
struct udphdr *udph;
u16 port = 0;
- u16 proto;
+ u8 proto;
if (!skb->encapsulation)
return features;
features = vxlan_features_check(skb, features);
- /* hardware only supports IPv4 vxlan tunnel */
- if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IPV6):
+ if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+ goto out;
+ proto = ipv6_hdr(skb)->nexthdr;
+ break;
+ case htons(ETH_P_IP):
+ proto = ip_hdr(skb)->protocol;
+ break;
+ default:
goto out;
+ }
- /* hardware does not support offload of ipv6 inner pkt */
- if (eth->h_proto != ntohs(ETH_P_IP))
+ switch (eth->h_proto) {
+ case ntohs(ETH_P_IPV6):
+ if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+ goto out;
+ /* Fall through */
+ case ntohs(ETH_P_IP):
+ break;
+ default:
goto out;
+ }
- proto = ip_hdr(skb)->protocol;
if (proto == IPPROTO_UDP) {
udph = udp_hdr(skb);
@@ -635,12 +662,25 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
- if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+ switch (eth->h_proto) {
+ case ntohs(ETH_P_IP):
inner_ip_hdr(skb)->check = 0;
inner_tcp_hdr(skb)->check =
~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
inner_ip_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
+ break;
+ case ntohs(ETH_P_IPV6):
+ inner_tcp_hdr(skb)->check =
+ ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+ &inner_ipv6_hdr(skb)->daddr, 0,
+ IPPROTO_TCP, 0);
+ break;
+ default:
+ WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+ break;
}
}
@@ -1898,6 +1938,8 @@ static int enic_open(struct net_device *netdev)
}
for (i = 0; i < enic->rq_count; i++) {
+ /* enable rq before updating rq desc */
+ vnic_rq_enable(&enic->rq[i]);
vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
/* Need at least one buffer on ring to get going */
if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
@@ -1909,8 +1951,6 @@ static int enic_open(struct net_device *netdev)
for (i = 0; i < enic->wq_count; i++)
vnic_wq_enable(&enic->wq[i]);
- for (i = 0; i < enic->rq_count; i++)
- vnic_rq_enable(&enic->rq[i]);
if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
enic_dev_add_station_addr(enic);
@@ -1936,8 +1976,12 @@ static int enic_open(struct net_device *netdev)
return 0;
err_out_free_rq:
- for (i = 0; i < enic->rq_count; i++)
+ for (i = 0; i < enic->rq_count; i++) {
+ err = vnic_rq_disable(&enic->rq[i]);
+ if (err)
+ return err;
vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+ }
enic_dev_notify_unset(enic);
err_out_free_intr:
enic_unset_affinity_hint(enic);
@@ -2151,9 +2195,10 @@ static int enic_dev_wait(struct vnic_dev *vdev,
static int enic_dev_open(struct enic *enic)
{
int err;
+ u32 flags = CMD_OPENF_IG_DESCCACHE;
err = enic_dev_wait(enic->vdev, vnic_dev_open,
- vnic_dev_open_done, 0);
+ vnic_dev_open_done, flags);
if (err)
dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
err);
@@ -2275,7 +2320,7 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
{
struct device *dev = enic_get_dev(enic);
const u8 rss_default_cpu = 0;
- const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+ u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
NIC_CFG_RSS_HASH_TYPE_IPV6 |
NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
@@ -2283,6 +2328,8 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
const u8 rss_base_cpu = 0;
u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
+ if (vnic_dev_capable_udp_rss(enic->vdev))
+ rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP;
if (rss_enable) {
if (!enic_set_rsskey(enic)) {
if (enic_set_rsscpu(enic, rss_hash_bits)) {
@@ -2901,9 +2948,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features |= NETIF_F_RXCSUM;
if (ENIC_SETTING(enic, VXLAN)) {
u64 patch_level;
+ u64 a1 = 0;
netdev->hw_enc_features |= NETIF_F_RXCSUM |
NETIF_F_TSO |
+ NETIF_F_TSO6 |
NETIF_F_TSO_ECN |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_HW_CSUM |
@@ -2922,9 +2971,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
err = vnic_dev_get_supported_feature_ver(enic->vdev,
VIC_FEATURE_VXLAN,
- &patch_level);
+ &patch_level, &a1);
if (err)
patch_level = 0;
+ enic->vxlan.flags = (u8)a1;
/* mask bits that are supported by driver
*/
patch_level &= BIT_ULL(0) | BIT_ULL(2);
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 39bad67422dd..76cdd4c9d11f 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -1269,16 +1269,32 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
}
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
- u64 *supported_versions)
+ u64 *supported_versions, u64 *a1)
{
u64 a0 = feature;
int wait = 1000;
- u64 a1 = 0;
int ret;
- ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
+ ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, a1, wait);
if (!ret)
*supported_versions = a0;
return ret;
}
+
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
+{
+ u64 a0 = CMD_NIC_CFG, a1 = 0;
+ u64 rss_hash_type;
+ int wait = 1000;
+ int err;
+
+ err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+ if (err || !a0)
+ return false;
+
+ rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
+ NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;
+
+ return (rss_hash_type & NIC_CFG_RSS_HASH_TYPE_UDP);
+}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h
index 9d43d6bb9907..59d4cc8fbb85 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h
@@ -183,6 +183,7 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config);
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number);
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
- u64 *supported_versions);
+ u64 *supported_versions, u64 *a1);
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev);
#endif /* _VNIC_DEV_H_ */
diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
index d83880b0d468..41de4ba622a1 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
@@ -439,6 +439,7 @@ enum vnic_devcmd_cmd {
/* flags for CMD_OPEN */
#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */
+#define CMD_OPENF_IG_DESCCACHE 0x2 /* Do not flush IG DESC cache */
/* flags for CMD_INIT */
#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */
@@ -697,6 +698,10 @@ enum overlay_ofld_cmd {
#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0
+#define ENIC_VXLAN_INNER_IPV6 BIT(0)
+#define ENIC_VXLAN_OUTER_IPV6 BIT(1)
+#define ENIC_VXLAN_MULTI_WQ BIT(2)
+
/* Use this enum to get the supported versions for each of these features
* If you need to use the devcmd_get_supported_feature_version(), add
* the new feature into this enum and install function handler in devcmd.c
diff --git a/drivers/net/ethernet/cisco/enic/vnic_nic.h b/drivers/net/ethernet/cisco/enic/vnic_nic.h
index 995a50dd4c99..5a93db0d7afc 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_nic.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_nic.h
@@ -47,6 +47,7 @@
#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 (1 << 4)
#define NIC_CFG_RSS_HASH_TYPE_IPV6_EX (1 << 5)
#define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX (1 << 6)
+#define NIC_CFG_RSS_HASH_TYPE_UDP (1 << 7)
static inline void vnic_set_nic_cfg(u32 *nic_cfg,
u8 rss_default_cpu, u8 rss_hash_type,
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1a49297224ed..ff92ab1daeb8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,7 +19,7 @@
#include "be.h"
#include "be_cmds.h"
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
"Physical Link is functional",
"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
"Optics of two types installed – Remove one optic or install matching pair of optics.",
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 09da2d82c2f0..e8b43cf44b6f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -201,7 +201,7 @@ enum {
phy_state == BE_PHY_UNQUALIFIED || \
phy_state == BE_PHY_UNCERTIFIED)
-extern char *be_misconfig_evt_port_state[];
+extern const char * const be_misconfig_evt_port_state[];
/* async event indicating misconfigured port */
struct be_async_event_misconfig_port {
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index e4ec32a9ca15..fd43f98ddbe7 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
err);
}
+ if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+ priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+ err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+ priv->mac_dev->allmulti);
+ if (err < 0)
+ netif_err(priv, drv, net_dev,
+ "mac_dev->set_allmulti() = %d\n",
+ err);
+ }
+
err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
if (err < 0)
netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto csum_failed;
}
+ /* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
- qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+ frag_len = skb_headlen(skb);
+ qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */
- frag = &skb_shinfo(skb)->frags[0];
- frag_len = frag->size;
- for (i = 1; i <= nr_frags; i++, frag++) {
+ for (i = 0; i < nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ frag_len = frag->size;
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(dev, frag, 0,
frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto sg_map_failed;
}
- qm_sg_entry_set_len(&sgt[i], frag_len);
- sgt[i].bpid = FSL_DPAA_BPID_INV;
- sgt[i].offset = 0;
+ qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+ sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+ sgt[i + 1].offset = 0;
/* keep the offset in the address */
- qm_sg_entry_set64(&sgt[i], addr);
- frag_len = frag->size;
+ qm_sg_entry_set64(&sgt[i + 1], addr);
}
- qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+ /* Set the final bit in the last used entry of the SGT */
+ qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
@@ -2051,19 +2064,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
* make sure we don't feed FMan with more fragments than it supports.
*/
- if (nonlinear &&
- likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
- /* Just create a S/G fd based on the skb */
- err = skb_to_sg_fd(priv, skb, &fd);
- percpu_priv->tx_frag_skbuffs++;
- } else {
+ if (unlikely(nonlinear &&
+ (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
/* If the egress skb contains more fragments than we support
* we have no choice but to linearize it ourselves.
*/
- if (unlikely(nonlinear) && __skb_linearize(skb))
+ if (__skb_linearize(skb))
goto enomem;
- /* Finally, create a contig FD from this skb */
+ nonlinear = skb_is_nonlinear(skb);
+ }
+
+ if (nonlinear) {
+ /* Just create a S/G fd based on the skb */
+ err = skb_to_sg_fd(priv, skb, &fd);
+ percpu_priv->tx_frag_skbuffs++;
+ } else {
+ /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
}
if (unlikely(err < 0))
@@ -2200,14 +2217,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
if (dpaa_eth_napi_schedule(percpu_priv, portal))
return qman_cb_dqrr_stop;
- if (dpaa_eth_refill_bpools(priv))
- /* Unable to refill the buffer pool due to insufficient
- * system memory. Just release the frame back into the pool,
- * otherwise we'll soon end up with an empty buffer pool.
- */
- dpaa_fd_release(net_dev, &dq->fd);
- else
- dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+ dpaa_eth_refill_bpools(priv);
+ dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
return qman_cb_dqrr_consume;
}
@@ -2766,7 +2777,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
priv->channel = (u16)channel;
- /* Start a thread that will walk the CPUs with affine portals
+ /* Walk the CPUs with affine portals
* and add this pool channel to each's dequeue mask.
*/
dpaa_eth_add_channel(priv->channel);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index faea674094b9..85306d1b2acf 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
if (epause->rx_pause)
newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
if (epause->tx_pause)
- newadv |= ADVERTISED_Asym_Pause;
+ newadv ^= ADVERTISED_Asym_Pause;
oldadv = phydev->advertising &
(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index 7af31ddd093f..57b1e2b47c0a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
return 0;
}
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+ u32 tmp;
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (!is_init_done(dtsec->dtsec_drv_param))
+ return -EINVAL;
+
+ tmp = ioread32be(&regs->rctrl);
+ if (enable)
+ tmp |= RCTRL_MPROM;
+ else
+ tmp &= ~RCTRL_MPROM;
+
+ iowrite32be(tmp, &regs->rctrl);
+
+ return 0;
+}
+
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index c4467c072058..1a689adf5a22 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
#endif /* __DTSEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index c0296880feba..446a97b792e3 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -350,6 +350,7 @@ struct fman_mac {
struct fman_rev_info fm_rev_info;
bool basex_if;
struct phy_device *pcsphy;
+ bool allmulti_enabled;
};
static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
return 0;
}
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+ u32 entry;
+ struct memac_regs __iomem *regs = memac->regs;
+
+ if (!is_init_done(memac->memac_drv_param))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ memac->allmulti_enabled = enable;
+
+ return 0;
+}
+
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
{
struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
- iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+ if (!memac->allmulti_enabled) {
+ if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+ iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index c4a66469a907..b5a50338ed9a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
enum fman_mac_exceptions exception, bool enable);
int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
#endif /* __MEMAC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 4b0f3a50b293..284735d4ebe9 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -217,6 +217,7 @@ struct fman_mac {
struct tgec_cfg *cfg;
void *fm;
struct fman_rev_info fm_rev_info;
+ bool allmulti_enabled;
};
static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
return 0;
}
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+ u32 entry;
+ struct tgec_regs __iomem *regs = tgec->regs;
+
+ if (!is_init_done(tgec->cfg))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ tgec->allmulti_enabled = enable;
+
+ return 0;
+}
+
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
{
struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
- iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
- &regs->hashtable_ctrl);
+
+ if (!tgec->allmulti_enabled) {
+ if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+ iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index 514bba9f47ce..cbbd3b422a98 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
#endif /* __TGEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 88c0a0636b44..4829dcd9e077 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
mac_dev->set_exception = dtsec_set_exception;
+ mac_dev->set_allmulti = dtsec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = tgec_set_tx_pause_frames;
mac_dev->set_rx_pause = tgec_accept_rx_pause_frames;
mac_dev->set_exception = tgec_set_exception;
+ mac_dev->set_allmulti = tgec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
mac_dev->set_tx_pause = memac_set_tx_pause_frames;
mac_dev->set_rx_pause = memac_accept_rx_pause_frames;
mac_dev->set_exception = memac_set_exception;
+ mac_dev->set_allmulti = memac_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index eefb3357e304..b520cec120ee 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -59,6 +59,7 @@ struct mac_device {
bool rx_pause_active;
bool tx_pause_active;
bool promisc;
+ bool allmulti;
int (*init)(struct mac_device *mac_dev);
int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+ int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
int (*set_multi)(struct net_device *net_dev,
struct mac_device *mac_dev);
int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 3e9203ea42a6..519e2bd6aa60 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -11,6 +11,7 @@
enum HCLGE_MBX_OPCODE {
HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */
HCLGE_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset */
HCLGE_MBX_SET_UNICAST, /* (VF -> PF) set UC addr */
HCLGE_MBX_SET_MULTICAST, /* (VF -> PF) set MC addr */
HCLGE_MBX_SET_VLAN, /* (VF -> PF) set VLAN */
@@ -57,6 +58,8 @@ enum hclge_mbx_vlan_cfg_subcode {
#define HCLGE_MBX_MAX_MSG_SIZE 16
#define HCLGE_MBX_MAX_RESP_DATA_SIZE 8
+#define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM 3
+#define HCLGE_MBX_RING_NODE_VARIABLE_NUM 3
struct hclgevf_mbx_resp_status {
struct mutex mbx_mutex; /* protects against contending sync cmd resp */
@@ -83,6 +86,21 @@ struct hclge_mbx_pf_to_vf_cmd {
u16 msg[8];
};
+/* used by VF to store the received Async responses from PF */
+struct hclgevf_mbx_arq_ring {
+#define HCLGE_MBX_MAX_ARQ_MSG_SIZE 8
+#define HCLGE_MBX_MAX_ARQ_MSG_NUM 1024
+ struct hclgevf_dev *hdev;
+ u32 head;
+ u32 tail;
+ u32 count;
+ u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+};
+
#define hclge_mbx_ring_ptr_move_crq(crq) \
(crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+#define hclge_mbx_tail_ptr_move_arq(arq) \
+ (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
+#define hclge_mbx_head_ptr_move_arq(arq) \
+ (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index fd06bc78c58e..37ec1b3286c6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -118,6 +118,8 @@ enum hnae3_reset_notify_type {
};
enum hnae3_reset_type {
+ HNAE3_VF_RESET,
+ HNAE3_VF_FULL_RESET,
HNAE3_FUNC_RESET,
HNAE3_CORE_RESET,
HNAE3_GLOBAL_RESET,
@@ -265,6 +267,8 @@ struct hnae3_ae_dev {
* Get tc size of handle
* get_vector()
* Get vector number and vector information
+ * put_vector()
+ * Put the vector in hdev
* map_ring_to_vector()
* Map rings to vector
* unmap_ring_from_vector()
@@ -336,7 +340,8 @@ struct hnae3_ae_ops {
u32 *tx_usecs_high, u32 *rx_usecs_high);
void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
- int (*set_mac_addr)(struct hnae3_handle *handle, void *p);
+ int (*set_mac_addr)(struct hnae3_handle *handle, void *p,
+ bool is_first);
int (*add_uc_addr)(struct hnae3_handle *handle,
const unsigned char *addr);
int (*rm_uc_addr)(struct hnae3_handle *handle,
@@ -375,6 +380,7 @@ struct hnae3_ae_ops {
int (*get_vector)(struct hnae3_handle *handle, u16 vector_num,
struct hnae3_vector_info *vector_info);
+ int (*put_vector)(struct hnae3_handle *handle, int vector_num);
int (*map_ring_to_vector)(struct hnae3_handle *handle,
int vector_num,
struct hnae3_ring_chain_node *vr_chain);
@@ -396,8 +402,7 @@ struct hnae3_ae_ops {
int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
u16 vlan, u8 qos, __be16 proto);
int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
- void (*reset_event)(struct hnae3_handle *handle,
- enum hnae3_reset_type reset);
+ void (*reset_event)(struct hnae3_handle *handle);
void (*get_channels)(struct hnae3_handle *handle,
struct ethtool_channels *ch);
void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
@@ -407,6 +412,10 @@ struct hnae3_ae_ops {
u32 *flowctrl_adv);
int (*set_led_id)(struct hnae3_handle *handle,
enum ethtool_phys_id_state status);
+ void (*get_link_mode)(struct hnae3_handle *handle,
+ unsigned long *supported,
+ unsigned long *advertising);
+ void (*get_port_type)(struct hnae3_handle *handle, u8 *port_type);
};
struct hnae3_dcb_ops {
@@ -487,6 +496,9 @@ struct hnae3_handle {
struct hnae3_ae_algo *ae_algo; /* the class who provides this handle */
u64 flags; /* Indicate the capabilities for this handle*/
+ unsigned long last_reset_time;
+ enum hnae3_reset_type reset_level;
+
union {
struct net_device *netdev; /* first member */
struct hnae3_knic_private_info kinfo;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 601b6295d3f8..40a3eb70629e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -168,8 +168,8 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
* GL and RL (Rate Limiter) are 2 ways to achieve interrupt coalescing
*/
- if (rl_reg > 0 && !tqp_vector->tx_group.gl_adapt_enable &&
- !tqp_vector->rx_group.gl_adapt_enable)
+ if (rl_reg > 0 && !tqp_vector->tx_group.coal.gl_adapt_enable &&
+ !tqp_vector->rx_group.coal.gl_adapt_enable)
/* According to the hardware, the range of rl_reg is
* 0-59 and the unit is 4.
*/
@@ -205,23 +205,30 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
*/
/* Default: enable interrupt coalescing self-adaptive and GL */
- tqp_vector->tx_group.gl_adapt_enable = 1;
- tqp_vector->rx_group.gl_adapt_enable = 1;
+ tqp_vector->tx_group.coal.gl_adapt_enable = 1;
+ tqp_vector->rx_group.coal.gl_adapt_enable = 1;
- tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
- tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
-
- hns3_set_vector_coalesce_tx_gl(tqp_vector,
- tqp_vector->tx_group.int_gl);
- hns3_set_vector_coalesce_rx_gl(tqp_vector,
- tqp_vector->rx_group.int_gl);
+ tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K;
+ tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K;
/* Default: disable RL */
h->kinfo.int_rl_setting = 0;
- hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
- tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
- tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+ tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
+ tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
+ tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
+}
+
+static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector,
+ struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+
+ hns3_set_vector_coalesce_tx_gl(tqp_vector,
+ tqp_vector->tx_group.coal.int_gl);
+ hns3_set_vector_coalesce_rx_gl(tqp_vector,
+ tqp_vector->rx_group.coal.int_gl);
+ hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
}
static int hns3_nic_set_real_num_queue(struct net_device *netdev)
@@ -249,6 +256,16 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
return 0;
}
+static u16 hns3_get_max_available_channels(struct hnae3_handle *h)
+{
+ u16 free_tqps, max_rss_size, max_tqps;
+
+ h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
+ max_tqps = h->kinfo.num_tc * max_rss_size;
+
+ return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -303,7 +320,7 @@ static int hns3_nic_net_open(struct net_device *netdev)
return ret;
}
- priv->last_reset_time = jiffies;
+ priv->ae_handle->last_reset_time = jiffies;
return 0;
}
@@ -1104,7 +1121,7 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
if (!mac_addr || !is_valid_ether_addr((const u8 *)mac_addr->sa_data))
return -EADDRNOTAVAIL;
- ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data);
+ ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false);
if (ret) {
netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
return ret;
@@ -1388,11 +1405,15 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
+ if (!ret)
+ set_bit(vid, priv->active_vlans);
+
return ret;
}
@@ -1400,14 +1421,32 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
+ if (!ret)
+ clear_bit(vid, priv->active_vlans);
+
return ret;
}
+static void hns3_restore_vlan(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ u16 vid;
+ int ret;
+
+ for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
+ ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+ if (ret)
+ netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
+ vid, ret);
+ }
+}
+
static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
u8 qos, __be16 vlan_proto)
{
@@ -1504,7 +1543,6 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
static void hns3_nic_net_timeout(struct net_device *ndev)
{
struct hns3_nic_priv *priv = netdev_priv(ndev);
- unsigned long last_reset_time = priv->last_reset_time;
struct hnae3_handle *h = priv->ae_handle;
if (!hns3_get_tx_timeo_queue_info(ndev))
@@ -1512,24 +1550,12 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
priv->tx_timeout_count++;
- /* This timeout is far away enough from last timeout,
- * if timeout again,set the reset type to PF reset
- */
- if (time_after(jiffies, (last_reset_time + 20 * HZ)))
- priv->reset_level = HNAE3_FUNC_RESET;
-
- /* Don't do any new action before the next timeout */
- else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+ if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
return;
- priv->last_reset_time = jiffies;
-
+ /* request the reset */
if (h->ae_algo->ops->reset_event)
- h->ae_algo->ops->reset_event(h, priv->reset_level);
-
- priv->reset_level++;
- if (priv->reset_level > HNAE3_GLOBAL_RESET)
- priv->reset_level = HNAE3_GLOBAL_RESET;
+ h->ae_algo->ops->reset_event(h);
}
static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -2064,15 +2090,13 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
desc = &ring->desc[ring->next_to_clean];
size = le16_to_cpu(desc->rx.size);
- if (twobufs) {
- truesize = hnae_buf_size(ring);
- } else {
- truesize = ALIGN(size, L1_CACHE_BYTES);
+ truesize = hnae_buf_size(ring);
+
+ if (!twobufs)
last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
- }
skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
- size - pull_len, truesize - pull_len);
+ size - pull_len, truesize);
/* Avoid re-using remote pages, flag default unreuse */
if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
@@ -2369,20 +2393,20 @@ out:
static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
{
-#define HNS3_RX_ULTRA_PACKET_RATE 40000
+ struct hns3_enet_tqp_vector *tqp_vector =
+ ring_group->ring->tqp_vector;
enum hns3_flow_level_range new_flow_level;
- struct hns3_enet_tqp_vector *tqp_vector;
- int packets_per_secs;
- int bytes_per_usecs;
+ int packets_per_msecs;
+ int bytes_per_msecs;
+ u32 time_passed_ms;
u16 new_int_gl;
- int usecs;
- if (!ring_group->int_gl)
+ if (!ring_group->coal.int_gl || !tqp_vector->last_jiffies)
return false;
if (ring_group->total_packets == 0) {
- ring_group->int_gl = HNS3_INT_GL_50K;
- ring_group->flow_level = HNS3_FLOW_LOW;
+ ring_group->coal.int_gl = HNS3_INT_GL_50K;
+ ring_group->coal.flow_level = HNS3_FLOW_LOW;
return true;
}
@@ -2392,35 +2416,46 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
* 20-1249MB/s high (18000 ints/s)
* > 40000pps ultra (8000 ints/s)
*/
- new_flow_level = ring_group->flow_level;
- new_int_gl = ring_group->int_gl;
- tqp_vector = ring_group->ring->tqp_vector;
- usecs = (ring_group->int_gl << 1);
- bytes_per_usecs = ring_group->total_bytes / usecs;
- /* 1000000 microseconds */
- packets_per_secs = ring_group->total_packets * 1000000 / usecs;
+ new_flow_level = ring_group->coal.flow_level;
+ new_int_gl = ring_group->coal.int_gl;
+ time_passed_ms =
+ jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
+
+ if (!time_passed_ms)
+ return false;
+
+ do_div(ring_group->total_packets, time_passed_ms);
+ packets_per_msecs = ring_group->total_packets;
+
+ do_div(ring_group->total_bytes, time_passed_ms);
+ bytes_per_msecs = ring_group->total_bytes;
+
+#define HNS3_RX_LOW_BYTE_RATE 10000
+#define HNS3_RX_MID_BYTE_RATE 20000
switch (new_flow_level) {
case HNS3_FLOW_LOW:
- if (bytes_per_usecs > 10)
+ if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
new_flow_level = HNS3_FLOW_MID;
break;
case HNS3_FLOW_MID:
- if (bytes_per_usecs > 20)
+ if (bytes_per_msecs > HNS3_RX_MID_BYTE_RATE)
new_flow_level = HNS3_FLOW_HIGH;
- else if (bytes_per_usecs <= 10)
+ else if (bytes_per_msecs <= HNS3_RX_LOW_BYTE_RATE)
new_flow_level = HNS3_FLOW_LOW;
break;
case HNS3_FLOW_HIGH:
case HNS3_FLOW_ULTRA:
default:
- if (bytes_per_usecs <= 20)
+ if (bytes_per_msecs <= HNS3_RX_MID_BYTE_RATE)
new_flow_level = HNS3_FLOW_MID;
break;
}
- if ((packets_per_secs > HNS3_RX_ULTRA_PACKET_RATE) &&
- (&tqp_vector->rx_group == ring_group))
+#define HNS3_RX_ULTRA_PACKET_RATE 40
+
+ if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
+ &tqp_vector->rx_group == ring_group)
new_flow_level = HNS3_FLOW_ULTRA;
switch (new_flow_level) {
@@ -2442,9 +2477,9 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
ring_group->total_bytes = 0;
ring_group->total_packets = 0;
- ring_group->flow_level = new_flow_level;
- if (new_int_gl != ring_group->int_gl) {
- ring_group->int_gl = new_int_gl;
+ ring_group->coal.flow_level = new_flow_level;
+ if (new_int_gl != ring_group->coal.int_gl) {
+ ring_group->coal.int_gl = new_int_gl;
return true;
}
return false;
@@ -2456,19 +2491,27 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
bool rx_update, tx_update;
- if (rx_group->gl_adapt_enable) {
+ if (tqp_vector->int_adapt_down > 0) {
+ tqp_vector->int_adapt_down--;
+ return;
+ }
+
+ if (rx_group->coal.gl_adapt_enable) {
rx_update = hns3_get_new_int_gl(rx_group);
if (rx_update)
hns3_set_vector_coalesce_rx_gl(tqp_vector,
- rx_group->int_gl);
+ rx_group->coal.int_gl);
}
- if (tx_group->gl_adapt_enable) {
+ if (tx_group->coal.gl_adapt_enable) {
tx_update = hns3_get_new_int_gl(&tqp_vector->tx_group);
if (tx_update)
hns3_set_vector_coalesce_tx_gl(tqp_vector,
- tx_group->int_gl);
+ tx_group->coal.int_gl);
}
+
+ tqp_vector->last_jiffies = jiffies;
+ tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
}
static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
@@ -2615,32 +2658,18 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
struct hnae3_ring_chain_node vector_ring_chain;
struct hnae3_handle *h = priv->ae_handle;
struct hns3_enet_tqp_vector *tqp_vector;
- struct hnae3_vector_info *vector;
- struct pci_dev *pdev = h->pdev;
- u16 tqp_num = h->kinfo.num_tqps;
- u16 vector_num;
int ret = 0;
u16 i;
- /* RSS size, cpu online and vector_num should be the same */
- /* Should consider 2p/4p later */
- vector_num = min_t(u16, num_online_cpus(), tqp_num);
- vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
- GFP_KERNEL);
- if (!vector)
- return -ENOMEM;
-
- vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
-
- priv->vector_num = vector_num;
- priv->tqp_vector = (struct hns3_enet_tqp_vector *)
- devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
- GFP_KERNEL);
- if (!priv->tqp_vector)
- return -ENOMEM;
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+ hns3_vector_gl_rl_init_hw(tqp_vector, priv);
+ tqp_vector->num_tqps = 0;
+ }
- for (i = 0; i < tqp_num; i++) {
- u16 vector_i = i % vector_num;
+ for (i = 0; i < h->kinfo.num_tqps; i++) {
+ u16 vector_i = i % priv->vector_num;
+ u16 tqp_num = h->kinfo.num_tqps;
tqp_vector = &priv->tqp_vector[vector_i];
@@ -2650,52 +2679,94 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
hns3_add_ring_to_group(&tqp_vector->rx_group,
priv->ring_data[i + tqp_num].ring);
- tqp_vector->idx = vector_i;
- tqp_vector->mask_addr = vector[vector_i].io_addr;
- tqp_vector->vector_irq = vector[vector_i].vector;
- tqp_vector->num_tqps++;
-
priv->ring_data[i].ring->tqp_vector = tqp_vector;
priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+ tqp_vector->num_tqps++;
}
- for (i = 0; i < vector_num; i++) {
+ for (i = 0; i < priv->vector_num; i++) {
tqp_vector = &priv->tqp_vector[i];
tqp_vector->rx_group.total_bytes = 0;
tqp_vector->rx_group.total_packets = 0;
tqp_vector->tx_group.total_bytes = 0;
tqp_vector->tx_group.total_packets = 0;
- hns3_vector_gl_rl_init(tqp_vector, priv);
tqp_vector->handle = h;
ret = hns3_get_vector_ring_chain(tqp_vector,
&vector_ring_chain);
if (ret)
- goto out;
+ return ret;
ret = h->ae_algo->ops->map_ring_to_vector(h,
tqp_vector->vector_irq, &vector_ring_chain);
- if (ret)
- goto out;
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+ if (ret)
+ return ret;
+
netif_napi_add(priv->netdev, &tqp_vector->napi,
hns3_nic_common_poll, NAPI_POLL_WEIGHT);
}
+ return 0;
+}
+
+static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ struct hnae3_vector_info *vector;
+ struct pci_dev *pdev = h->pdev;
+ u16 tqp_num = h->kinfo.num_tqps;
+ u16 vector_num;
+ int ret = 0;
+ u16 i;
+
+ /* RSS size, cpu online and vector_num should be the same */
+ /* Should consider 2p/4p later */
+ vector_num = min_t(u16, num_online_cpus(), tqp_num);
+ vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
+ GFP_KERNEL);
+ if (!vector)
+ return -ENOMEM;
+
+ vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
+
+ priv->vector_num = vector_num;
+ priv->tqp_vector = (struct hns3_enet_tqp_vector *)
+ devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
+ GFP_KERNEL);
+ if (!priv->tqp_vector) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+ tqp_vector->idx = i;
+ tqp_vector->mask_addr = vector[i].io_addr;
+ tqp_vector->vector_irq = vector[i].vector;
+ hns3_vector_gl_rl_init(tqp_vector, priv);
+ }
+
out:
devm_kfree(&pdev->dev, vector);
return ret;
}
+static void hns3_clear_ring_group(struct hns3_enet_ring_group *group)
+{
+ group->ring = NULL;
+ group->count = 0;
+}
+
static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
{
struct hnae3_ring_chain_node vector_ring_chain;
struct hnae3_handle *h = priv->ae_handle;
struct hns3_enet_tqp_vector *tqp_vector;
- struct pci_dev *pdev = h->pdev;
int i, ret;
for (i = 0; i < priv->vector_num; i++) {
@@ -2711,6 +2782,10 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
if (ret)
return ret;
+ ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+ if (ret)
+ return ret;
+
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
@@ -2722,12 +2797,30 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
}
priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
-
+ hns3_clear_ring_group(&tqp_vector->rx_group);
+ hns3_clear_ring_group(&tqp_vector->tx_group);
netif_napi_del(&priv->tqp_vector[i].napi);
}
- devm_kfree(&pdev->dev, priv->tqp_vector);
+ return 0;
+}
+static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ struct pci_dev *pdev = h->pdev;
+ int i, ret;
+
+ for (i = 0; i < priv->vector_num; i++) {
+ struct hns3_enet_tqp_vector *tqp_vector;
+
+ tqp_vector = &priv->tqp_vector[i];
+ ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+ if (ret)
+ return ret;
+ }
+
+ devm_kfree(&pdev->dev, priv->tqp_vector);
return 0;
}
@@ -2957,13 +3050,8 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
h->ae_algo->ops->reset_queue(h, i);
hns3_fini_ring(priv->ring_data[i].ring);
- devm_kfree(priv->dev, priv->ring_data[i].ring);
hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
- devm_kfree(priv->dev,
- priv->ring_data[i + h->kinfo.num_tqps].ring);
}
- devm_kfree(priv->dev, priv->ring_data);
-
return 0;
}
@@ -2987,7 +3075,7 @@ static void hns3_init_mac_addr(struct net_device *netdev)
}
if (h->ae_algo->ops->set_mac_addr)
- h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+ h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
}
@@ -3013,7 +3101,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
int ret;
netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
- handle->kinfo.num_tqps);
+ hns3_get_max_available_channels(handle));
if (!netdev)
return -ENOMEM;
@@ -3021,8 +3109,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
priv->dev = &pdev->dev;
priv->netdev = netdev;
priv->ae_handle = handle;
- priv->last_reset_time = jiffies;
- priv->reset_level = HNAE3_FUNC_RESET;
+ priv->ae_handle->reset_level = HNAE3_NONE_RESET;
+ priv->ae_handle->last_reset_time = jiffies;
priv->tx_timeout_count = 0;
handle->kinfo.netdev = netdev;
@@ -3048,6 +3136,12 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_get_ring_cfg;
}
+ ret = hns3_nic_alloc_vector_data(priv);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_alloc_vector_data;
+ }
+
ret = hns3_nic_init_vector_data(priv);
if (ret) {
ret = -ENOMEM;
@@ -3076,8 +3170,10 @@ static int hns3_client_init(struct hnae3_handle *handle)
out_reg_netdev_fail:
out_init_ring_data:
(void)hns3_nic_uninit_vector_data(priv);
- priv->ring_data = NULL;
out_init_vector_data:
+ hns3_nic_dealloc_vector_data(priv);
+out_alloc_vector_data:
+ priv->ring_data = NULL;
out_get_ring_cfg:
priv->ae_handle = NULL;
free_netdev(netdev);
@@ -3097,10 +3193,16 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
if (ret)
netdev_err(netdev, "uninit vector error\n");
+ ret = hns3_nic_dealloc_vector_data(priv);
+ if (ret)
+ netdev_err(netdev, "dealloc vector error\n");
+
ret = hns3_uninit_all_ring(priv);
if (ret)
netdev_err(netdev, "uninit ring error\n");
+ hns3_put_ring_config(priv);
+
priv->ring_data = NULL;
free_netdev(netdev);
@@ -3240,7 +3342,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
{
struct hnae3_knic_private_info *kinfo = &handle->kinfo;
- struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
int ret = 0;
if (netif_running(kinfo->netdev)) {
@@ -3250,8 +3351,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
"hns net up fail, ret=%d!\n", ret);
return ret;
}
-
- priv->last_reset_time = jiffies;
+ handle->last_reset_time = jiffies;
}
return ret;
@@ -3263,11 +3363,14 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret;
- priv->reset_level = 1;
hns3_init_mac_addr(netdev);
hns3_nic_set_rx_mode(netdev);
hns3_recover_hw_addr(netdev);
+ /* Hardware table is only cleared when the PF resets */
+ if (!(handle->flags & HNAE3_SUPPORT_VF))
+ hns3_restore_vlan(netdev);
+
/* Carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -3306,6 +3409,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
if (ret)
netdev_err(netdev, "uninit ring error\n");
+ hns3_put_ring_config(priv);
+
priv->ring_data = NULL;
return ret;
@@ -3336,18 +3441,24 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
return ret;
}
-static u16 hns3_get_max_available_channels(struct net_device *netdev)
+static void hns3_restore_coal(struct hns3_nic_priv *priv,
+ struct hns3_enet_coalesce *tx,
+ struct hns3_enet_coalesce *rx)
{
- struct hnae3_handle *h = hns3_get_handle(netdev);
- u16 free_tqps, max_rss_size, max_tqps;
-
- h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
- max_tqps = h->kinfo.num_tc * max_rss_size;
+ u16 vector_num = priv->vector_num;
+ int i;
- return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+ for (i = 0; i < vector_num; i++) {
+ memcpy(&priv->tqp_vector[i].tx_group.coal, tx,
+ sizeof(struct hns3_enet_coalesce));
+ memcpy(&priv->tqp_vector[i].rx_group.coal, rx,
+ sizeof(struct hns3_enet_coalesce));
+ }
}
-static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
+static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num,
+ struct hns3_enet_coalesce *tx,
+ struct hns3_enet_coalesce *rx)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -3361,6 +3472,12 @@ static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
if (ret)
return ret;
+ ret = hns3_nic_alloc_vector_data(priv);
+ if (ret)
+ goto err_alloc_vector;
+
+ hns3_restore_coal(priv, tx, rx);
+
ret = hns3_nic_init_vector_data(priv);
if (ret)
goto err_uninit_vector;
@@ -3375,6 +3492,8 @@ err_put_ring:
hns3_put_ring_config(priv);
err_uninit_vector:
hns3_nic_uninit_vector_data(priv);
+err_alloc_vector:
+ hns3_nic_dealloc_vector_data(priv);
return ret;
}
@@ -3389,6 +3508,7 @@ int hns3_set_channels(struct net_device *netdev,
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = hns3_get_handle(netdev);
struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ struct hns3_enet_coalesce tx_coal, rx_coal;
bool if_running = netif_running(netdev);
u32 new_tqp_num = ch->combined_count;
u16 org_tqp_num;
@@ -3397,12 +3517,12 @@ int hns3_set_channels(struct net_device *netdev,
if (ch->rx_count || ch->tx_count)
return -EINVAL;
- if (new_tqp_num > hns3_get_max_available_channels(netdev) ||
+ if (new_tqp_num > hns3_get_max_available_channels(h) ||
new_tqp_num < kinfo->num_tc) {
dev_err(&netdev->dev,
"Change tqps fail, the tqp range is from %d to %d",
kinfo->num_tc,
- hns3_get_max_available_channels(netdev));
+ hns3_get_max_available_channels(h));
return -EINVAL;
}
@@ -3411,7 +3531,7 @@ int hns3_set_channels(struct net_device *netdev,
return 0;
if (if_running)
- dev_close(netdev);
+ hns3_nic_net_stop(netdev);
hns3_clear_all_ring(h);
@@ -3422,12 +3542,26 @@ int hns3_set_channels(struct net_device *netdev,
goto open_netdev;
}
+ /* Changing the tqp num may also change the vector num;
+ * ethtool only supports setting and querying one coalesce
+ * configuration for now, so save vector 0's coalesce
+ * configuration here in order to restore it.
+ */
+ memcpy(&tx_coal, &priv->tqp_vector[0].tx_group.coal,
+ sizeof(struct hns3_enet_coalesce));
+ memcpy(&rx_coal, &priv->tqp_vector[0].rx_group.coal,
+ sizeof(struct hns3_enet_coalesce));
+
+ hns3_nic_dealloc_vector_data(priv);
+
hns3_uninit_all_ring(priv);
+ hns3_put_ring_config(priv);
org_tqp_num = h->kinfo.num_tqps;
- ret = hns3_modify_tqp_num(netdev, new_tqp_num);
+ ret = hns3_modify_tqp_num(netdev, new_tqp_num, &tx_coal, &rx_coal);
if (ret) {
- ret = hns3_modify_tqp_num(netdev, org_tqp_num);
+ ret = hns3_modify_tqp_num(netdev, org_tqp_num,
+ &tx_coal, &rx_coal);
if (ret) {
/* If revert to old tqp failed, fatal error occurred */
dev_err(&netdev->dev,
@@ -3440,7 +3574,7 @@ int hns3_set_channels(struct net_device *netdev,
open_netdev:
if (if_running)
- dev_open(netdev);
+ hns3_nic_net_open(netdev);
return ret;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 213f501b30bb..9e4cfbbf8dcd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -10,6 +10,8 @@
#ifndef __HNS3_ENET_H
#define __HNS3_ENET_H
+#include <linux/if_vlan.h>
+
#include "hnae3.h"
extern const char hns3_driver_version[];
@@ -460,15 +462,21 @@ enum hns3_link_mode_bits {
#define HNS3_INT_RL_MAX 0x00EC
#define HNS3_INT_RL_ENABLE_MASK 0x40
+#define HNS3_INT_ADAPT_DOWN_START 100
+
+struct hns3_enet_coalesce {
+ u16 int_gl;
+ u8 gl_adapt_enable;
+ enum hns3_flow_level_range flow_level;
+};
+
struct hns3_enet_ring_group {
/* array of pointers to rings */
struct hns3_enet_ring *ring;
u64 total_bytes; /* total bytes processed this group */
u64 total_packets; /* total packets processed this group */
u16 count;
- enum hns3_flow_level_range flow_level;
- u16 int_gl;
- u8 gl_adapt_enable;
+ struct hns3_enet_coalesce coal;
};
struct hns3_enet_tqp_vector {
@@ -491,6 +499,7 @@ struct hns3_enet_tqp_vector {
/* when 0 should adjust interrupt coalesce parameter */
u8 int_adapt_down;
+ unsigned long last_jiffies;
} ____cacheline_internodealigned_in_smp;
enum hns3_udp_tnl_type {
@@ -523,8 +532,6 @@ struct hns3_nic_priv {
/* The most recently read link state */
int link;
u64 tx_timeout_count;
- enum hnae3_reset_type reset_level;
- unsigned long last_reset_time;
unsigned long state;
@@ -535,6 +542,7 @@ struct hns3_nic_priv {
struct notifier_block notifier_block;
/* Vxlan/Geneve information */
struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
};
union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index b034c7f24eda..9d07116a4426 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -74,19 +74,6 @@ struct hns3_link_mode_mapping {
u32 ethtool_link_mode;
};
-static const struct hns3_link_mode_mapping hns3_lm_map[] = {
- {HNS3_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
- {HNS3_LM_AUTONEG_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
- {HNS3_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
- {HNS3_LM_PAUSE_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
- {HNS3_LM_BACKPLANE_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
- {HNS3_LM_10BASET_HALF_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT},
- {HNS3_LM_10BASET_FULL_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT},
- {HNS3_LM_100BASET_HALF_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT},
- {HNS3_LM_100BASET_FULL_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT},
- {HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
-};
-
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
{
struct hnae3_handle *h = hns3_get_handle(ndev);
@@ -309,6 +296,9 @@ static void hns3_self_test(struct net_device *ndev,
struct hnae3_handle *h = priv->ae_handle;
int st_param[HNS3_SELF_TEST_TPYE_NUM][2];
bool if_running = netif_running(ndev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ bool dis_vlan_filter;
+#endif
int test_index = 0;
u32 i;
@@ -323,6 +313,14 @@ static void hns3_self_test(struct net_device *ndev,
if (if_running)
dev_close(ndev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ /* Disable the vlan filter since the selftest does not support it */
+ dis_vlan_filter = (ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ h->ae_algo->ops->enable_vlan_filter;
+ if (dis_vlan_filter)
+ h->ae_algo->ops->enable_vlan_filter(h, false);
+#endif
+
set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) {
@@ -345,28 +343,15 @@ static void hns3_self_test(struct net_device *ndev,
clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (dis_vlan_filter)
+ h->ae_algo->ops->enable_vlan_filter(h, true);
+#endif
+
if (if_running)
dev_open(ndev);
}
-static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
- bool is_advertised)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(hns3_lm_map); i++) {
- if (!(caps & hns3_lm_map[i].hns3_link_mode))
- continue;
-
- if (is_advertised)
- __set_bit(hns3_lm_map[i].ethtool_link_mode,
- cmd->link_modes.advertising);
- else
- __set_bit(hns3_lm_map[i].ethtool_link_mode,
- cmd->link_modes.supported);
- }
-}
-
static int hns3_get_sset_count(struct net_device *netdev, int stringset)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -578,18 +563,19 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
{
struct hnae3_handle *h = hns3_get_handle(netdev);
u32 flowctrl_adv = 0;
- u32 supported_caps;
- u32 advertised_caps;
- u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
u8 link_stat;
if (!h->ae_algo || !h->ae_algo->ops)
return -EOPNOTSUPP;
/* 1.auto_neg & speed & duplex from cmd */
- if (netdev->phydev)
+ if (netdev->phydev) {
phy_ethtool_ksettings_get(netdev->phydev, cmd);
- else if (h->ae_algo->ops->get_ksettings_an_result)
+
+ return 0;
+ }
+
+ if (h->ae_algo->ops->get_ksettings_an_result)
h->ae_algo->ops->get_ksettings_an_result(h,
&cmd->base.autoneg,
&cmd->base.speed,
@@ -603,62 +589,16 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
cmd->base.duplex = DUPLEX_UNKNOWN;
}
- /* 2.media_type get from bios parameter block */
- if (h->ae_algo->ops->get_media_type) {
- h->ae_algo->ops->get_media_type(h, &media_type);
-
- switch (media_type) {
- case HNAE3_MEDIA_TYPE_FIBER:
- cmd->base.port = PORT_FIBRE;
- supported_caps = HNS3_LM_FIBRE_BIT |
- HNS3_LM_AUTONEG_BIT |
- HNS3_LM_PAUSE_BIT |
- HNS3_LM_1000BASET_FULL_BIT;
-
- advertised_caps = supported_caps;
- break;
- case HNAE3_MEDIA_TYPE_COPPER:
- cmd->base.port = PORT_TP;
- supported_caps = HNS3_LM_TP_BIT |
- HNS3_LM_AUTONEG_BIT |
- HNS3_LM_PAUSE_BIT |
- HNS3_LM_1000BASET_FULL_BIT |
- HNS3_LM_100BASET_FULL_BIT |
- HNS3_LM_100BASET_HALF_BIT |
- HNS3_LM_10BASET_FULL_BIT |
- HNS3_LM_10BASET_HALF_BIT;
- advertised_caps = supported_caps;
- break;
- case HNAE3_MEDIA_TYPE_BACKPLANE:
- cmd->base.port = PORT_NONE;
- supported_caps = HNS3_LM_BACKPLANE_BIT |
- HNS3_LM_PAUSE_BIT |
- HNS3_LM_AUTONEG_BIT |
- HNS3_LM_1000BASET_FULL_BIT |
- HNS3_LM_100BASET_FULL_BIT |
- HNS3_LM_100BASET_HALF_BIT |
- HNS3_LM_10BASET_FULL_BIT |
- HNS3_LM_10BASET_HALF_BIT;
-
- advertised_caps = supported_caps;
- break;
- case HNAE3_MEDIA_TYPE_UNKNOWN:
- default:
- cmd->base.port = PORT_OTHER;
- supported_caps = 0;
- advertised_caps = 0;
- break;
- }
-
- if (!cmd->base.autoneg)
- advertised_caps &= ~HNS3_LM_AUTONEG_BIT;
+ /* 2.get link mode and port type */
+ if (h->ae_algo->ops->get_link_mode)
+ h->ae_algo->ops->get_link_mode(h,
+ cmd->link_modes.supported,
+ cmd->link_modes.advertising);
- advertised_caps &= ~HNS3_LM_PAUSE_BIT;
-
- /* now, map driver link modes to ethtool link modes */
- hns3_driv_to_eth_caps(supported_caps, cmd, false);
- hns3_driv_to_eth_caps(advertised_caps, cmd, true);
- }
+ cmd->base.port = PORT_NONE;
+ if (h->ae_algo->ops->get_port_type)
+ h->ae_algo->ops->get_port_type(h,
+ &cmd->base.port);
/* 3.mdix_ctrl&mdix get from phy reg */
if (h->ae_algo->ops->get_mdix_mode)
@@ -905,11 +845,13 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
tx_vector = priv->ring_data[queue].ring->tqp_vector;
rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
- cmd->use_adaptive_tx_coalesce = tx_vector->tx_group.gl_adapt_enable;
- cmd->use_adaptive_rx_coalesce = rx_vector->rx_group.gl_adapt_enable;
+ cmd->use_adaptive_tx_coalesce =
+ tx_vector->tx_group.coal.gl_adapt_enable;
+ cmd->use_adaptive_rx_coalesce =
+ rx_vector->rx_group.coal.gl_adapt_enable;
- cmd->tx_coalesce_usecs = tx_vector->tx_group.int_gl;
- cmd->rx_coalesce_usecs = rx_vector->rx_group.int_gl;
+ cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl;
+ cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl;
cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting;
cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting;
@@ -1029,14 +971,18 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
tx_vector = priv->ring_data[queue].ring->tqp_vector;
rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
- tx_vector->tx_group.gl_adapt_enable = cmd->use_adaptive_tx_coalesce;
- rx_vector->rx_group.gl_adapt_enable = cmd->use_adaptive_rx_coalesce;
+ tx_vector->tx_group.coal.gl_adapt_enable =
+ cmd->use_adaptive_tx_coalesce;
+ rx_vector->rx_group.coal.gl_adapt_enable =
+ cmd->use_adaptive_rx_coalesce;
- tx_vector->tx_group.int_gl = cmd->tx_coalesce_usecs;
- rx_vector->rx_group.int_gl = cmd->rx_coalesce_usecs;
+ tx_vector->tx_group.coal.int_gl = cmd->tx_coalesce_usecs;
+ rx_vector->rx_group.coal.int_gl = cmd->rx_coalesce_usecs;
- hns3_set_vector_coalesce_tx_gl(tx_vector, tx_vector->tx_group.int_gl);
- hns3_set_vector_coalesce_rx_gl(rx_vector, rx_vector->rx_group.int_gl);
+ hns3_set_vector_coalesce_tx_gl(tx_vector,
+ tx_vector->tx_group.coal.int_gl);
+ hns3_set_vector_coalesce_rx_gl(rx_vector,
+ rx_vector->rx_group.coal.int_gl);
hns3_set_vector_coalesce_rl(tx_vector, h->kinfo.int_rl_setting);
hns3_set_vector_coalesce_rl(rx_vector, h->kinfo.int_rl_setting);
@@ -1111,6 +1057,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
.get_channels = hns3_get_channels,
.get_coalesce = hns3_get_coalesce,
.set_coalesce = hns3_set_coalesce,
+ .get_link = hns3_get_link,
};
static const struct ethtool_ops hns3_ethtool_ops = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 3fd10a6bec53..ee3cbac6dfaa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -12,7 +12,7 @@
#include <linux/types.h>
#include <linux/io.h>
-#define HCLGE_CMDQ_TX_TIMEOUT 1000
+#define HCLGE_CMDQ_TX_TIMEOUT 30000
struct hclge_dev;
struct hclge_desc {
@@ -414,6 +414,8 @@ struct hclge_pf_res_cmd {
#define HCLGE_CFG_DEFAULT_SPEED_M GENMASK(23, 16)
#define HCLGE_CFG_RSS_SIZE_S 24
#define HCLGE_CFG_RSS_SIZE_M GENMASK(31, 24)
+#define HCLGE_CFG_SPEED_ABILITY_S 0
+#define HCLGE_CFG_SPEED_ABILITY_M GENMASK(7, 0)
struct hclge_cfg_param_cmd {
__le32 offset;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 5018d6633133..955f0e3d5c95 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -144,6 +144,8 @@ static int hclge_map_update(struct hnae3_handle *h)
if (ret)
return ret;
+ hclge_rss_indir_init_cfg(hdev);
+
return hclge_rss_init_hw(hdev);
}
@@ -203,9 +205,11 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
{
+ u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC];
struct hclge_vport *vport = hclge_get_vport(h);
struct hclge_dev *hdev = vport->back;
u8 i, j, pfc_map, *prio_tc;
+ int ret;
memset(pfc, 0, sizeof(*pfc));
pfc->pfc_cap = hdev->pfc_max;
@@ -220,6 +224,18 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
}
}
+ ret = hclge_pfc_tx_stats_get(hdev, requests);
+ if (ret)
+ return ret;
+
+ ret = hclge_pfc_rx_stats_get(hdev, indications);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ pfc->requests[i] = requests[i];
+ pfc->indications[i] = indications[i];
+ }
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 32bc6f68e297..bede4117bad9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -55,6 +55,8 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
{0, }
};
+MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
+
static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
"Mac Loopback test",
"Serdes Loopback test",
@@ -1024,6 +1026,45 @@ static int hclge_parse_speed(int speed_cmd, int *speed)
return 0;
}
+static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
+ u8 speed_ability)
+{
+ unsigned long *supported = hdev->hw.mac.supported;
+
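+ /* translate the firmware speed ability bits into ethtool link mode bits */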
+ if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+ supported);
+
+ if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+ set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+ supported);
+
+ set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
+ set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+}
+
+static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
+{
+ u8 media_type = hdev->hw.mac.media_type;
+
+ if (media_type != HNAE3_MEDIA_TYPE_FIBER)
+ return;
+
+ hclge_parse_fiber_link_mode(hdev, speed_ability);
+}
+
static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
{
struct hclge_cfg_param_cmd *req;
@@ -1072,6 +1113,10 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
req = (struct hclge_cfg_param_cmd *)desc[1].data;
cfg->numa_node_map = __le32_to_cpu(req->param[0]);
+
+ cfg->speed_ability = hnae_get_field(__le32_to_cpu(req->param[1]),
+ HCLGE_CFG_SPEED_ABILITY_M,
+ HCLGE_CFG_SPEED_ABILITY_S);
}
/* hclge_get_cfg: query the static parameter from flash
@@ -1160,6 +1205,8 @@ static int hclge_configure(struct hclge_dev *hdev)
return ret;
}
+ hclge_parse_link_mode(hdev, cfg.speed_ability);
+
if ((hdev->tc_max > HNAE3_MAX_TC) ||
(hdev->tc_max < 1)) {
dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
@@ -2702,7 +2749,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
return 0;
}
-static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
{
struct hclge_desc desc;
struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
@@ -2798,27 +2845,31 @@ static void hclge_reset(struct hclge_dev *hdev)
hclge_notify_client(hdev, HNAE3_UP_CLIENT);
}
-static void hclge_reset_event(struct hnae3_handle *handle,
- enum hnae3_reset_type reset)
+static void hclge_reset_event(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- dev_info(&hdev->pdev->dev,
- "Receive reset event , reset_type is %d", reset);
+ /* check if this is a new reset request and we are not here just because
+ * last reset attempt did not succeed and watchdog hit us again. We will
+ * know this if last reset request did not occur very recently (watchdog
+ * timer = 5*HZ, let us check after a sufficiently large time, say 4*5*HZ).
+ * In case of new request we reset the "reset level" to PF reset.
+ */
+ if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+ handle->reset_level = HNAE3_FUNC_RESET;
- switch (reset) {
- case HNAE3_FUNC_RESET:
- case HNAE3_CORE_RESET:
- case HNAE3_GLOBAL_RESET:
- /* request reset & schedule reset task */
- set_bit(reset, &hdev->reset_request);
- hclge_reset_task_schedule(hdev);
- break;
- default:
- dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
- break;
- }
+ dev_info(&hdev->pdev->dev, "received reset event, reset type is %d",
+ handle->reset_level);
+
+ /* request reset & schedule reset task */
+ set_bit(handle->reset_level, &hdev->reset_request);
+ hclge_reset_task_schedule(hdev);
+
+ if (handle->reset_level < HNAE3_GLOBAL_RESET)
+ handle->reset_level++;
+
+ handle->last_reset_time = jiffies;
}
static void hclge_reset_subtask(struct hclge_dev *hdev)
@@ -2969,6 +3020,24 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
return -EINVAL;
}
+static int hclge_put_vector(struct hnae3_handle *handle, int vector)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int vector_id;
+
+ vector_id = hclge_get_vector_index(hdev, vector);
+ if (vector_id < 0) {
+ dev_err(&hdev->pdev->dev,
+ "Get vector index fail. vector_id =%d\n", vector_id);
+ return vector_id;
+ }
+
+ hclge_free_vector(hdev, vector_id);
+
+ return 0;
+}
+
static u32 hclge_get_rss_key_size(struct hnae3_handle *handle)
{
return HCLGE_RSS_KEY_SIZE;
@@ -2979,31 +3048,6 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
return HCLGE_RSS_IND_TBL_SIZE;
}
-static int hclge_get_rss_algo(struct hclge_dev *hdev)
-{
- struct hclge_rss_config_cmd *req;
- struct hclge_desc desc;
- int rss_hash_algo;
- int ret;
-
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, true);
-
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Get link status error, status =%d\n", ret);
- return ret;
- }
-
- req = (struct hclge_rss_config_cmd *)desc.data;
- rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
-
- if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
- return ETH_RSS_HASH_TOP;
-
- return -EINVAL;
-}
-
static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
const u8 hfunc, const u8 *key)
{
@@ -3042,7 +3086,7 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
return 0;
}
-static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
+static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u8 *indir)
{
struct hclge_rss_indirection_table_cmd *req;
struct hclge_desc desc;
@@ -3116,14 +3160,16 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
req = (struct hclge_rss_input_tuple_cmd *)desc.data;
- req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
- req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
- req->ipv6_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
- req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+ /* Get the tuple cfg from pf */
+ req->ipv4_tcp_en = hdev->vport[0].rss_tuple_sets.ipv4_tcp_en;
+ req->ipv4_udp_en = hdev->vport[0].rss_tuple_sets.ipv4_udp_en;
+ req->ipv4_sctp_en = hdev->vport[0].rss_tuple_sets.ipv4_sctp_en;
+ req->ipv4_fragment_en = hdev->vport[0].rss_tuple_sets.ipv4_fragment_en;
+ req->ipv6_tcp_en = hdev->vport[0].rss_tuple_sets.ipv6_tcp_en;
+ req->ipv6_udp_en = hdev->vport[0].rss_tuple_sets.ipv6_udp_en;
+ req->ipv6_sctp_en = hdev->vport[0].rss_tuple_sets.ipv6_sctp_en;
+ req->ipv6_fragment_en = hdev->vport[0].rss_tuple_sets.ipv6_fragment_en;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -3138,12 +3184,11 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
u8 *key, u8 *hfunc)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
int i;
/* Get hash algorithm */
if (hfunc)
- *hfunc = hclge_get_rss_algo(hdev);
+ *hfunc = vport->rss_algo;
/* Get the RSS Key required by the user */
if (key)
@@ -3167,8 +3212,6 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
/* Set the RSS Hash Key if specififed by the user */
if (key) {
- /* Update the shadow RSS key with user specified qids */
- memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
if (hfunc == ETH_RSS_HASH_TOP ||
hfunc == ETH_RSS_HASH_NO_CHANGE)
@@ -3178,6 +3221,10 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
if (ret)
return ret;
+
+ /* Update the shadow RSS key with user specified qids */
+ memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
+ vport->rss_algo = hash_algo;
}
/* Update the shadow RSS table with user specified qids */
@@ -3185,8 +3232,7 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
vport->rss_indirection_tbl[i] = indir[i];
/* Update the hardware */
- ret = hclge_set_rss_indir_table(hdev, indir);
- return ret;
+ return hclge_set_rss_indir_table(hdev, vport->rss_indirection_tbl);
}
static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
@@ -3229,15 +3275,16 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
return -EINVAL;
req = (struct hclge_rss_input_tuple_cmd *)desc.data;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Read rss tuple fail, status = %d\n", ret);
- return ret;
- }
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
- hclge_cmd_reuse_desc(&desc, false);
+ req->ipv4_tcp_en = vport->rss_tuple_sets.ipv4_tcp_en;
+ req->ipv4_udp_en = vport->rss_tuple_sets.ipv4_udp_en;
+ req->ipv4_sctp_en = vport->rss_tuple_sets.ipv4_sctp_en;
+ req->ipv4_fragment_en = vport->rss_tuple_sets.ipv4_fragment_en;
+ req->ipv6_tcp_en = vport->rss_tuple_sets.ipv6_tcp_en;
+ req->ipv6_udp_en = vport->rss_tuple_sets.ipv6_udp_en;
+ req->ipv6_sctp_en = vport->rss_tuple_sets.ipv6_sctp_en;
+ req->ipv6_fragment_en = vport->rss_tuple_sets.ipv6_fragment_en;
tuple_sets = hclge_get_rss_hash_bits(nfc);
switch (nfc->flow_type) {
@@ -3274,52 +3321,49 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
}
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret)
+ if (ret) {
dev_err(&hdev->pdev->dev,
"Set rss tuple fail, status = %d\n", ret);
+ return ret;
+ }
- return ret;
+ vport->rss_tuple_sets.ipv4_tcp_en = req->ipv4_tcp_en;
+ vport->rss_tuple_sets.ipv4_udp_en = req->ipv4_udp_en;
+ vport->rss_tuple_sets.ipv4_sctp_en = req->ipv4_sctp_en;
+ vport->rss_tuple_sets.ipv4_fragment_en = req->ipv4_fragment_en;
+ vport->rss_tuple_sets.ipv6_tcp_en = req->ipv6_tcp_en;
+ vport->rss_tuple_sets.ipv6_udp_en = req->ipv6_udp_en;
+ vport->rss_tuple_sets.ipv6_sctp_en = req->ipv6_sctp_en;
+ vport->rss_tuple_sets.ipv6_fragment_en = req->ipv6_fragment_en;
+ return 0;
}
static int hclge_get_rss_tuple(struct hnae3_handle *handle,
struct ethtool_rxnfc *nfc)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
- struct hclge_rss_input_tuple_cmd *req;
- struct hclge_desc desc;
u8 tuple_sets;
- int ret;
nfc->data = 0;
- req = (struct hclge_rss_input_tuple_cmd *)desc.data;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Read rss tuple fail, status = %d\n", ret);
- return ret;
- }
-
switch (nfc->flow_type) {
case TCP_V4_FLOW:
- tuple_sets = req->ipv4_tcp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv4_tcp_en;
break;
case UDP_V4_FLOW:
- tuple_sets = req->ipv4_udp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv4_udp_en;
break;
case TCP_V6_FLOW:
- tuple_sets = req->ipv6_tcp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv6_tcp_en;
break;
case UDP_V6_FLOW:
- tuple_sets = req->ipv6_udp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv6_udp_en;
break;
case SCTP_V4_FLOW:
- tuple_sets = req->ipv4_sctp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv4_sctp_en;
break;
case SCTP_V6_FLOW:
- tuple_sets = req->ipv6_sctp_en;
+ tuple_sets = vport->rss_tuple_sets.ipv6_sctp_en;
break;
case IPV4_FLOW:
case IPV6_FLOW:
@@ -3354,50 +3398,28 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
int hclge_rss_init_hw(struct hclge_dev *hdev)
{
- const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
struct hclge_vport *vport = hdev->vport;
+ u8 *rss_indir = vport[0].rss_indirection_tbl;
+ u16 rss_size = vport[0].alloc_rss_size;
+ u8 *key = vport[0].rss_hash_key;
+ u8 hfunc = vport[0].rss_algo;
u16 tc_offset[HCLGE_MAX_TC_NUM];
- u8 rss_key[HCLGE_RSS_KEY_SIZE];
u16 tc_valid[HCLGE_MAX_TC_NUM];
u16 tc_size[HCLGE_MAX_TC_NUM];
- u32 *rss_indir = NULL;
- u16 rss_size = 0, roundup_size;
- const u8 *key;
- int i, ret, j;
-
- rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
- if (!rss_indir)
- return -ENOMEM;
-
- /* Get default RSS key */
- netdev_rss_key_fill(rss_key, HCLGE_RSS_KEY_SIZE);
-
- /* Initialize RSS indirect table for each vport */
- for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
- for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
- vport[j].rss_indirection_tbl[i] =
- i % vport[j].alloc_rss_size;
-
- /* vport 0 is for PF */
- if (j != 0)
- continue;
+ u16 roundup_size;
+ int i, ret;
- rss_size = vport[j].alloc_rss_size;
- rss_indir[i] = vport[j].rss_indirection_tbl[i];
- }
- }
ret = hclge_set_rss_indir_table(hdev, rss_indir);
if (ret)
- goto err;
+ return ret;
- key = rss_key;
ret = hclge_set_rss_algo_key(hdev, hfunc, key);
if (ret)
- goto err;
+ return ret;
ret = hclge_set_rss_input_tuple(hdev);
if (ret)
- goto err;
+ return ret;
/* Each TC has the same queue size, and tc_size set to hardware is
* the log2 of roundup power of two of rss_size, the actual queue
@@ -3407,8 +3429,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
dev_err(&hdev->pdev->dev,
"Configure rss tc size failed, invalid TC_SIZE = %d\n",
rss_size);
- ret = -EINVAL;
- goto err;
+ return -EINVAL;
}
roundup_size = roundup_pow_of_two(rss_size);
@@ -3425,12 +3446,50 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
tc_offset[i] = rss_size * i;
}
- ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+ return hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+}
-err:
- kfree(rss_indir);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int i, j;
- return ret;
+ for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+ for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ vport[j].rss_indirection_tbl[i] =
+ i % vport[j].alloc_rss_size;
+ }
+}
+
+static void hclge_rss_init_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int i;
+
+ netdev_rss_key_fill(vport->rss_hash_key, HCLGE_RSS_KEY_SIZE);
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ vport[i].rss_tuple_sets.ipv4_tcp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv4_udp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv4_sctp_en =
+ HCLGE_RSS_INPUT_TUPLE_SCTP;
+ vport[i].rss_tuple_sets.ipv4_fragment_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv6_tcp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv6_udp_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+ vport[i].rss_tuple_sets.ipv6_sctp_en =
+ HCLGE_RSS_INPUT_TUPLE_SCTP;
+ vport[i].rss_tuple_sets.ipv6_fragment_en =
+ HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+ vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+ }
+
+ hclge_rss_indir_init_cfg(hdev);
}
int hclge_bind_ring_with_vector(struct hclge_vport *vport,
@@ -3533,18 +3592,13 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
}
ret = hclge_bind_ring_with_vector(vport, vector_id, false, ring_chain);
- if (ret) {
+ if (ret)
dev_err(&handle->pdev->dev,
"Unmap ring from vector fail. vectorid=%d, ret =%d\n",
vector_id,
ret);
- return ret;
- }
-
- /* Free this MSIX or MSI vector */
- hclge_free_vector(hdev, vector_id);
- return 0;
+ return ret;
}
int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
@@ -3717,20 +3771,11 @@ static int hclge_ae_start(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- int i, queue_id, ret;
+ int i, ret;
- for (i = 0; i < vport->alloc_tqps; i++) {
- /* todo clear interrupt */
- /* ring enable */
- queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
- if (queue_id < 0) {
- dev_warn(&hdev->pdev->dev,
- "Get invalid queue id, ignore it\n");
- continue;
- }
+ for (i = 0; i < vport->alloc_tqps; i++)
+ hclge_tqp_enable(hdev, i, 0, true);
- hclge_tqp_enable(hdev, queue_id, 0, true);
- }
/* mac enable */
hclge_cfg_mac_mode(hdev, true);
clear_bit(HCLGE_STATE_DOWN, &hdev->state);
@@ -3750,19 +3795,11 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- int i, queue_id;
+ int i;
- for (i = 0; i < vport->alloc_tqps; i++) {
- /* Ring disable */
- queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
- if (queue_id < 0) {
- dev_warn(&hdev->pdev->dev,
- "Get invalid queue id, ignore it\n");
- continue;
- }
+ for (i = 0; i < vport->alloc_tqps; i++)
+ hclge_tqp_enable(hdev, i, 0, false);
- hclge_tqp_enable(hdev, queue_id, 0, false);
- }
/* Mac disable */
hclge_cfg_mac_mode(hdev, false);
@@ -3770,6 +3807,9 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
/* reset tqp stats */
hclge_reset_tqp_stats(handle);
+ del_timer_sync(&hdev->service_timer);
+ cancel_work_sync(&hdev->service_task);
+ hclge_update_link_status(hdev);
}
static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
@@ -3790,11 +3830,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if ((!resp_code) || (resp_code == 1)) {
return_status = 0;
} else if (resp_code == 2) {
- return_status = -EIO;
+ return_status = -ENOSPC;
dev_err(&hdev->pdev->dev,
"add mac addr failed for uc_overflow.\n");
} else if (resp_code == 3) {
- return_status = -EIO;
+ return_status = -ENOSPC;
dev_err(&hdev->pdev->dev,
"add mac addr failed for mc_overflow.\n");
} else {
@@ -3806,7 +3846,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if (!resp_code) {
return_status = 0;
} else if (resp_code == 1) {
- return_status = -EIO;
+ return_status = -ENOENT;
dev_dbg(&hdev->pdev->dev,
"remove mac addr failed for miss.\n");
} else {
@@ -3818,7 +3858,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if (!resp_code) {
return_status = 0;
} else if (resp_code == 1) {
- return_status = -EIO;
+ return_status = -ENOENT;
dev_dbg(&hdev->pdev->dev,
"lookup mac addr failed for miss.\n");
} else {
@@ -3827,7 +3867,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
resp_code);
}
} else {
- return_status = -EIO;
+ return_status = -EINVAL;
dev_err(&hdev->pdev->dev,
"unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
op);
@@ -4118,8 +4158,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
{
struct hclge_dev *hdev = vport->back;
struct hclge_mac_vlan_tbl_entry_cmd req;
- enum hclge_cmd_status status;
+ struct hclge_desc desc;
u16 egress_port = 0;
+ int ret;
/* mac addr check */
if (is_zero_ether_addr(addr) ||
@@ -4151,9 +4192,23 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
hclge_prepare_mac_addr(&req, addr);
- status = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+ /* Look up the mac address in the mac_vlan table, and add
+ * it if the entry is nonexistent. A duplicate unicast entry
+ * is not allowed in the mac vlan table.
+ */
+ ret = hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false);
+ if (ret == -ENOENT)
+ return hclge_add_mac_vlan_tbl(vport, &req, NULL);
+
+ /* check if we just hit the duplicate */
+ if (!ret)
+ ret = -EINVAL;
- return status;
+ dev_err(&hdev->pdev->dev,
+ "PF failed to add unicast entry(%pM) in the MAC table\n",
+ addr);
+
+ return ret;
}
static int hclge_rm_uc_addr(struct hnae3_handle *handle,
@@ -4169,7 +4224,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
{
struct hclge_dev *hdev = vport->back;
struct hclge_mac_vlan_tbl_entry_cmd req;
- enum hclge_cmd_status status;
+ int ret;
/* mac addr check */
if (is_zero_ether_addr(addr) ||
@@ -4185,9 +4240,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
hclge_prepare_mac_addr(&req, addr);
- status = hclge_remove_mac_vlan_tbl(vport, &req);
+ ret = hclge_remove_mac_vlan_tbl(vport, &req);
- return status;
+ return ret;
}
static int hclge_add_mc_addr(struct hnae3_handle *handle,
@@ -4392,7 +4447,8 @@ static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
ether_addr_copy(p, hdev->hw.mac.mac_addr);
}
-static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+ bool is_first)
{
const unsigned char *new_addr = (const unsigned char *)p;
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -4409,11 +4465,9 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
return -EINVAL;
}
- ret = hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr);
- if (ret)
+ if (!is_first && hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
dev_warn(&hdev->pdev->dev,
- "remove old uc mac address fail, ret =%d.\n",
- ret);
+ "remove old uc mac address fail.\n");
ret = hclge_add_uc_addr(handle, new_addr);
if (ret) {
@@ -4421,17 +4475,15 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
"add uc mac address fail, ret =%d.\n",
ret);
- ret = hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr);
- if (ret) {
+ if (!is_first &&
+ hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr))
dev_err(&hdev->pdev->dev,
- "restore uc mac address fail, ret =%d.\n",
- ret);
- }
+ "restore uc mac address fail.\n");
return -EIO;
}
- ret = hclge_mac_pause_addr_cfg(hdev, new_addr);
+ ret = hclge_pause_addr_cfg(hdev, new_addr);
if (ret) {
dev_err(&hdev->pdev->dev,
"configure mac pause address fail, ret =%d.\n",
@@ -4771,11 +4823,9 @@ static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
return hclge_set_vlan_rx_offload_cfg(vport);
}
-static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
{
- struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_config_max_frm_size_cmd *req;
- struct hclge_dev *hdev = vport->back;
struct hclge_desc desc;
int max_frm_size;
int ret;
@@ -4804,6 +4854,27 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
return 0;
}
+static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_set_mac_mtu(hdev, new_mtu);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Change mtu fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Allocate buffer fail, ret =%d\n", ret);
+
+ return ret;
+}
+
static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
bool enable)
{
@@ -4848,21 +4919,36 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
}
+static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
+ u16 queue_id)
+{
+ struct hnae3_queue *queue;
+ struct hclge_tqp *tqp;
+
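+ /* map the handle-local queue id to the device-global TQP index */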
+ queue = handle->kinfo.tqp[queue_id];
+ tqp = container_of(queue, struct hclge_tqp, q);
+
+ return tqp->index;
+}
+
void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
int reset_try_times = 0;
int reset_status;
+ u16 queue_gid;
int ret;
+ queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
+
ret = hclge_tqp_enable(hdev, queue_id, 0, false);
if (ret) {
dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
return;
}
- ret = hclge_send_reset_tqp_cmd(hdev, queue_id, true);
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
if (ret) {
dev_warn(&hdev->pdev->dev,
"Send reset tqp cmd fail, ret = %d\n", ret);
@@ -4873,7 +4959,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
/* Wait for tqp hw reset */
msleep(20);
- reset_status = hclge_get_reset_status(hdev, queue_id);
+ reset_status = hclge_get_reset_status(hdev, queue_gid);
if (reset_status)
break;
}
@@ -4883,7 +4969,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
return;
}
- ret = hclge_send_reset_tqp_cmd(hdev, queue_id, false);
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
if (ret) {
dev_warn(&hdev->pdev->dev,
"Deassert the soft reset fail, ret = %d\n", ret);
@@ -4891,6 +4977,43 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
}
}
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
+{
+ struct hclge_dev *hdev = vport->back;
+ int reset_try_times = 0;
+ int reset_status;
+ u16 queue_gid;
+ int ret;
+
+ queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
+ if (ret) {
+ dev_warn(&hdev->pdev->dev,
+ "Send reset tqp cmd fail, ret = %d\n", ret);
+ return;
+ }
+
+ reset_try_times = 0;
+ while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+ /* Wait for tqp hw reset */
+ msleep(20);
+ reset_status = hclge_get_reset_status(hdev, queue_gid);
+ if (reset_status)
+ break;
+ }
+
+ if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
+ dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
+ return;
+ }
+
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
+ if (ret)
+ dev_warn(&hdev->pdev->dev,
+ "Deassert the soft reset fail, ret = %d\n", ret);
+}
+
static u32 hclge_get_fw_version(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5376,11 +5499,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
return ret;
}
- ret = hclge_buffer_alloc(hdev);
- if (ret) {
- dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
- return ret;
- }
ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
if (ret) {
@@ -5400,6 +5518,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ hclge_rss_init_cfg(hdev);
ret = hclge_rss_init_hw(hdev);
if (ret) {
dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
@@ -5486,12 +5605,6 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- ret = hclge_buffer_alloc(hdev);
- if (ret) {
- dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
- return ret;
- }
-
ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
if (ret) {
dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
@@ -5504,9 +5617,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- ret = hclge_tm_schd_init(hdev);
+ ret = hclge_tm_init_hw(hdev);
if (ret) {
- dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+ dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret);
return ret;
}
@@ -5997,6 +6110,42 @@ static int hclge_update_led_status(struct hclge_dev *hdev)
HCLGE_LED_NO_CHANGE);
}
+static void hclge_get_link_mode(struct hnae3_handle *handle,
+ unsigned long *supported,
+ unsigned long *advertising)
+{
+ unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ unsigned int idx = 0;
+
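+ /* copy the supported and advertising link mode bitmaps cached in mac state */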
+ for (; idx < size; idx++) {
+ supported[idx] = hdev->hw.mac.supported[idx];
+ advertising[idx] = hdev->hw.mac.advertising[idx];
+ }
+}
+
+static void hclge_get_port_type(struct hnae3_handle *handle,
+ u8 *port_type)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u8 media_type = hdev->hw.mac.media_type;
+
+ switch (media_type) {
+ case HNAE3_MEDIA_TYPE_FIBER:
+ *port_type = PORT_FIBRE;
+ break;
+ case HNAE3_MEDIA_TYPE_COPPER:
+ *port_type = PORT_TP;
+ break;
+ case HNAE3_MEDIA_TYPE_UNKNOWN:
+ default:
+ *port_type = PORT_OTHER;
+ break;
+ }
+}
+
static const struct hnae3_ae_ops hclge_ops = {
.init_ae_dev = hclge_init_ae_dev,
.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -6005,6 +6154,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.map_ring_to_vector = hclge_map_ring_to_vector,
.unmap_ring_from_vector = hclge_unmap_ring_frm_vector,
.get_vector = hclge_get_vector,
+ .put_vector = hclge_put_vector,
.set_promisc_mode = hclge_set_promisc_mode,
.set_loopback = hclge_set_loopback,
.start = hclge_ae_start,
@@ -6051,6 +6201,8 @@ static const struct hnae3_ae_ops hclge_ops = {
.get_regs_len = hclge_get_regs_len,
.get_regs = hclge_get_regs,
.set_led_id = hclge_set_led_id,
+ .get_link_mode = hclge_get_link_mode,
+ .get_port_type = hclge_get_port_type,
};
static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index d99a76a9557c..0f4157e71282 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -106,6 +106,12 @@
#define HCLGE_MAC_MIN_FRAME 64
#define HCLGE_MAC_MAX_FRAME 9728
+#define HCLGE_SUPPORT_1G_BIT BIT(0)
+#define HCLGE_SUPPORT_10G_BIT BIT(1)
+#define HCLGE_SUPPORT_25G_BIT BIT(2)
+#define HCLGE_SUPPORT_50G_BIT BIT(3)
+#define HCLGE_SUPPORT_100G_BIT BIT(4)
+
enum HCLGE_DEV_STATE {
HCLGE_STATE_REINITING,
HCLGE_STATE_DOWN,
@@ -170,6 +176,8 @@ struct hclge_mac {
struct phy_device *phydev;
struct mii_bus *mdio_bus;
phy_interface_t phy_if;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
};
struct hclge_hw {
@@ -236,6 +244,7 @@ struct hclge_cfg {
u8 mac_addr[ETH_ALEN];
u8 default_speed;
u32 numa_node_map;
+ u8 speed_ability;
};
struct hclge_tm_info {
@@ -573,12 +582,27 @@ struct hclge_rx_vtag_cfg {
bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
};
+struct hclge_rss_tuple_cfg {
+ u8 ipv4_tcp_en;
+ u8 ipv4_udp_en;
+ u8 ipv4_sctp_en;
+ u8 ipv4_fragment_en;
+ u8 ipv6_tcp_en;
+ u8 ipv6_udp_en;
+ u8 ipv6_sctp_en;
+ u8 ipv6_fragment_en;
+};
+
struct hclge_vport {
u16 alloc_tqps; /* Allocated Tx/Rx queues */
u8 rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
/* User configured lookup table entries */
u8 rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+ int rss_algo; /* User configured hash algorithm */
+ /* User configured rss tuple sets */
+ struct hclge_rss_tuple_cfg rss_tuple_sets;
+
u16 alloc_rss_size;
u16 qs_offset;
@@ -627,8 +651,11 @@ int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
int hclge_buffer_alloc(struct hclge_dev *hdev);
int hclge_rss_init_hw(struct hclge_dev *hdev);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
void hclge_mbx_handler(struct hclge_dev *hdev);
void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
int hclge_cfg_flowctrl(struct hclge_dev *hdev);
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index f38fc5ce9f51..39013334a613 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -79,6 +79,18 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
return status;
}
+int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+{
+ u8 msg_data[2];
+ u8 dest_vfid;
+
+ dest_vfid = (u8)vport->vport_id;
+
+ /* inform the VF that the PF is about to assert its function reset */
+ return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+ HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
{
struct hnae3_ring_chain_node *chain_tmp, *chain;
@@ -105,14 +117,17 @@ static int hclge_get_ring_chain_from_mbx(
struct hnae3_ring_chain_node *ring_chain,
struct hclge_vport *vport)
{
-#define HCLGE_RING_NODE_VARIABLE_NUM 3
-#define HCLGE_RING_MAP_MBX_BASIC_MSG_NUM 3
struct hnae3_ring_chain_node *cur_chain, *new_chain;
int ring_num;
int i;
ring_num = req->msg[2];
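+ /* make sure the requested ring chain fits into a single mailbox message */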
+ if (ring_num > ((HCLGE_MBX_VF_MSG_DATA_NUM -
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+ HCLGE_MBX_RING_NODE_VARIABLE_NUM))
+ return -ENOMEM;
+
hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
ring_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
@@ -128,18 +143,18 @@ static int hclge_get_ring_chain_from_mbx(
goto err;
hnae_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
- req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
- HCLGE_RING_MAP_MBX_BASIC_MSG_NUM]);
+ req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]);
new_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp
- [req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
- HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+ [req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 1]]);
hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
HCLGE_INT_GL_IDX_S,
- req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
- HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 2]);
+ req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]);
cur_chain->next = new_chain;
cur_chain = new_chain;
@@ -196,6 +211,8 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
hclge_rm_uc_addr_common(vport, old_addr);
status = hclge_add_uc_addr_common(vport, mac_addr);
+ if (status)
+ hclge_add_uc_addr_common(vport, old_addr);
} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) {
status = hclge_add_uc_addr_common(vport, mac_addr);
} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
@@ -291,7 +308,7 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport,
/* get the queue related info */
memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16));
- memcpy(&resp_data[2], &hdev->rss_size_max, sizeof(u16));
+ memcpy(&resp_data[2], &vport->nic.kinfo.rss_size, sizeof(u16));
memcpy(&resp_data[4], &hdev->num_desc, sizeof(u16));
memcpy(&resp_data[6], &hdev->rx_buf_len, sizeof(u16));
@@ -304,27 +321,61 @@ static int hclge_get_link_info(struct hclge_vport *vport,
{
struct hclge_dev *hdev = vport->back;
u16 link_status;
- u8 msg_data[2];
+ u8 msg_data[8];
u8 dest_vfid;
+ u16 duplex;
/* mac.link can only be 0 or 1 */
link_status = (u16)hdev->hw.mac.link;
+ duplex = hdev->hw.mac.duplex;
memcpy(&msg_data[0], &link_status, sizeof(u16));
+ memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
+ memcpy(&msg_data[6], &duplex, sizeof(u16));
dest_vfid = mbx_req->mbx_src_vfid;
/* send this requested info to VF */
- return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+ return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
}
-static void hclge_reset_vf_queue(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
{
u16 queue_id;
memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
- hclge_reset_tqp(&vport->nic, queue_id);
+ hclge_reset_vf_queue(vport, queue_id);
+
+ /* send response msg to VF after queue reset completes */
+ hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
+}
+
+static void hclge_reset_vf(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+ mbx_req->mbx_src_vfid);
+
+ /* Acknowledge VF that PF is now about to assert the reset for the VF.
+ * On receiving this message VF will get into pending state and will
+ * start polling for the hardware reset completion status.
+ */
+ ret = hclge_inform_reset_assert_to_vf(vport);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
+ ret, vport->vport_id);
+ return;
+ }
+
+ dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
+ mbx_req->mbx_src_vfid);
+ /* reset this virtual function */
+ hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
}
void hclge_mbx_handler(struct hclge_dev *hdev)
@@ -333,11 +384,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
struct hclge_mbx_vf_to_pf_cmd *req;
struct hclge_vport *vport;
struct hclge_desc *desc;
- int ret;
+ int ret, flag;
+ flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
/* handle all the mailbox requests in the queue */
- while (hnae_get_bit(crq->desc[crq->next_to_use].flag,
- HCLGE_CMDQ_RX_OUTVLD_B)) {
+ while (hnae_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B)) {
desc = &crq->desc[crq->next_to_use];
req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
@@ -360,7 +411,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
ret);
break;
case HCLGE_MBX_SET_UNICAST:
- ret = hclge_set_vf_uc_mac_addr(vport, req, false);
+ ret = hclge_set_vf_uc_mac_addr(vport, req, true);
if (ret)
dev_err(&hdev->pdev->dev,
"PF fail(%d) to set VF UC MAC Addr\n",
@@ -402,7 +453,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
ret);
break;
case HCLGE_MBX_QUEUE_RESET:
- hclge_reset_vf_queue(vport, req);
+ hclge_mbx_reset_vf_queue(vport, req);
+ break;
+ case HCLGE_MBX_RESET:
+ hclge_reset_vf(vport, req);
break;
default:
dev_err(&hdev->pdev->dev,
@@ -410,7 +464,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
req->msg[0]);
break;
}
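+ /* clear the flag of the handled request so it is not processed twice */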
+ crq->desc[crq->next_to_use].flag = 0;
hclge_mbx_ring_ptr_move_crq(crq);
+ flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
}
/* Write back CMDQ_RQ header pointer, M7 need this pointer */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 36bd79a77940..885f25cd7be4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -23,6 +23,9 @@ enum hclge_shaper_level {
HCLGE_SHAPER_LVL_PF = 1,
};
+#define HCLGE_TM_PFC_PKT_GET_CMD_NUM 3
+#define HCLGE_TM_PFC_NUM_GET_PER_CMD 3
+
#define HCLGE_SHAPER_BS_U_DEF 5
#define HCLGE_SHAPER_BS_S_DEF 20
@@ -112,6 +115,56 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
return 0;
}
+static int hclge_pfc_stats_get(struct hclge_dev *hdev,
+ enum hclge_opcode_type opcode, u64 *stats)
+{
+ struct hclge_desc desc[HCLGE_TM_PFC_PKT_GET_CMD_NUM];
+ int ret, i, j;
+
+ if (!(opcode == HCLGE_OPC_QUERY_PFC_RX_PKT_CNT ||
+ opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT))
+ return -EINVAL;
+
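+ /* chain the descriptors: every descriptor but the last carries the NEXT flag */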
+ for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+ hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
+ if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1))
+ desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get pfc pause stats fail, ret = %d.\n", ret);
+ return ret;
+ }
+
+ for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+ struct hclge_pfc_stats_cmd *pfc_stats =
+ (struct hclge_pfc_stats_cmd *)desc[i].data;
+
+ for (j = 0; j < HCLGE_TM_PFC_NUM_GET_PER_CMD; j++) {
+ u32 index = i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j;
+
+ if (index < HCLGE_MAX_TC_NUM)
+ stats[index] =
+ le64_to_cpu(pfc_stats->pkt_num[j]);
+ }
+ }
+ return 0;
+}
+
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+ return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_RX_PKT_CNT, stats);
+}
+
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+ return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_TX_PKT_CNT, stats);
+}
+
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
{
struct hclge_desc desc;
@@ -138,8 +191,8 @@ static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
- u8 pause_trans_gap, u16 pause_trans_time)
+static int hclge_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
+ u8 pause_trans_gap, u16 pause_trans_time)
{
struct hclge_cfg_pause_param_cmd *pause_param;
struct hclge_desc desc;
@@ -155,7 +208,7 @@ static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
{
struct hclge_cfg_pause_param_cmd *pause_param;
struct hclge_desc desc;
@@ -174,7 +227,7 @@ int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
trans_gap = pause_param->pause_trans_gap;
trans_time = le16_to_cpu(pause_param->pause_trans_time);
- return hclge_mac_pause_param_cfg(hdev, mac_addr, trans_gap,
+ return hclge_pause_param_cfg(hdev, mac_addr, trans_gap,
trans_time);
}
@@ -1096,11 +1149,11 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
return hclge_tm_schd_mode_hw(hdev);
}
-static int hclge_mac_pause_param_setup_hw(struct hclge_dev *hdev)
+static int hclge_pause_param_setup_hw(struct hclge_dev *hdev)
{
struct hclge_mac *mac = &hdev->hw.mac;
- return hclge_mac_pause_param_cfg(hdev, mac->mac_addr,
+ return hclge_pause_param_cfg(hdev, mac->mac_addr,
HCLGE_DEFAULT_PAUSE_TRANS_GAP,
HCLGE_DEFAULT_PAUSE_TRANS_TIME);
}
@@ -1151,13 +1204,12 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
int ret;
u8 i;
- if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
- ret = hclge_mac_pause_setup_hw(hdev);
- if (ret)
- return ret;
+ ret = hclge_pause_param_setup_hw(hdev);
+ if (ret)
+ return ret;
- return hclge_mac_pause_param_setup_hw(hdev);
- }
+ if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+ return hclge_mac_pause_setup_hw(hdev);
/* Only DCB-supported dev supports qset back pressure and pfc cmd */
if (!hnae3_dev_dcb_supported(hdev))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 5401e7559437..2dbe177581e9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -109,6 +109,10 @@ struct hclge_cfg_pause_param_cmd {
__le16 pause_trans_time;
};
+struct hclge_pfc_stats_cmd {
+ __le64 pkt_num[3];
+};
+
struct hclge_port_shapping_cmd {
__le32 port_shapping_para;
};
@@ -129,5 +133,7 @@ int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
int hclge_tm_map_cfg(struct hclge_dev *hdev);
int hclge_tm_init_hw(struct hclge_dev *hdev);
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 85985e731311..1bbfe131b596 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -315,6 +315,12 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
goto err_csq;
}
+ /* initialize the pointers of the mailbox async rx queue */
+ hdev->arq.hdev = hdev;
+ hdev->arq.head = 0;
+ hdev->arq.tail = 0;
+ hdev->arq.count = 0;
+
/* get firmware version */
ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
if (ret) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 2caca9317f8c..621c6cbacf76 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -7,7 +7,7 @@
#include <linux/types.h>
#include "hnae3.h"
-#define HCLGEVF_CMDQ_TX_TIMEOUT 200
+#define HCLGEVF_CMDQ_TX_TIMEOUT 30000
#define HCLGEVF_CMDQ_RX_INVLD_B 0
#define HCLGEVF_CMDQ_RX_OUTVLD_B 1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 0d89965f7928..2b8426412cc9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2,6 +2,7 @@
// Copyright (c) 2016-2017 Hisilicon Limited.
#include <linux/etherdevice.h>
+#include <net/rtnetlink.h>
#include "hclgevf_cmd.h"
#include "hclgevf_main.h"
#include "hclge_mbx.h"
@@ -9,6 +10,8 @@
#define HCLGEVF_NAME "hclgevf"
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
static struct hnae3_ae_algo ae_algovf;
static const struct pci_device_id ae_algovf_pci_tbl[] = {
@@ -18,6 +21,8 @@ static const struct pci_device_id ae_algovf_pci_tbl[] = {
{0, }
};
+MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
+
static inline struct hclgevf_dev *hclgevf_ae_get_hdev(
struct hnae3_handle *handle)
{
@@ -206,6 +211,12 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
struct hclgevf_tqp *tqp;
int i;
+ /* if this is an ongoing reset then we need to re-allocate the TQPs
+ * since we cannot assume we would get the same number of TQPs back from PF
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ devm_kfree(&hdev->pdev->dev, hdev->htqp);
+
hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
sizeof(struct hclgevf_tqp), GFP_KERNEL);
if (!hdev->htqp)
@@ -249,6 +260,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
new_tqps = kinfo->rss_size * kinfo->num_tc;
kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
+ /* if this is an ongoing reset then we need to re-allocate the hnae queues
+ * as well since the number of TQPs from PF might have changed.
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ devm_kfree(&hdev->pdev->dev, kinfo->tqp);
+
kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
sizeof(struct hnae3_queue *), GFP_KERNEL);
if (!kinfo->tqp)
@@ -533,13 +550,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
int vector,
struct hnae3_ring_chain_node *ring_chain)
{
-#define HCLGEVF_RING_NODE_VARIABLE_NUM 3
-#define HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM 3
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
struct hnae3_ring_chain_node *node;
struct hclge_mbx_vf_to_pf_cmd *req;
struct hclgevf_desc desc;
- int i, vector_id;
+ int i = 0, vector_id;
int status;
u8 type;
@@ -551,28 +566,33 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
return vector_id;
}
- hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
- type = en ?
- HCLGE_MBX_MAP_RING_TO_VECTOR : HCLGE_MBX_UNMAP_RING_TO_VECTOR;
- req->msg[0] = type;
- req->msg[1] = vector_id; /* vector_id should be id in VF */
-
- i = 0;
for (node = ring_chain; node; node = node->next) {
- i++;
- /* msg[2] is cause num */
- req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i] =
+ int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
+ HCLGE_MBX_RING_NODE_VARIABLE_NUM * i;
+
+ if (i == 0) {
+ hclgevf_cmd_setup_basic_desc(&desc,
+ HCLGEVF_OPC_MBX_VF_TO_PF,
+ false);
+ type = en ?
+ HCLGE_MBX_MAP_RING_TO_VECTOR :
+ HCLGE_MBX_UNMAP_RING_TO_VECTOR;
+ req->msg[0] = type;
+ req->msg[1] = vector_id;
+ }
+
+ req->msg[idx_offset] =
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
- req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
- node->tqp_index;
- req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 2] =
- hnae_get_field(node->int_gl_idx,
- HNAE3_RING_GL_IDX_M,
- HNAE3_RING_GL_IDX_S);
-
- if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
- HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
- HCLGEVF_RING_NODE_VARIABLE_NUM) {
+ req->msg[idx_offset + 1] = node->tqp_index;
+ req->msg[idx_offset + 2] = hnae_get_field(node->int_gl_idx,
+ HNAE3_RING_GL_IDX_M,
+ HNAE3_RING_GL_IDX_S);
+
+ i++;
+ if ((i == (HCLGE_MBX_VF_MSG_DATA_NUM -
+ HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+ HCLGE_MBX_RING_NODE_VARIABLE_NUM) ||
+ !node->next) {
req->msg[2] = i;
status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
@@ -591,17 +611,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
}
}
- if (i > 0) {
- req->msg[2] = i;
-
- status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
- if (status) {
- dev_err(&hdev->pdev->dev,
- "Map TQP fail, status is %d.\n", status);
- return status;
- }
- }
-
return 0;
}
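The reworked loop above batches the ring-chain nodes into fixed-size VF-to-PF mailbox commands and flushes a command whenever it is full or the chain ends, instead of assuming a single command can describe the whole chain. A minimal standalone sketch of that batching arithmetic, assuming the hclge_mbx.h constants referenced in the new code have their mainline values (16 data words per command, 3 header words, 3 words per ring node); the sketch is illustrative only and not part of this patch:

#include <stdio.h>

/* With the assumed values, each mailbox command carries at most
 * (16 - 3) / 3 = 4 ring nodes, so longer chains need several sends.
 */
#define MSG_DATA_NUM	16	/* assumed HCLGE_MBX_VF_MSG_DATA_NUM */
#define BASIC_MSG_NUM	3	/* assumed HCLGE_MBX_RING_MAP_BASIC_MSG_NUM */
#define NODE_WORDS	3	/* assumed HCLGE_MBX_RING_NODE_VARIABLE_NUM */
#define NODES_PER_MSG	((MSG_DATA_NUM - BASIC_MSG_NUM) / NODE_WORDS)

int main(void)
{
	int chain_len = 10;	/* hypothetical ring-chain length */
	int msgs = (chain_len + NODES_PER_MSG - 1) / NODES_PER_MSG;

	printf("%d ring nodes -> %d mailbox command(s), last carrying %d node(s)\n",
	       chain_len, msgs, chain_len - (msgs - 1) * NODES_PER_MSG);
	return 0;
}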
@@ -627,13 +636,18 @@ static int hclgevf_unmap_ring_from_vector(
}
ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
- if (ret) {
+ if (ret)
dev_err(&handle->pdev->dev,
"Unmap ring from vector fail. vector=%d, ret =%d\n",
vector_id,
ret);
- return ret;
- }
+
+ return ret;
+}
+
+static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
hclgevf_free_vector(hdev, vector);
@@ -729,21 +743,25 @@ static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
ether_addr_copy(p, hdev->hw.mac.mac_addr);
}
-static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
+ bool is_first)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
u8 *old_mac_addr = (u8 *)hdev->hw.mac.mac_addr;
u8 *new_mac_addr = (u8 *)p;
u8 msg_data[ETH_ALEN * 2];
+ u16 subcode;
int status;
ether_addr_copy(msg_data, new_mac_addr);
ether_addr_copy(&msg_data[ETH_ALEN], old_mac_addr);
+ subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD :
+ HCLGE_MBX_MAC_VLAN_UC_MODIFY;
+
status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
- HCLGE_MBX_MAC_VLAN_UC_MODIFY,
- msg_data, ETH_ALEN * 2,
- false, NULL, 0);
+ subcode, msg_data, ETH_ALEN * 2,
+ true, NULL, 0);
if (!status)
ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
@@ -816,11 +834,149 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
u8 msg_data[2];
+ int ret;
memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
- hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, 2, false,
- NULL, 0);
+ /* disable vf queue before send queue reset msg to PF */
+ ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
+ if (ret)
+ return;
+
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+ 2, true, NULL, 0);
+}
+
+static int hclgevf_notify_client(struct hclgevf_dev *hdev,
+ enum hnae3_reset_notify_type type)
+{
+ struct hnae3_client *client = hdev->nic_client;
+ struct hnae3_handle *handle = &hdev->nic;
+
+ if (!client->ops->reset_notify)
+ return -EOPNOTSUPP;
+
+ return client->ops->reset_notify(handle, type);
+}
+
+static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_WAIT_MS 500
+#define HCLGEVF_RESET_WAIT_CNT 20
+ u32 val, cnt = 0;
+
+ /* wait to check the hardware reset completion status */
+ val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+ while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+ (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+ msleep(HCLGEVF_RESET_WAIT_MS);
+ val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+ cnt++;
+ }
+
+ /* hardware completion status should be available by this time */
+ if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
+ dev_warn(&hdev->pdev->dev,
+ "could'nt get reset done status from h/w, timeout!\n");
+ return -EBUSY;
+ }
+
+ /* wait a bit longer to let the reset of the stack complete. This
+ * can happen when the reset was asserted by the PF; it also means
+ * we may end up waiting a bit longer even for a plain VF reset.
+ */
+ msleep(5000);
+
+ return 0;
+}
+
+static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ /* uninitialize the nic client */
+ hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+ /* re-initialize the hclge device */
+ ret = hclgevf_init_hdev(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "hclge device re-init failed, VF is disabled!\n");
+ return ret;
+ }
+
+ /* bring up the nic client again */
+ hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+
+ return 0;
+}
+
+static int hclgevf_reset(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ rtnl_lock();
+
+ /* bring down the nic to stop any ongoing TX/RX */
+ hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+ /* check if VF could successfully fetch the hardware reset completion
+ * status from the hardware
+ */
+ ret = hclgevf_reset_wait(hdev);
+ if (ret) {
+ /* can't do much in this situation, will disable VF */
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to fetch H/W reset completion status\n",
+ ret);
+
+ dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+ hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+ rtnl_unlock();
+ return ret;
+ }
+
+ /* now, re-initialize the nic client and ae device */
+ ret = hclgevf_reset_stack(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+
+ /* bring up the nic to enable TX/RX again */
+ hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+{
+ int status;
+ u8 respmsg;
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+ 0, false, &respmsg, sizeof(u8));
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "VF reset request to PF failed(=%d)\n", status);
+
+ return status;
+}
+
+static void hclgevf_reset_event(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
+
+ handle->reset_level = HNAE3_VF_RESET;
+
+ /* reset of this VF requested */
+ set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+
+ handle->last_reset_time = jiffies;
}
static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
@@ -845,10 +1001,22 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
hdev->num_msi_used += 1;
}
-static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
+{
+ if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+ set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+ schedule_work(&hdev->rst_service_task);
+ }
+}
+
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
+ set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
schedule_work(&hdev->mbx_service_task);
+ }
}
static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
@@ -858,6 +1026,16 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
schedule_work(&hdev->service_task);
}
+static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
+{
+ /* if we have any pending mailbox event then schedule the mbx task */
+ if (hdev->mbx_event_pending)
+ hclgevf_mbx_task_schedule(hdev);
+
+ if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
+ hclgevf_reset_task_schedule(hdev);
+}
+
static void hclgevf_service_timer(struct timer_list *t)
{
struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
@@ -867,6 +1045,75 @@ static void hclgevf_service_timer(struct timer_list *t)
hclgevf_task_schedule(hdev);
}
+static void hclgevf_reset_service_task(struct work_struct *work)
+{
+ struct hclgevf_dev *hdev =
+ container_of(work, struct hclgevf_dev, rst_service_task);
+ int ret;
+
+ if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+ return;
+
+ clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+
+ if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
+ &hdev->reset_state)) {
+ /* PF has indicated that it is about to reset the hardware.
+ * We now have to poll and check whether the hardware has actually
+ * completed the reset sequence. Once the hardware reset completes,
+ * the VF needs to reset the client and the ae device.
+ */
+ hdev->reset_attempts = 0;
+
+ ret = hclgevf_reset(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
+ } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
+ &hdev->reset_state)) {
+ /* we could be here when either of the following happens:
+ * 1. reset was initiated due to a watchdog timeout caused by
+ * a. the IMP having been reset earlier, which choked our TX and
+ * made the watchdog react and induce a VF reset. This also
+ * means our cmdq would be unreliable.
+ * b. a TX problem in some other lower layer (e.g. the link
+ * layer not functioning properly).
+ * 2. a VF reset might have been initiated due to some config
+ * change.
+ *
+ * NOTE: there is no better way to detect the above cases than to
+ * react to the PF's response to this reset request. The PF will
+ * ack cases 1b and 2, but we will get no intimation about 1a as
+ * the cmdq would be in an unreliable state, i.e. the mailbox
+ * communication between PF and VF would be broken.
+ */
+
+ /* if we never get into the pending state it means either:
+ * 1. the PF is not receiving our request, which could be due
+ * to an IMP reset
+ * 2. the PF is broken
+ * We cannot do much for 2., but as a first check we can try to
+ * reset our PCIe + stack and see if it alleviates the problem.
+ */
+ if (hdev->reset_attempts > 3) {
+ /* prepare for full reset of stack + pcie interface */
+ hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+
+ /* "defer" schedule the reset task again */
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ } else {
+ hdev->reset_attempts++;
+
+ /* request PF for resetting this VF via mailbox */
+ ret = hclgevf_do_reset(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev,
+ "VF rst fail, stack will call\n");
+ }
+ }
+
+ clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+}
+
static void hclgevf_mailbox_service_task(struct work_struct *work)
{
struct hclgevf_dev *hdev;
@@ -878,7 +1125,7 @@ static void hclgevf_mailbox_service_task(struct work_struct *work)
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
- hclgevf_mbx_handler(hdev);
+ hclgevf_mbx_async_handler(hdev);
clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
}
@@ -894,6 +1141,8 @@ static void hclgevf_service_task(struct work_struct *work)
*/
hclgevf_request_link_info(hdev);
+ hclgevf_deferred_task_schedule(hdev);
+
clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
}
@@ -936,8 +1185,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
if (!hclgevf_check_event_cause(hdev, &clearval))
goto skip_sched;
- /* schedule the VF mailbox service task, if not already scheduled */
- hclgevf_mbx_task_schedule(hdev);
+ hclgevf_mbx_handler(hdev);
hclgevf_clear_event_cause(hdev, clearval);
@@ -959,6 +1207,22 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
return hclgevf_get_tc_info(hdev);
}
+static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+ if (!hdev)
+ return -ENOMEM;
+
+ hdev->pdev = pdev;
+ hdev->ae_dev = ae_dev;
+ ae_dev->priv = hdev;
+
+ return 0;
+}
+
static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
{
struct hnae3_handle *roce = &hdev->roce;
@@ -1057,10 +1321,17 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle)
/* reset tqp stats */
hclgevf_reset_tqp_stats(handle);
+ del_timer_sync(&hdev->service_timer);
+ cancel_work_sync(&hdev->service_task);
+ hclgevf_update_link_status(hdev, 0);
}
static void hclgevf_state_init(struct hclgevf_dev *hdev)
{
+ /* if this is an ongoing reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return;
+
/* setup tasks for the MBX */
INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
@@ -1072,6 +1343,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
INIT_WORK(&hdev->service_task, hclgevf_service_task);
clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+ INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
+
mutex_init(&hdev->mbx_resp.mbx_mutex);
/* bring the device down */
@@ -1088,6 +1361,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
cancel_work_sync(&hdev->service_task);
if (hdev->mbx_service_task.func)
cancel_work_sync(&hdev->mbx_service_task);
+ if (hdev->rst_service_task.func)
+ cancel_work_sync(&hdev->rst_service_task);
mutex_destroy(&hdev->mbx_resp.mbx_mutex);
}
@@ -1098,6 +1373,10 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
int vectors;
int i;
+ /* if this is an ongoing reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
@@ -1148,6 +1427,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
{
int ret = 0;
+ /* if this is an ongoing reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
hclgevf_get_misc_vector(hdev);
ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
@@ -1258,6 +1541,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
struct hclgevf_hw *hw;
int ret;
+ /* check if we need to skip the PCI initialization. This happens
+ * when the device is undergoing a VF reset. Otherwise, i.e. when
+ * the device is not going through *any* reset or is undergoing a
+ * full reset, the PCI interface needs to be (re-)initialized.
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
ret = pci_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "failed to enable PCI device\n");
@@ -1309,19 +1600,16 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
pci_set_drvdata(pdev, NULL);
}
-static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
{
- struct pci_dev *pdev = ae_dev->pdev;
- struct hclgevf_dev *hdev;
+ struct pci_dev *pdev = hdev->pdev;
int ret;
- hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
- if (!hdev)
- return -ENOMEM;
-
- hdev->pdev = pdev;
- hdev->ae_dev = ae_dev;
- ae_dev->priv = hdev;
+ /* check if the device is undergoing a full reset (i.e. PCIe as well) */
+ if (hclgevf_dev_ongoing_full_reset(hdev)) {
+ dev_warn(&pdev->dev, "device is undergoing a full reset\n");
+ hclgevf_uninit_hdev(hdev);
+ }
ret = hclgevf_pci_init(hdev);
if (ret) {
@@ -1406,15 +1694,38 @@ err_irq_init:
return ret;
}
-static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
{
- struct hclgevf_dev *hdev = ae_dev->priv;
-
hclgevf_cmd_uninit(hdev);
hclgevf_misc_irq_uninit(hdev);
hclgevf_state_uninit(hdev);
hclgevf_uninit_msi(hdev);
hclgevf_pci_uninit(hdev);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ int ret;
+
+ ret = hclgevf_alloc_hdev(ae_dev);
+ if (ret) {
+ dev_err(&pdev->dev, "hclge device allocation failed\n");
+ return ret;
+ }
+
+ ret = hclgevf_init_hdev(ae_dev->priv);
+ if (ret)
+ dev_err(&pdev->dev, "hclge device initialization failed\n");
+
+ return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hclgevf_uninit_hdev(hdev);
ae_dev->priv = NULL;
}
@@ -1447,6 +1758,43 @@ static void hclgevf_get_channels(struct hnae3_handle *handle,
ch->combined_count = hdev->num_tqps;
}
+static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle,
+ u16 *free_tqps, u16 *max_rss_size)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ *free_tqps = 0;
+ *max_rss_size = hdev->rss_size_max;
+}
+
+static int hclgevf_get_status(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hdev->hw.mac.link;
+}
+
+static void hclgevf_get_ksettings_an_result(struct hnae3_handle *handle,
+ u8 *auto_neg, u32 *speed,
+ u8 *duplex)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ if (speed)
+ *speed = hdev->hw.mac.speed;
+ if (duplex)
+ *duplex = hdev->hw.mac.duplex;
+ if (auto_neg)
+ *auto_neg = AUTONEG_DISABLE;
+}
+
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+ u8 duplex)
+{
+ hdev->hw.mac.speed = speed;
+ hdev->hw.mac.duplex = duplex;
+}
+
static const struct hnae3_ae_ops hclgevf_ops = {
.init_ae_dev = hclgevf_init_ae_dev,
.uninit_ae_dev = hclgevf_uninit_ae_dev,
@@ -1457,6 +1805,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
.map_ring_to_vector = hclgevf_map_ring_to_vector,
.unmap_ring_from_vector = hclgevf_unmap_ring_from_vector,
.get_vector = hclgevf_get_vector,
+ .put_vector = hclgevf_put_vector,
.reset_queue = hclgevf_reset_tqp,
.set_promisc_mode = hclgevf_set_promisc_mode,
.get_mac_addr = hclgevf_get_mac_addr,
@@ -1476,7 +1825,11 @@ static const struct hnae3_ae_ops hclgevf_ops = {
.get_tc_size = hclgevf_get_tc_size,
.get_fw_version = hclgevf_get_fw_version,
.set_vlan_filter = hclgevf_set_vlan_filter,
+ .reset_event = hclgevf_reset_event,
.get_channels = hclgevf_get_channels,
+ .get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
+ .get_status = hclgevf_get_status,
+ .get_ksettings_an_result = hclgevf_get_ksettings_an_result,
};
static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index a63bee4a3674..a477a7c36bbd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -34,6 +34,9 @@
#define HCLGEVF_VECTOR0_RX_CMDQ_INT_B 1
#define HCLGEVF_TQP_RESET_TRY_TIMES 10
+/* Reset related Registers */
+#define HCLGEVF_FUN_RST_ING 0x20C00
+#define HCLGEVF_FUN_RST_ING_B 0
#define HCLGEVF_RSS_IND_TBL_SIZE 512
#define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff
@@ -52,6 +55,8 @@ enum hclgevf_states {
HCLGEVF_STATE_DISABLED,
/* task states */
HCLGEVF_STATE_SERVICE_SCHED,
+ HCLGEVF_STATE_RST_SERVICE_SCHED,
+ HCLGEVF_STATE_RST_HANDLING,
HCLGEVF_STATE_MBX_SERVICE_SCHED,
HCLGEVF_STATE_MBX_HANDLING,
};
@@ -61,6 +66,8 @@ enum hclgevf_states {
struct hclgevf_mac {
u8 mac_addr[ETH_ALEN];
int link;
+ u8 duplex;
+ u32 speed;
};
struct hclgevf_hw {
@@ -120,6 +127,11 @@ struct hclgevf_dev {
struct hclgevf_rss_cfg rss_cfg;
unsigned long state;
+#define HCLGEVF_RESET_REQUESTED 0
+#define HCLGEVF_RESET_PENDING 1
+ unsigned long reset_state; /* requested, pending */
+ u32 reset_attempts;
+
u32 fw_version;
u16 num_tqps; /* num task queue pairs of this PF */
@@ -140,10 +152,13 @@ struct hclgevf_dev {
int *vector_irq;
bool accept_mta_mc; /* whether to accept mta filter multicast */
+ bool mbx_event_pending;
struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
struct timer_list service_timer;
struct work_struct service_task;
+ struct work_struct rst_service_task;
struct work_struct mbx_service_task;
struct hclgevf_tqp *htqp;
@@ -156,9 +171,29 @@ struct hclgevf_dev {
u32 flag;
};
+static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+{
+ return (hdev &&
+ (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+ (hdev->nic.reset_level == HNAE3_VF_RESET));
+}
+
+static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
+{
+ return (hdev &&
+ (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+ (hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+}
+
int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
const u8 *msg_data, u8 msg_len, bool need_resp,
u8 *resp_data, u16 resp_len);
void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev);
+
void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+ u8 duplex);
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index e39cad285fa9..a28618428338 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -54,6 +54,10 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
mbx_resp = &hdev->mbx_resp;
r_code0 = (u16)(mbx_resp->origin_mbx_msg >> 16);
r_code1 = (u16)(mbx_resp->origin_mbx_msg & 0xff);
+
+ if (mbx_resp->resp_status)
+ return mbx_resp->resp_status;
+
if (resp_data)
memcpy(resp_data, &mbx_resp->additional_info[0], resp_len);
@@ -128,7 +132,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
struct hclge_mbx_pf_to_vf_cmd *req;
struct hclgevf_cmq_ring *crq;
struct hclgevf_desc *desc;
- u16 link_status, flag;
+ u16 *msg_q;
+ u16 flag;
u8 *temp;
int i;
@@ -140,6 +145,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
desc = &crq->desc[crq->next_to_use];
req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
+ /* synchronous messages are time critical and need preferential
+ * treatment. Therefore, we need to acknowledge all the sync
+ * responses as quickly as possible so that waiting tasks do not
+ * time out, while queueing the async messages for later
+ * processing in the context of the mailbox task, i.e. the slow path.
+ */
switch (req->msg[0]) {
case HCLGE_MBX_PF_VF_RESP:
if (resp->received_resp)
@@ -159,10 +170,31 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
}
break;
case HCLGE_MBX_LINK_STAT_CHANGE:
- link_status = le16_to_cpu(req->msg[1]);
+ case HCLGE_MBX_ASSERTING_RESET:
+ /* mark this mbx event as pending. This is required as we
+ * might lose the interrupt event while the mbx task is busy
+ * handling it. The flag is cleared when the mbx task enters
+ * the handling state.
+ */
+ hdev->mbx_event_pending = true;
- /* update upper layer with new link link status */
- hclgevf_update_link_status(hdev, link_status);
+ /* drop the async msg if the ARQ is full and continue with
+ * the next message
+ */
+ if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+ dev_warn(&hdev->pdev->dev,
+ "Async Q full, dropping msg(%d)\n",
+ req->msg[1]);
+ break;
+ }
+
+ /* tail the async message in arq */
+ msg_q = hdev->arq.msg_q[hdev->arq.tail];
+ memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+ hclge_mbx_tail_ptr_move_arq(hdev->arq);
+ hdev->arq.count++;
+
+ hclgevf_mbx_task_schedule(hdev);
break;
default:
@@ -171,6 +203,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
req->msg[0]);
break;
}
+ crq->desc[crq->next_to_use].flag = 0;
hclge_mbx_ring_ptr_move_crq(crq);
flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
}
@@ -179,3 +212,57 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
crq->next_to_use);
}
+
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
+{
+ u16 link_status;
+ u16 *msg_q;
+ u8 duplex;
+ u32 speed;
+ u32 tail;
+
+ /* we can safely clear it now as we are at the start of the async
+ * message processing
+ */
+ hdev->mbx_event_pending = false;
+
+ tail = hdev->arq.tail;
+
+ /* process all the async queue messages */
+ while (tail != hdev->arq.head) {
+ msg_q = hdev->arq.msg_q[hdev->arq.head];
+
+ switch (msg_q[0]) {
+ case HCLGE_MBX_LINK_STAT_CHANGE:
+ link_status = le16_to_cpu(msg_q[1]);
+ memcpy(&speed, &msg_q[2], sizeof(speed));
+ duplex = (u8)le16_to_cpu(msg_q[4]);
+
+ /* update the upper layer with the new link status */
+ hclgevf_update_link_status(hdev, link_status);
+ hclgevf_update_speed_duplex(hdev, speed, duplex);
+
+ break;
+ case HCLGE_MBX_ASSERTING_RESET:
+ /* the PF has asserted a reset, hence the VF should go into the
+ * pending state and poll the hardware reset status until the
+ * hardware reset has fully completed. After this the stack should
+ * eventually be re-initialized.
+ */
+ hdev->nic.reset_level = HNAE3_VF_RESET;
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+
+ break;
+ default:
+ dev_err(&hdev->pdev->dev,
+ "fetched unsupported(%d) message from arq\n",
+ msg_q[0]);
+ break;
+ }
+
+ hclge_mbx_head_ptr_move_arq(hdev->arq);
+ hdev->arq.count--;
+ msg_q = hdev->arq.msg_q[hdev->arq.head];
+ }
+}
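The two handlers above split mailbox processing: the interrupt path acknowledges synchronous responses immediately and only queues asynchronous events (link change, reset assertion) on the new ARQ ring, which the mailbox task drains later. A minimal, self-contained sketch of that producer/consumer pattern; the structure and sizes below are illustrative assumptions, not the driver's actual hclgevf_mbx_arq_ring or HCLGE_MBX_MAX_ARQ_* definitions:

#include <stdbool.h>
#include <string.h>

#define ARQ_MSG_NUM	8	/* assumed ring depth */
#define ARQ_MSG_SIZE	8	/* assumed message size, in u16 words */

struct sketch_arq {
	unsigned short msg_q[ARQ_MSG_NUM][ARQ_MSG_SIZE];
	unsigned int head, tail, count;
};

/* interrupt/fast path: queue an async message, drop it if the ring is full */
static bool sketch_arq_push(struct sketch_arq *q, const unsigned short *msg)
{
	if (q->count >= ARQ_MSG_NUM)
		return false;		/* mirrors the "Async Q full" warning */
	memcpy(q->msg_q[q->tail], msg, sizeof(q->msg_q[q->tail]));
	q->tail = (q->tail + 1) % ARQ_MSG_NUM;
	q->count++;
	return true;
}

/* mailbox task/slow path: drain everything queued up to the snapshotted tail */
static void sketch_arq_drain(struct sketch_arq *q,
			     void (*handle)(const unsigned short *msg))
{
	unsigned int tail = q->tail;

	while (q->head != tail) {
		handle(q->msg_q[q->head]);
		q->head = (q->head + 1) % ARQ_MSG_NUM;
		q->count--;
	}
}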
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 1b3cc8bb0705..5632c030811b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -90,7 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
static int ibmvnic_remove(struct vio_dev *);
-static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs(struct ibmvnic_adapter *, bool);
static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
static void send_map_query(struct ibmvnic_adapter *adapter);
static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -361,14 +361,14 @@ static void release_stats_buffers(struct ibmvnic_adapter *adapter)
static int init_stats_buffers(struct ibmvnic_adapter *adapter)
{
adapter->tx_stats_buffers =
- kcalloc(adapter->req_tx_queues,
+ kcalloc(IBMVNIC_MAX_QUEUES,
sizeof(struct ibmvnic_tx_queue_stats),
GFP_KERNEL);
if (!adapter->tx_stats_buffers)
return -ENOMEM;
adapter->rx_stats_buffers =
- kcalloc(adapter->req_rx_queues,
+ kcalloc(IBMVNIC_MAX_QUEUES,
sizeof(struct ibmvnic_rx_queue_stats),
GFP_KERNEL);
if (!adapter->rx_stats_buffers)
@@ -509,7 +509,7 @@ static int init_rx_pools(struct net_device *netdev)
return -1;
}
- adapter->num_active_rx_pools = 0;
+ adapter->num_active_rx_pools = rxadd_subcrqs;
for (i = 0; i < rxadd_subcrqs; i++) {
rx_pool = &adapter->rx_pool[i];
@@ -554,41 +554,44 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_free = 0;
}
- adapter->num_active_rx_pools = rxadd_subcrqs;
+ return 0;
+}
+
+static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ int rc, i;
+
+ rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+ if (rc)
+ return rc;
+
+ memset(tx_pool->tx_buff, 0,
+ tx_pool->num_buffers *
+ sizeof(struct ibmvnic_tx_buff));
+
+ for (i = 0; i < tx_pool->num_buffers; i++)
+ tx_pool->free_map[i] = i;
+
+ tx_pool->consumer_index = 0;
+ tx_pool->producer_index = 0;
return 0;
}
static int reset_tx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
int tx_scrqs;
- int i, j, rc;
+ int i, rc;
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
for (i = 0; i < tx_scrqs; i++) {
- netdev_dbg(adapter->netdev, "Re-setting tx_pool[%d]\n", i);
-
- tx_pool = &adapter->tx_pool[i];
-
- rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+ rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
if (rc)
return rc;
-
- rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+ rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
if (rc)
return rc;
-
- memset(tx_pool->tx_buff, 0,
- adapter->req_tx_entries_per_subcrq *
- sizeof(struct ibmvnic_tx_buff));
-
- for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
- tx_pool->free_map[j] = j;
-
- tx_pool->consumer_index = 0;
- tx_pool->producer_index = 0;
- tx_pool->tso_index = 0;
}
return 0;
@@ -605,35 +608,70 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
adapter->vpd = NULL;
}
+static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ kfree(tx_pool->tx_buff);
+ kfree(tx_pool->free_map);
+ free_long_term_buff(adapter, &tx_pool->long_term_buff);
+}
+
static void release_tx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
int i;
if (!adapter->tx_pool)
return;
for (i = 0; i < adapter->num_active_tx_pools; i++) {
- netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
- tx_pool = &adapter->tx_pool[i];
- kfree(tx_pool->tx_buff);
- free_long_term_buff(adapter, &tx_pool->long_term_buff);
- free_long_term_buff(adapter, &tx_pool->tso_ltb);
- kfree(tx_pool->free_map);
+ release_one_tx_pool(adapter, &adapter->tx_pool[i]);
+ release_one_tx_pool(adapter, &adapter->tso_pool[i]);
}
kfree(adapter->tx_pool);
adapter->tx_pool = NULL;
+ kfree(adapter->tso_pool);
+ adapter->tso_pool = NULL;
adapter->num_active_tx_pools = 0;
}
+static int init_one_tx_pool(struct net_device *netdev,
+ struct ibmvnic_tx_pool *tx_pool,
+ int num_entries, int buf_size)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ tx_pool->tx_buff = kcalloc(num_entries,
+ sizeof(struct ibmvnic_tx_buff),
+ GFP_KERNEL);
+ if (!tx_pool->tx_buff)
+ return -1;
+
+ if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
+ num_entries * buf_size))
+ return -1;
+
+ tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL);
+ if (!tx_pool->free_map)
+ return -1;
+
+ for (i = 0; i < num_entries; i++)
+ tx_pool->free_map[i] = i;
+
+ tx_pool->consumer_index = 0;
+ tx_pool->producer_index = 0;
+ tx_pool->num_buffers = num_entries;
+ tx_pool->buf_size = buf_size;
+
+ return 0;
+}
+
static int init_tx_pools(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- struct device *dev = &adapter->vdev->dev;
- struct ibmvnic_tx_pool *tx_pool;
int tx_subcrqs;
- int i, j;
+ int i, rc;
tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
adapter->tx_pool = kcalloc(tx_subcrqs,
@@ -641,57 +679,31 @@ static int init_tx_pools(struct net_device *netdev)
if (!adapter->tx_pool)
return -1;
- adapter->num_active_tx_pools = 0;
-
- for (i = 0; i < tx_subcrqs; i++) {
- tx_pool = &adapter->tx_pool[i];
-
- netdev_dbg(adapter->netdev,
- "Initializing tx_pool[%d], %lld buffs\n",
- i, adapter->req_tx_entries_per_subcrq);
-
- tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
- sizeof(struct ibmvnic_tx_buff),
- GFP_KERNEL);
- if (!tx_pool->tx_buff) {
- dev_err(dev, "tx pool buffer allocation failed\n");
- release_tx_pools(adapter);
- return -1;
- }
+ adapter->tso_pool = kcalloc(tx_subcrqs,
+ sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+ if (!adapter->tso_pool)
+ return -1;
- if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
- adapter->req_tx_entries_per_subcrq *
- adapter->req_mtu)) {
- release_tx_pools(adapter);
- return -1;
- }
+ adapter->num_active_tx_pools = tx_subcrqs;
- /* alloc TSO ltb */
- if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
- IBMVNIC_TSO_BUFS *
- IBMVNIC_TSO_BUF_SZ)) {
+ for (i = 0; i < tx_subcrqs; i++) {
+ rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
+ adapter->req_tx_entries_per_subcrq,
+ adapter->req_mtu + VLAN_HLEN);
+ if (rc) {
release_tx_pools(adapter);
- return -1;
+ return rc;
}
- tx_pool->tso_index = 0;
-
- tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
- sizeof(int), GFP_KERNEL);
- if (!tx_pool->free_map) {
+ rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
+ IBMVNIC_TSO_BUFS,
+ IBMVNIC_TSO_BUF_SZ);
+ if (rc) {
release_tx_pools(adapter);
- return -1;
+ return rc;
}
-
- for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
- tx_pool->free_map[j] = j;
-
- tx_pool->consumer_index = 0;
- tx_pool->producer_index = 0;
}
- adapter->num_active_tx_pools = tx_subcrqs;
-
return 0;
}
@@ -740,6 +752,45 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
adapter->napi_enabled = false;
}
+static int init_napi(struct ibmvnic_adapter *adapter)
+{
+ int i;
+
+ adapter->napi = kcalloc(adapter->req_rx_queues,
+ sizeof(struct napi_struct), GFP_KERNEL);
+ if (!adapter->napi)
+ return -ENOMEM;
+
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
+ netif_napi_add(adapter->netdev, &adapter->napi[i],
+ ibmvnic_poll, NAPI_POLL_WEIGHT);
+ }
+
+ adapter->num_active_rx_napi = adapter->req_rx_queues;
+ return 0;
+}
+
+static void release_napi(struct ibmvnic_adapter *adapter)
+{
+ int i;
+
+ if (!adapter->napi)
+ return;
+
+ for (i = 0; i < adapter->num_active_rx_napi; i++) {
+ if (&adapter->napi[i]) {
+ netdev_dbg(adapter->netdev,
+ "Releasing napi[%d]\n", i);
+ netif_napi_del(&adapter->napi[i]);
+ }
+ }
+
+ kfree(adapter->napi);
+ adapter->napi = NULL;
+ adapter->num_active_rx_napi = 0;
+}
+
static int ibmvnic_login(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -750,7 +801,7 @@ static int ibmvnic_login(struct net_device *netdev)
do {
if (adapter->renegotiate) {
adapter->renegotiate = false;
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
@@ -774,8 +825,11 @@ static int ibmvnic_login(struct net_device *netdev)
}
reinit_completion(&adapter->init_done);
- send_login(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done,
+ rc = send_login(adapter);
+ if (rc) {
+ dev_err(dev, "Unable to attempt device login\n");
+ return rc;
+ } else if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
dev_err(dev, "Login timeout\n");
return -1;
@@ -805,29 +859,13 @@ static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
static void release_resources(struct ibmvnic_adapter *adapter)
{
- int i;
-
release_vpd_data(adapter);
release_tx_pools(adapter);
release_rx_pools(adapter);
- release_stats_token(adapter);
- release_stats_buffers(adapter);
release_error_buffers(adapter);
-
- if (adapter->napi) {
- for (i = 0; i < adapter->req_rx_queues; i++) {
- if (&adapter->napi[i]) {
- netdev_dbg(adapter->netdev,
- "Releasing napi[%d]\n", i);
- netif_napi_del(&adapter->napi[i]);
- }
- }
- }
- kfree(adapter->napi);
- adapter->napi = NULL;
-
+ release_napi(adapter);
release_login_rsp_buffer(adapter);
}
@@ -947,20 +985,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
static int init_resources(struct ibmvnic_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- int i, rc;
+ int rc;
rc = set_real_num_queues(netdev);
if (rc)
return rc;
- rc = init_stats_buffers(adapter);
- if (rc)
- return rc;
-
- rc = init_stats_token(adapter);
- if (rc)
- return rc;
-
adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
if (!adapter->vpd)
return -ENOMEM;
@@ -973,16 +1003,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
}
adapter->map_id = 1;
- adapter->napi = kcalloc(adapter->req_rx_queues,
- sizeof(struct napi_struct), GFP_KERNEL);
- if (!adapter->napi)
- return -ENOMEM;
- for (i = 0; i < adapter->req_rx_queues; i++) {
- netdev_dbg(netdev, "Adding napi[%d]\n", i);
- netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
- NAPI_POLL_WEIGHT);
- }
+ rc = init_napi(adapter);
+ if (rc)
+ return rc;
send_map_query(adapter);
@@ -1076,6 +1100,7 @@ static int ibmvnic_open(struct net_device *netdev)
static void clean_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
+ struct ibmvnic_rx_buff *rx_buff;
u64 rx_entries;
int rx_scrqs;
int i, j;
@@ -1089,56 +1114,64 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
/* Free any remaining skbs in the rx buffer pools */
for (i = 0; i < rx_scrqs; i++) {
rx_pool = &adapter->rx_pool[i];
- if (!rx_pool)
+ if (!rx_pool || !rx_pool->rx_buff)
continue;
netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
for (j = 0; j < rx_entries; j++) {
- if (rx_pool->rx_buff[j].skb) {
- dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
- rx_pool->rx_buff[j].skb = NULL;
+ rx_buff = &rx_pool->rx_buff[j];
+ if (rx_buff && rx_buff->skb) {
+ dev_kfree_skb_any(rx_buff->skb);
+ rx_buff->skb = NULL;
}
}
}
}
-static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
{
- struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_buff *tx_buff;
u64 tx_entries;
+ int i;
+
+ if (!tx_pool || !tx_pool->tx_buff)
+ return;
+
+ tx_entries = tx_pool->num_buffers;
+
+ for (i = 0; i < tx_entries; i++) {
+ tx_buff = &tx_pool->tx_buff[i];
+ if (tx_buff && tx_buff->skb) {
+ dev_kfree_skb_any(tx_buff->skb);
+ tx_buff->skb = NULL;
+ }
+ }
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
int tx_scrqs;
- int i, j;
+ int i;
- if (!adapter->tx_pool)
+ if (!adapter->tx_pool || !adapter->tso_pool)
return;
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
- tx_entries = adapter->req_tx_entries_per_subcrq;
/* Free any remaining skbs in the tx buffer pools */
for (i = 0; i < tx_scrqs; i++) {
- tx_pool = &adapter->tx_pool[i];
- if (!tx_pool)
- continue;
-
netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
- for (j = 0; j < tx_entries; j++) {
- if (tx_pool->tx_buff[j].skb) {
- dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
- tx_pool->tx_buff[j].skb = NULL;
- }
- }
+ clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
+ clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
}
}
-static int __ibmvnic_close(struct net_device *netdev)
+static void ibmvnic_cleanup(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int rc = 0;
int i;
- adapter->state = VNIC_CLOSING;
-
/* ensure that transmissions are stopped if called by do_reset */
if (adapter->resetting)
netif_tx_disable(netdev);
@@ -1150,30 +1183,16 @@ static int __ibmvnic_close(struct net_device *netdev)
if (adapter->tx_scrq) {
for (i = 0; i < adapter->req_tx_queues; i++)
if (adapter->tx_scrq[i]->irq) {
- netdev_dbg(adapter->netdev,
+ netdev_dbg(netdev,
"Disabling tx_scrq[%d] irq\n", i);
disable_irq(adapter->tx_scrq[i]->irq);
}
}
- rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
- if (rc)
- return rc;
-
if (adapter->rx_scrq) {
for (i = 0; i < adapter->req_rx_queues; i++) {
- int retries = 10;
-
- while (pending_scrq(adapter, adapter->rx_scrq[i])) {
- retries--;
- mdelay(100);
-
- if (retries == 0)
- break;
- }
-
if (adapter->rx_scrq[i]->irq) {
- netdev_dbg(adapter->netdev,
+ netdev_dbg(netdev,
"Disabling rx_scrq[%d] irq\n", i);
disable_irq(adapter->rx_scrq[i]->irq);
}
@@ -1181,8 +1200,20 @@ static int __ibmvnic_close(struct net_device *netdev)
}
clean_rx_pools(adapter);
clean_tx_pools(adapter);
+}
+
+static int __ibmvnic_close(struct net_device *netdev)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int rc = 0;
+
+ adapter->state = VNIC_CLOSING;
+ rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+ if (rc)
+ return rc;
+ ibmvnic_cleanup(netdev);
adapter->state = VNIC_CLOSED;
- return rc;
+ return 0;
}
static int ibmvnic_close(struct net_device *netdev)
@@ -1214,7 +1245,10 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb,
int len = 0;
u8 *hdr;
- hdr_len[0] = sizeof(struct ethhdr);
+ if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb))
+ hdr_len[0] = sizeof(struct vlan_ethhdr);
+ else
+ hdr_len[0] = sizeof(struct ethhdr);
if (skb->protocol == htons(ETH_P_IP)) {
hdr_len[1] = ip_hdr(skb)->ihl * 4;
@@ -1330,6 +1364,21 @@ static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
txbuff->indir_arr + 1);
}
+static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ /* For some backing devices, mishandling of small packets
+ * can result in a loss of connection or TX stall. Device
+ * architects recommend that no packet should be smaller
+ * than the minimum MTU value provided to the driver, so
+ * pad any packets to that length.
+ */
+ if (skb->len < netdev->min_mtu)
+ return skb_put_padto(skb, netdev->min_mtu);
+
+ return 0;
+}
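One point worth noting about the workaround above: skb_put_padto() consumes the skb when padding fails, so the caller only needs to count the drop and return NETDEV_TX_OK without freeing the skb again. A hypothetical caller sketch (not the driver's code) showing that contract:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Illustrative only: pad undersized frames to the device minimum MTU.
 * On failure, skb_put_padto() has already freed the skb.
 */
static int sketch_pad_small_packet(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->len < dev->min_mtu)
		return skb_put_padto(skb, dev->min_mtu);
	return 0;
}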
+
static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -1367,7 +1416,17 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
goto out;
}
- tx_pool = &adapter->tx_pool[queue_num];
+ if (ibmvnic_xmit_workarounds(skb, netdev)) {
+ tx_dropped++;
+ tx_send_failed++;
+ ret = NETDEV_TX_OK;
+ goto out;
+ }
+ if (skb_is_gso(skb))
+ tx_pool = &adapter->tso_pool[queue_num];
+ else
+ tx_pool = &adapter->tx_pool[queue_num];
+
tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -1375,21 +1434,21 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
index = tx_pool->free_map[tx_pool->consumer_index];
- if (skb_is_gso(skb)) {
- offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
- dst = tx_pool->tso_ltb.buff + offset;
- memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
- data_dma_addr = tx_pool->tso_ltb.addr + offset;
- tx_pool->tso_index++;
- if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
- tx_pool->tso_index = 0;
- } else {
- offset = index * adapter->req_mtu;
- dst = tx_pool->long_term_buff.buff + offset;
- memset(dst, 0, adapter->req_mtu);
- data_dma_addr = tx_pool->long_term_buff.addr + offset;
+ if (index == IBMVNIC_INVALID_MAP) {
+ dev_kfree_skb_any(skb);
+ tx_send_failed++;
+ tx_dropped++;
+ ret = NETDEV_TX_OK;
+ goto out;
}
+ tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
+
+ offset = index * tx_pool->buf_size;
+ dst = tx_pool->long_term_buff.buff + offset;
+ memset(dst, 0, tx_pool->buf_size);
+ data_dma_addr = tx_pool->long_term_buff.addr + offset;
+
if (skb_shinfo(skb)->nr_frags) {
int cur, i;
@@ -1411,8 +1470,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
tx_pool->consumer_index =
- (tx_pool->consumer_index + 1) %
- adapter->req_tx_entries_per_subcrq;
+ (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
@@ -1428,11 +1486,13 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_crq_elem = 1;
tx_crq.v1.n_sge = 1;
tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
- tx_crq.v1.correlator = cpu_to_be32(index);
+
if (skb_is_gso(skb))
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+ tx_crq.v1.correlator =
+ cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK);
else
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+ tx_crq.v1.correlator = cpu_to_be32(index);
+ tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
tx_crq.v1.sge_len = cpu_to_be32(skb->len);
tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
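The correlator now carries both the buffer index and, in its top bit, which pool the buffer came from (IBMVNIC_TSO_POOL_MASK, defined as 0x80000000 in the header change further down), so the completion handler can route the index back to either tx_pool or tso_pool. A small standalone sketch of the encode/decode round trip, mirroring the completion path later in this patch:

#include <stdio.h>

#define TSO_POOL_MASK 0x80000000u	/* matches IBMVNIC_TSO_POOL_MASK */

int main(void)
{
	unsigned int index = 5;
	int is_gso = 1;

	/* transmit side: tag TSO buffers so completions can be routed */
	unsigned int correlator = is_gso ? (index | TSO_POOL_MASK) : index;

	/* completion side: pick the pool, then strip the tag */
	const char *pool = (correlator & TSO_POOL_MASK) ? "tso_pool" : "tx_pool";
	unsigned int buf = correlator & ~TSO_POOL_MASK;

	printf("correlator 0x%08x -> %s[%u]\n", correlator, pool, buf);
	return 0;
}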
@@ -1467,6 +1527,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if ((*hdrs >> 7) & 1) {
build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
tx_crq.v1.n_crq_elem = num_entries;
+ tx_buff->num_entries = num_entries;
tx_buff->indir_arr[0] = tx_crq;
tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
sizeof(tx_buff->indir_arr),
@@ -1479,24 +1540,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_map_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- goto out;
+ goto tx_err_out;
}
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
(u64)tx_buff->indir_dma,
(u64)num_entries);
} else {
+ tx_buff->num_entries = num_entries;
lpar_rc = send_subcrq(adapter, handle_array[queue_num],
&tx_crq);
}
if (lpar_rc != H_SUCCESS) {
dev_err(dev, "tx failed with code %ld\n", lpar_rc);
-
- if (tx_pool->consumer_index == 0)
- tx_pool->consumer_index =
- adapter->req_tx_entries_per_subcrq - 1;
- else
- tx_pool->consumer_index--;
-
dev_kfree_skb_any(skb);
tx_buff->skb = NULL;
@@ -1512,12 +1567,12 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_send_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- goto out;
+ goto tx_err_out;
}
- if (atomic_inc_return(&tx_scrq->used)
+ if (atomic_add_return(num_entries, &tx_scrq->used)
>= adapter->req_tx_entries_per_subcrq) {
- netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
netif_stop_subqueue(netdev, queue_num);
}
@@ -1525,7 +1580,16 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_bytes += skb->len;
txq->trans_start = jiffies;
ret = NETDEV_TX_OK;
+ goto out;
+tx_err_out:
+ /* roll back consumer index and map array */
+ if (tx_pool->consumer_index == 0)
+ tx_pool->consumer_index =
+ tx_pool->num_buffers - 1;
+ else
+ tx_pool->consumer_index--;
+ tx_pool->free_map[tx_pool->consumer_index] = index;
out:
netdev->stats.tx_dropped += tx_dropped;
netdev->stats.tx_bytes += tx_bytes;
@@ -1644,16 +1708,19 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = ibmvnic_reenable_crq_queue(adapter);
if (rc)
return 0;
+ ibmvnic_cleanup(netdev);
+ } else if (rwi->reset_reason == VNIC_RESET_FAILOVER) {
+ ibmvnic_cleanup(netdev);
+ } else {
+ rc = __ibmvnic_close(netdev);
+ if (rc)
+ return rc;
}
- rc = __ibmvnic_close(netdev);
- if (rc)
- return rc;
-
if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
adapter->wait_for_reset) {
release_resources(adapter);
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
}
@@ -1691,6 +1758,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
release_tx_pools(adapter);
init_rx_pools(netdev);
init_tx_pools(netdev);
+
+ release_napi(adapter);
+ init_napi(adapter);
} else {
rc = reset_tx_pools(adapter);
if (rc)
@@ -1699,12 +1769,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = reset_rx_pools(adapter);
if (rc)
return rc;
-
- if (reset_state == VNIC_CLOSED)
- return 0;
}
}
+ adapter->state = VNIC_CLOSED;
+
+ if (reset_state == VNIC_CLOSED)
+ return 0;
+
rc = __ibmvnic_open(netdev);
if (rc) {
if (list_empty(&adapter->rwi_list))
@@ -2011,6 +2083,23 @@ static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
return wait_for_reset(adapter);
}
+static netdev_features_t ibmvnic_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ /* Some backing hardware adapters cannot
+ * handle packets with an MSS less than 224
+ * or with only one segment.
+ */
+ if (skb_is_gso(skb)) {
+ if (skb_shinfo(skb)->gso_size < 224 ||
+ skb_shinfo(skb)->gso_segs == 1)
+ features &= ~NETIF_F_GSO_MASK;
+ }
+
+ return features;
+}
+
static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_open = ibmvnic_open,
.ndo_stop = ibmvnic_close,
@@ -2023,6 +2112,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_poll_controller = ibmvnic_netpoll_controller,
#endif
.ndo_change_mtu = ibmvnic_change_mtu,
+ .ndo_features_check = ibmvnic_features_check,
};
/* ethtool functions */
@@ -2295,24 +2385,27 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
}
static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
- struct ibmvnic_sub_crq_queue *scrq)
+ struct ibmvnic_sub_crq_queue *scrq,
+ bool do_h_free)
{
struct device *dev = &adapter->vdev->dev;
long rc;
netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
- /* Close the sub-crqs */
- do {
- rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
- adapter->vdev->unit_address,
- scrq->crq_num);
- } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+ if (do_h_free) {
+ /* Close the sub-crqs */
+ do {
+ rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+ adapter->vdev->unit_address,
+ scrq->crq_num);
+ } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
- if (rc) {
- netdev_err(adapter->netdev,
- "Failed to release sub-CRQ %16lx, rc = %ld\n",
- scrq->crq_num, rc);
+ if (rc) {
+ netdev_err(adapter->netdev,
+ "Failed to release sub-CRQ %16lx, rc = %ld\n",
+ scrq->crq_num, rc);
+ }
}
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
@@ -2380,12 +2473,12 @@ zero_page_failed:
return NULL;
}
-static void release_sub_crqs(struct ibmvnic_adapter *adapter)
+static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
{
int i;
if (adapter->tx_scrq) {
- for (i = 0; i < adapter->req_tx_queues; i++) {
+ for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
if (!adapter->tx_scrq[i])
continue;
@@ -2398,15 +2491,17 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
adapter->tx_scrq[i]->irq = 0;
}
- release_sub_crq_queue(adapter, adapter->tx_scrq[i]);
+ release_sub_crq_queue(adapter, adapter->tx_scrq[i],
+ do_h_free);
}
kfree(adapter->tx_scrq);
adapter->tx_scrq = NULL;
+ adapter->num_active_tx_scrqs = 0;
}
if (adapter->rx_scrq) {
- for (i = 0; i < adapter->req_rx_queues; i++) {
+ for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
if (!adapter->rx_scrq[i])
continue;
@@ -2419,11 +2514,13 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
adapter->rx_scrq[i]->irq = 0;
}
- release_sub_crq_queue(adapter, adapter->rx_scrq[i]);
+ release_sub_crq_queue(adapter, adapter->rx_scrq[i],
+ do_h_free);
}
kfree(adapter->rx_scrq);
adapter->rx_scrq = NULL;
+ adapter->num_active_rx_scrqs = 0;
}
}
@@ -2464,6 +2561,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
struct ibmvnic_sub_crq_queue *scrq)
{
struct device *dev = &adapter->vdev->dev;
+ struct ibmvnic_tx_pool *tx_pool;
struct ibmvnic_tx_buff *txbuff;
union sub_crq *next;
int index;
@@ -2473,6 +2571,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
restart_loop:
while (pending_scrq(adapter, scrq)) {
unsigned int pool = scrq->pool_index;
+ int num_entries = 0;
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
@@ -2482,7 +2581,14 @@ restart_loop:
continue;
}
index = be32_to_cpu(next->tx_comp.correlators[i]);
- txbuff = &adapter->tx_pool[pool].tx_buff[index];
+ if (index & IBMVNIC_TSO_POOL_MASK) {
+ tx_pool = &adapter->tso_pool[pool];
+ index &= ~IBMVNIC_TSO_POOL_MASK;
+ } else {
+ tx_pool = &adapter->tx_pool[pool];
+ }
+
+ txbuff = &tx_pool->tx_buff[index];
for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
if (!txbuff->data_dma[j])
@@ -2503,22 +2609,23 @@ restart_loop:
txbuff->skb = NULL;
}
- adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
- producer_index] = index;
- adapter->tx_pool[pool].producer_index =
- (adapter->tx_pool[pool].producer_index + 1) %
- adapter->req_tx_entries_per_subcrq;
+ num_entries += txbuff->num_entries;
+
+ tx_pool->free_map[tx_pool->producer_index] = index;
+ tx_pool->producer_index =
+ (tx_pool->producer_index + 1) %
+ tx_pool->num_buffers;
}
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
- if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+ if (atomic_sub_return(num_entries, &scrq->used) <=
(adapter->req_tx_entries_per_subcrq / 2) &&
__netif_subqueue_stopped(adapter->netdev,
scrq->pool_index)) {
netif_wake_subqueue(adapter->netdev, scrq->pool_index);
- netdev_info(adapter->netdev, "Started queue %d\n",
- scrq->pool_index);
+ netdev_dbg(adapter->netdev, "Started queue %d\n",
+ scrq->pool_index);
}
}
@@ -2590,7 +2697,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
scrq->irq, rc);
irq_dispose_mapping(scrq->irq);
- goto req_rx_irq_failed;
+ goto req_tx_irq_failed;
}
}
@@ -2626,7 +2733,7 @@ req_tx_irq_failed:
free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
irq_dispose_mapping(adapter->rx_scrq[j]->irq);
}
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
return rc;
}
@@ -2688,6 +2795,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
for (i = 0; i < adapter->req_tx_queues; i++) {
adapter->tx_scrq[i] = allqueues[i];
adapter->tx_scrq[i]->pool_index = i;
+ adapter->num_active_tx_scrqs++;
}
adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
@@ -2698,6 +2806,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
for (i = 0; i < adapter->req_rx_queues; i++) {
adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
adapter->rx_scrq[i]->scrq_num = i;
+ adapter->num_active_rx_scrqs++;
}
kfree(allqueues);
@@ -2708,7 +2817,7 @@ rx_failed:
adapter->tx_scrq = NULL;
tx_failed:
for (i = 0; i < registered_queues; i++)
- release_sub_crq_queue(adapter, allqueues[i]);
+ release_sub_crq_queue(adapter, allqueues[i], 1);
kfree(allqueues);
return -1;
}
@@ -3048,7 +3157,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
strncpy(&vlcd->name, adapter->netdev->name, len);
}
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
struct ibmvnic_login_buffer *login_buffer;
@@ -3064,6 +3173,12 @@ static void send_login(struct ibmvnic_adapter *adapter)
struct vnic_login_client_data *vlcd;
int i;
+ if (!adapter->tx_scrq || !adapter->rx_scrq) {
+ netdev_err(adapter->netdev,
+ "RX or TX queues are not allocated, device login failed\n");
+ return -1;
+ }
+
release_login_rsp_buffer(adapter);
client_data_len = vnic_client_data_len(adapter);
@@ -3161,7 +3276,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
crq.login.len = cpu_to_be32(buffer_size);
ibmvnic_send_crq(adapter, &crq);
- return;
+ return 0;
buf_rsp_map_failed:
kfree(login_rsp_buffer);
@@ -3170,7 +3285,7 @@ buf_rsp_alloc_failed:
buf_map_failed:
kfree(login_buffer);
buf_alloc_failed:
- return;
+ return -1;
}
static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -4335,6 +4450,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
unsigned long timeout = msecs_to_jiffies(30000);
+ u64 old_num_rx_queues, old_num_tx_queues;
int rc;
if (adapter->resetting && !adapter->wait_for_reset) {
@@ -4352,6 +4468,9 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
adapter->from_passive_init = false;
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+
init_completion(&adapter->init_done);
adapter->init_done_rc = 0;
ibmvnic_send_crq_init(adapter);
@@ -4371,10 +4490,18 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
return -1;
}
- if (adapter->resetting && !adapter->wait_for_reset)
- rc = reset_sub_crq_queues(adapter);
- else
+ if (adapter->resetting && !adapter->wait_for_reset) {
+ if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+ release_sub_crqs(adapter, 0);
+ rc = init_sub_crqs(adapter);
+ } else {
+ rc = reset_sub_crq_queues(adapter);
+ }
+ } else {
rc = init_sub_crqs(adapter);
+ }
+
if (rc) {
dev_err(dev, "Initialization of sub crqs failed\n");
release_crq_queue(adapter);
@@ -4387,6 +4514,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
release_crq_queue(adapter);
}
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ return rc;
+
+ rc = init_stats_token(adapter);
+ if (rc)
+ return rc;
+
return rc;
}
@@ -4474,7 +4609,7 @@ ibmvnic_register_fail:
device_remove_file(&dev->dev, &dev_attr_failover);
ibmvnic_init_fail:
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
free_netdev(netdev);
@@ -4491,9 +4626,12 @@ static int ibmvnic_remove(struct vio_dev *dev)
mutex_lock(&adapter->reset_lock);
release_resources(adapter);
- release_sub_crqs(adapter);
+ release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
+ release_stats_token(adapter);
+ release_stats_buffers(adapter);
+
adapter->state = VNIC_REMOVED;
mutex_unlock(&adapter->reset_lock);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index fe21a6e2ddae..89efe700eafe 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -43,6 +43,7 @@
#define IBMVNIC_TSO_BUF_SZ 65536
#define IBMVNIC_TSO_BUFS 64
+#define IBMVNIC_TSO_POOL_MASK 0x80000000
#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
#define IBMVNIC_BUFFER_HLEN 500
@@ -909,6 +910,7 @@ struct ibmvnic_tx_buff {
union sub_crq indir_arr[6];
u8 hdr_data[140];
dma_addr_t indir_dma;
+ int num_entries;
};
struct ibmvnic_tx_pool {
@@ -916,11 +918,9 @@ struct ibmvnic_tx_pool {
int *free_map;
int consumer_index;
int producer_index;
- wait_queue_head_t ibmvnic_tx_comp_q;
- struct task_struct *work_thread;
struct ibmvnic_long_term_buff long_term_buff;
- struct ibmvnic_long_term_buff tso_ltb;
- int tso_index;
+ int num_buffers;
+ int buf_size;
};
struct ibmvnic_rx_buff {
@@ -1043,6 +1043,7 @@ struct ibmvnic_adapter {
u64 promisc;
struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_pool *tso_pool;
struct completion init_done;
int init_done_rc;
@@ -1091,8 +1092,11 @@ struct ibmvnic_adapter {
u64 opt_rxba_entries_per_subcrq;
__be64 tx_rx_desc_req;
u8 map_id;
- u64 num_active_rx_pools;
- u64 num_active_tx_pools;
+ u32 num_active_rx_scrqs;
+ u32 num_active_rx_pools;
+ u32 num_active_rx_napi;
+ u32 num_active_tx_scrqs;
+ u32 num_active_tx_pools;
struct tasklet_struct tasklet;
enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 736a9f087bc9..c58a5377a287 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
* fm10k_read_hw_stats_32b - Reads value of 32-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing a 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of the register and returns the delta
* between the base and the current value.
@@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
* fm10k_read_hw_stats_48b - Reads value of 48-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing the lower 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of 2 registers, combined to represent a 48-bit
* statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
/**
* fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- * @hw: pointer to the hardware structure
* @q: pointer to the ring of hardware statistics queue
* @idx: index pointing to the start of the ring iteration
* @count: number of queues to iterate over
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 8e12aae065d8..2c93d719438f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,13 +28,13 @@
#include "fm10k.h"
-#define DRV_VERSION "0.22.1-k"
+#define DRV_VERSION "0.23.4-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
- "Copyright(c) 2013 - 2017 Intel Corporation.";
+ "Copyright(c) 2013 - 2018 Intel Corporation.";
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index a38ae5c54da3..75c99aed3c41 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
/**
* fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ti: Tunnel endpoint information
*
* This function is called when a new UDP tunnel port has been added.
@@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
/**
* fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
*
* This function is called when a new UDP tunnel port is deleted. The freed
* port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
* @glort: the target glort for this update
* @addr: the address to update
* @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
*
* This function queues up a MAC request for sending to the switch manager.
* A separate thread monitors the queue and sends updates to the switch
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index a434fecfdfeb..50f53e403ef5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
[fm10k_device_vf] = &fm10k_vf_info,
};
-/**
+/*
* fm10k_pci_tbl - PCI Device ID Table
*
* Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
/**
* fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
**/
static void fm10k_service_timer(struct timer_list *t)
{
@@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
{
@@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
{
@@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
/**
* fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
*
* This function serves two purposes. First it strobes the interrupt lines
* in order to make certain interrupts are occurring. Secondly it sets the
@@ -1995,6 +1995,7 @@ skip_tx_dma_drain:
/**
* fm10k_sw_init - Initialize general software structures
* @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
*
* fm10k_sw_init initializes the interface private data structure.
* Fields are initialized based on PCI device information and
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index d6406fc31ffb..bee192fe2ffb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
/**
* fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
* @vid: VLAN ID to correct
*
* Will report an error if the VLAN ID is out of range. For VID = 0, it will
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index f8e87bf086b9..9d0d31da426b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
* @msg: Pointer to message block
* @attr_id: Attribute ID
* @mac_addr: MAC address to be stored
+ * @vlan: VLAN to be stored
*
* This function will reorder a MAC address to be CPU endian and store it
* in the attribute buffer. It will return success if provided with a
@@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
/**
* fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
* @attr: Pointer to attribute
- * @attr_id: Attribute ID
* @mac_addr: location of buffer to store MAC address
+ * @vlan: location of buffer to store VLAN
*
* This function pulls the MAC address back out of the attribute and will
* place it in the array pointed by by mac_addr. It will return success
@@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
* @hw: Pointer to hardware structure
* @msg: Pointer to message
* @mbx: Pointer to mailbox information structure
- * @func: Function array containing list of message handling functions
+ * @data: Pointer to message handler data structure
*
* This function should be the first function called upon receiving a
* message. The handler will identify the message type and call the correct
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 46e9f4e0a02c..271ab1a861b7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -507,6 +507,7 @@ struct i40e_pf {
#define I40E_HW_STOP_FW_LLDP BIT(16)
#define I40E_HW_PORT_ID_VALID BIT(17)
#define I40E_HW_RESTART_AUTONEG BIT(18)
+#define I40E_HW_STOPPABLE_FW_LLDP BIT(19)
u64 flags;
#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0)
@@ -824,6 +825,7 @@ struct i40e_q_vector {
struct i40e_ring_container rx;
struct i40e_ring_container tx;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
u8 num_ringpairs; /* total number of ring pairs in vector */
cpumask_t affinity_mask;
@@ -832,8 +834,6 @@ struct i40e_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
- u8 itr_countdown; /* when 0 should adjust ITR */
} ____cacheline_internodealigned_in_smp;
/* lan device */
@@ -1041,6 +1041,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+void i40e_client_update_msix_info(struct i40e_pf *pf);
int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id);
/**
* i40e_irq_dynamic_enable - Enable default interrupt generation settings
@@ -1109,4 +1110,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index a852775d3059..0dfc52772c45 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_DEFAULT = 0xFF,
};
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+ BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XFI) | \
+ BIT_ULL(I40E_PHY_TYPE_SFI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+ BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+ BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+ BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
#define I40E_LINK_SPEED_1000MB_SHIFT 0x2
#define I40E_LINK_SPEED_10GB_SHIFT 0x3
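
[Editor's note, not part of the patch] I40E_PHY_TYPES_BITMASK above folds every PHY type the driver recognizes into one 64-bit constant, so a membership test reduces to a single AND against BIT_ULL(phy_type). A hedged sketch of that check follows; only the macro and the enum come from the header above, and the helper name is hypothetical.

/* Hedged sketch: is this AQ-reported PHY type one the driver knows about?
 * Built the same way the mask itself is built, with BIT_ULL(); illustrative
 * only.
 */
static bool i40e_phy_type_is_known(enum i40e_aq_phy_type phy_type)
{
	return phy_type < 64 &&
	       (BIT_ULL(phy_type) & I40E_PHY_TYPES_BITMASK);
}
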
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 0de9610c1d8d..704695a61645 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -287,6 +287,17 @@ out:
return capable;
}
+void i40e_client_update_msix_info(struct i40e_pf *pf)
+{
+ struct i40e_client_instance *cdev = pf->cinst;
+
+ if (!cdev || !cdev->client)
+ return;
+
+ cdev->lan_info.msix_count = pf->num_iwarp_msix;
+ cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+}
+
/**
* i40e_client_add_instance - add a client instance struct to the instance list
* @pf: pointer to the board struct
@@ -328,9 +339,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
return;
}
- cdev->lan_info.msix_count = pf->num_iwarp_msix;
- cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
-
mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
struct netdev_hw_addr, list);
if (mac)
@@ -340,6 +348,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
cdev->client = registered_client;
pf->cinst = cdev;
+
+ i40e_client_update_msix_info(pf);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index ef5a868aae46..4fa31d87d9d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1208,6 +1208,29 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
return media;
}
+/**
+ * i40e_poll_globr - Poll for Global Reset completion
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+static i40e_status i40e_poll_globr(struct i40e_hw *hw,
+ u32 retry_limit)
+{
+ u32 cnt, reg = 0;
+
+ for (cnt = 0; cnt < retry_limit; cnt++) {
+ reg = rd32(hw, I40E_GLGEN_RSTAT);
+ if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+ return 0;
+ msleep(100);
+ }
+
+ hw_dbg(hw, "Global reset failed.\n");
+ hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg);
+
+ return I40E_ERR_RESET_FAILED;
+}
+
#define I40E_PF_RESET_WAIT_COUNT_A0 200
#define I40E_PF_RESET_WAIT_COUNT 200
/**
@@ -1284,14 +1307,14 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
break;
reg2 = rd32(hw, I40E_GLGEN_RSTAT);
- if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
- hw_dbg(hw, "Core reset upcoming. Skipping PF reset request.\n");
- hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg2);
- return I40E_ERR_NOT_READY;
- }
+ if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK)
+ break;
usleep_range(1000, 2000);
}
- if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+ if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+ if (i40e_poll_globr(hw, grst_del))
+ return I40E_ERR_RESET_FAILED;
+ } else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
hw_dbg(hw, "PF reset polling failed to complete.\n");
return I40E_ERR_RESET_FAILED;
}
@@ -2415,6 +2438,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
* i40e_aq_set_switch_config
* @hw: pointer to the hardware structure
* @flags: bit flag values to set
+ * @mode: cloud filter mode
* @valid_flags: which bit flags to set
* @mode: cloud filter mode
* @cmd_details: pointer to command details structure or NULL
@@ -3200,9 +3224,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
u32 valid_functions, num_functions;
u32 number, logical_id, phys_id;
struct i40e_hw_capabilities *p;
+ u16 id, ocp_cfg_word0;
+ i40e_status status;
u8 major_rev;
u32 i = 0;
- u16 id;
cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
@@ -3389,6 +3414,26 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
hw->num_ports++;
}
+ /* OCP cards case: if a mezz is removed the Ethernet port is at
+ * disabled state in PRTGEN_CNF register. Additional NVM read is
+ * needed in order to check if we are dealing with OCP card.
+ * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting
+ * physical ports results in wrong partition id calculation and thus
+ * not supporting WoL.
+ */
+ if (hw->mac.type == I40E_MAC_X722) {
+ if (!i40e_acquire_nvm(hw, I40E_RESOURCE_READ)) {
+ status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR,
+ 2 * I40E_SR_OCP_CFG_WORD0,
+ sizeof(ocp_cfg_word0),
+ &ocp_cfg_word0, true, NULL);
+ if (!status &&
+ (ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
+ hw->num_ports = 4;
+ i40e_release_nvm(hw);
+ }
+ }
+
valid_functions = p->valid_functions;
num_functions = 0;
while (valid_functions) {
@@ -5531,7 +5576,7 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
* function.
*
**/
-i40e_status
+enum i40e_status_code
i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count)
@@ -5625,7 +5670,7 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
* function.
*
**/
-i40e_status
+enum i40e_status_code
i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 4c3b4243cf65..b829fd365693 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n",
(unsigned long int)nd->vlan_features);
}
- dev_info(&pf->pdev->dev,
- " vlgrp: & = %p\n", vsi->active_vlans);
+ dev_info(&pf->pdev->dev, " active_vlans is %s\n",
+ vsi->active_vlans ? "<valid>" : "<null>");
dev_info(&pf->pdev->dev,
" flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
@@ -270,14 +270,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
continue;
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: desc = %p\n",
- i, rx_ring->desc);
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
- i, rx_ring->dev,
- rx_ring->netdev,
- rx_ring->rx_bi);
- dev_info(&pf->pdev->dev,
" rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
i, *rx_ring->state,
rx_ring->queue_index,
@@ -307,17 +299,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
rx_ring->rx_stats.realloc_count,
rx_ring->rx_stats.page_reuse_count);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: size = %i, dma = 0x%08lx\n",
- i, rx_ring->size,
- (unsigned long int)rx_ring->dma);
- dev_info(&pf->pdev->dev,
- " rx_rings[%i]: vsi = %p, q_vector = %p\n",
- i, rx_ring->vsi,
- rx_ring->q_vector);
+ " rx_rings[%i]: size = %i\n",
+ i, rx_ring->size);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: rx_itr_setting = %d (%s)\n",
- i, rx_ring->rx_itr_setting,
- ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
+ " rx_rings[%i]: itr_setting = %d (%s)\n",
+ i, rx_ring->itr_setting,
+ ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
@@ -326,14 +313,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
continue;
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: desc = %p\n",
- i, tx_ring->desc);
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
- i, tx_ring->dev,
- tx_ring->netdev,
- tx_ring->tx_bi);
- dev_info(&pf->pdev->dev,
" tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
i, *tx_ring->state,
tx_ring->queue_index,
@@ -355,20 +334,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
tx_ring->tx_stats.tx_busy,
tx_ring->tx_stats.tx_done_old);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: size = %i, dma = 0x%08lx\n",
- i, tx_ring->size,
- (unsigned long int)tx_ring->dma);
- dev_info(&pf->pdev->dev,
- " tx_rings[%i]: vsi = %p, q_vector = %p\n",
- i, tx_ring->vsi,
- tx_ring->q_vector);
+ " tx_rings[%i]: size = %i\n",
+ i, tx_ring->size);
dev_info(&pf->pdev->dev,
" tx_rings[%i]: DCB tc = %d\n",
i, tx_ring->dcb_tc);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: tx_itr_setting = %d (%s)\n",
- i, tx_ring->tx_itr_setting,
- ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
+ " tx_rings[%i]: itr_setting = %d (%s)\n",
+ i, tx_ring->itr_setting,
+ ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
}
rcu_read_unlock();
dev_info(&pf->pdev->dev,
@@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
- if (vsi->back)
- dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back);
dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx);
dev_info(&pf->pdev->dev,
" tc_config: numtc = %d, enabled_tc = 0x%x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2f5bee713fef..0c7e7de595d3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+ I40E_PRIV_FLAG("link-down-on-close",
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
I40E_PRIV_FLAG("disable-source-pruning",
I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
@@ -857,7 +859,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
if (hw->device_id == I40E_DEV_ID_KX_B ||
hw->device_id == I40E_DEV_ID_KX_C ||
hw->device_id == I40E_DEV_ID_20G_KR2 ||
- hw->device_id == I40E_DEV_ID_20G_KR2_A) {
+ hw->device_id == I40E_DEV_ID_20G_KR2_A ||
+ hw->device_id == I40E_DEV_ID_25G_B ||
+ hw->device_id == I40E_DEV_ID_KX_X722) {
netdev_info(netdev, "Changing settings is not supported on backplane.\n");
return -EOPNOTSUPP;
}
@@ -868,23 +872,21 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* save autoneg out of ksettings */
autoneg = copy_ks.base.autoneg;
- memset(&safe_ks, 0, sizeof(safe_ks));
+ /* get our own copy of the bits to check against */
+ memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+ safe_ks.base.cmd = copy_ks.base.cmd;
+ safe_ks.base.link_mode_masks_nwords =
+ copy_ks.base.link_mode_masks_nwords;
+ i40e_get_link_ksettings(netdev, &safe_ks);
+
/* Get link modes supported by hardware and check against modes
* requested by the user. Return an error if unsupported mode was set.
*/
- i40e_phy_type_to_ethtool(pf, &safe_ks);
if (!bitmap_subset(copy_ks.link_modes.advertising,
safe_ks.link_modes.supported,
__ETHTOOL_LINK_MODE_MASK_NBITS))
return -EINVAL;
- /* get our own copy of the bits to check against */
- memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
- safe_ks.base.cmd = copy_ks.base.cmd;
- safe_ks.base.link_mode_masks_nwords =
- copy_ks.base.link_mode_masks_nwords;
- i40e_get_link_ksettings(netdev, &safe_ks);
-
/* set autoneg back to what it currently is */
copy_ks.base.autoneg = safe_ks.base.autoneg;
@@ -2244,14 +2246,14 @@ static int __i40e_get_coalesce(struct net_device *netdev,
rx_ring = vsi->rx_rings[queue];
tx_ring = vsi->tx_rings[queue];
- if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+ if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
ec->use_adaptive_rx_coalesce = 1;
- if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+ if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
ec->use_adaptive_tx_coalesce = 1;
- ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
- ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
/* we use the _usecs_high to store/set the interrupt rate limit
* that the hardware supports, that almost but not quite
@@ -2311,34 +2313,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_q_vector *q_vector;
- u16 vector, intrl;
+ u16 intrl;
intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
- rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
- tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
if (ec->use_adaptive_rx_coalesce)
- rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
else
- rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
if (ec->use_adaptive_tx_coalesce)
- tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
else
- tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
q_vector = rx_ring->q_vector;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector = tx_ring->q_vector;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
- wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
+ /* The interrupt handler itself will take care of programming
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector, no need to write the values now.
+ */
+
+ wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
i40e_flush(hw);
}
@@ -2364,11 +2367,11 @@ static int __i40e_set_coalesce(struct net_device *netdev,
vsi->work_limit = ec->tx_max_coalesced_frames_irq;
if (queue < 0) {
- cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
- cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+ cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+ cur_tx_itr = vsi->tx_rings[0]->itr_setting;
} else if (queue < vsi->num_queue_pairs) {
- cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
- cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+ cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+ cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
} else {
netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
vsi->num_queue_pairs - 1);
@@ -2396,7 +2399,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
- if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}
@@ -2407,16 +2410,16 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
- if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}
if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
- ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+ ec->rx_coalesce_usecs = I40E_MIN_ITR;
if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
- ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+ ec->tx_coalesce_usecs = I40E_MIN_ITR;
intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
@@ -4406,6 +4409,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
}
flags_complete:
+ changed_flags = orig_flags ^ new_flags;
+
/* Before we finalize any flag changes, we need to perform some
* checks to ensure that the changes are supported and safe.
*/
@@ -4415,21 +4420,17 @@ flags_complete:
!(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
return -EOPNOTSUPP;
- /* Disable FW LLDP not supported if NPAR active or if FW
- * API version < 1.7
+ /* If the driver detected FW LLDP was disabled on init, this flag could
+ * be set, however we do not support _changing_ the flag if NPAR is
+ * enabled or FW API version < 1.7. There are situations where older
+ * FW versions/NPAR enabled PFs could disable LLDP, however we _must_
+ * not allow the user to enable/disable LLDP with this flag on
+ * unsupported FW versions.
*/
- if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
- if (pf->hw.func_caps.npar_enable) {
- dev_warn(&pf->pdev->dev,
- "Unable to stop FW LLDP if NPAR active\n");
- return -EOPNOTSUPP;
- }
-
- if (pf->hw.aq.api_maj_ver < 1 ||
- (pf->hw.aq.api_maj_ver == 1 &&
- pf->hw.aq.api_min_ver < 7)) {
+ if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
dev_warn(&pf->pdev->dev,
- "FW ver does not support stopping FW LLDP\n");
+ "Device does not support changing FW LLDP\n");
return -EOPNOTSUPP;
}
}
@@ -4439,6 +4440,10 @@ flags_complete:
* something else has modified the flags variable since we copied it
* originally. We'll just punt with an error and log something in the
* message buffer.
+ *
+ * This is the point of no return for this function. We need to have
+ * checked any discrepancies or misconfigurations and returned
+ * EOPNOTSUPP before updating pf->flags here.
*/
if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
dev_warn(&pf->pdev->dev,
@@ -4446,8 +4451,6 @@ flags_complete:
return -EAGAIN;
}
- changed_flags = orig_flags ^ new_flags;
-
/* Process any additional changes needed as a result of flag changes.
* The changed_flags value reflects the list of bits that were
* changed in the code above.
@@ -4479,6 +4482,12 @@ flags_complete:
}
}
+ if ((changed_flags & pf->flags &
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+ (pf->flags & I40E_FLAG_MFP_ENABLED))
+ dev_warn(&pf->pdev->dev,
+ "Turning on link-down-on-close flag may affect other partitions\n");
+
if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
struct i40e_dcbx_config *dcbcfg;
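
[Editor's note, not part of the patch] The i40e_set_priv_flags() rework above moves the changed_flags computation ahead of the capability checks and keeps cmpxchg64() as the single commit point, so only flags that actually change are validated and a concurrent writer makes the call fail with -EAGAIN instead of silently clobbering state. A condensed, hedged restatement of that pattern, with the error messages and surrounding context omitted:

/* Hedged sketch of the flag-commit pattern adopted above: validate only
 * the bits that changed, then publish atomically. Illustrative only.
 */
u64 changed_flags = orig_flags ^ new_flags;

if ((changed_flags & I40E_FLAG_DISABLE_FW_LLDP) &&
    !(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP))
	return -EOPNOTSUPP;	/* cannot toggle FW LLDP on this device/FW */

if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags)
	return -EAGAIN;		/* flags changed under us; caller may retry */
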
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
deleted file mode 100644
index 2d1253c5b7a1..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ /dev/null
@@ -1,1571 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#include <linux/if_ether.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/fc/fc_fs.h>
-#include <scsi/fc/fc_fip.h>
-#include <scsi/fc/fc_fcoe.h>
-#include <scsi/libfc.h>
-#include <scsi/libfcoe.h>
-#include <uapi/linux/dcbnl.h>
-
-#include "i40e.h"
-#include "i40e_fcoe.h"
-
-/**
- * i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class2(u8 sof)
-{
- return (sof == FC_SOF_I2) || (sof == FC_SOF_N2);
-}
-
-/**
- * i40e_fcoe_sof_is_class3 - returns true if this is a FC Class 3 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class3(u8 sof)
-{
- return (sof == FC_SOF_I3) || (sof == FC_SOF_N3);
-}
-
-/**
- * i40e_fcoe_sof_is_supported - returns true if the FC SOF is supported by HW
- * @sof: the input SOF value from the frame
- **/
-static inline bool i40e_fcoe_sof_is_supported(u8 sof)
-{
- return i40e_fcoe_sof_is_class2(sof) ||
- i40e_fcoe_sof_is_class3(sof);
-}
-
-/**
- * i40e_fcoe_fc_sof - pull the SOF from FCoE header in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_sof(struct sk_buff *skb, u8 *sof)
-{
- *sof = ((struct fcoe_hdr *)skb_network_header(skb))->fcoe_sof;
-
- if (!i40e_fcoe_sof_is_supported(*sof))
- return -EINVAL;
- return 0;
-}
-
-/**
- * i40e_fcoe_eof_is_supported - returns true if the EOF is supported by HW
- * @eof: the input EOF value from the frame
- **/
-static inline bool i40e_fcoe_eof_is_supported(u8 eof)
-{
- return (eof == FC_EOF_N) || (eof == FC_EOF_T) ||
- (eof == FC_EOF_NI) || (eof == FC_EOF_A);
-}
-
-/**
- * i40e_fcoe_fc_eof - pull EOF from FCoE trailer in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_eof(struct sk_buff *skb, u8 *eof)
-{
- /* the first byte of the last dword is EOF */
- skb_copy_bits(skb, skb->len - 4, eof, 1);
-
- if (!i40e_fcoe_eof_is_supported(*eof))
- return -EINVAL;
- return 0;
-}
-
-/**
- * i40e_fcoe_ctxt_eof - convert input FC EOF for descriptor programming
- * @eof: the input eof value from the frame
- *
- * The FC EOF is converted to the value understood by HW for descriptor
- * programming. Never call this w/o calling i40e_fcoe_eof_is_supported()
- * first and that already checks for all supported valid eof values.
- **/
-static inline u32 i40e_fcoe_ctxt_eof(u8 eof)
-{
- switch (eof) {
- case FC_EOF_N:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_N;
- case FC_EOF_T:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_T;
- case FC_EOF_NI:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI;
- case FC_EOF_A:
- return I40E_TX_DESC_CMD_L4T_EOFT_EOF_A;
- default:
- /* Supported valid eof shall be already checked by
- * calling i40e_fcoe_eof_is_supported() first,
- * therefore this default case shall never hit.
- */
- WARN_ON(1);
- return -EINVAL;
- }
-}
-
-/**
- * i40e_fcoe_xid_is_valid - returns true if the exchange id is valid
- * @xid: the exchange id
- **/
-static inline bool i40e_fcoe_xid_is_valid(u16 xid)
-{
- return (xid != FC_XID_UNKNOWN) && (xid < I40E_FCOE_DDP_MAX);
-}
-
-/**
- * i40e_fcoe_ddp_unmap - unmap the mapped sglist associated
- * @pf: pointer to PF
- * @ddp: sw DDP context
- *
- * Unmap the scatter-gather list associated with the given SW DDP context
- *
- * Returns: data length already ddp-ed in bytes
- *
- **/
-static inline void i40e_fcoe_ddp_unmap(struct i40e_pf *pf,
- struct i40e_fcoe_ddp *ddp)
-{
- if (test_and_set_bit(__I40E_FCOE_DDP_UNMAPPED, &ddp->flags))
- return;
-
- if (ddp->sgl) {
- dma_unmap_sg(&pf->pdev->dev, ddp->sgl, ddp->sgc,
- DMA_FROM_DEVICE);
- ddp->sgl = NULL;
- ddp->sgc = 0;
- }
-
- if (ddp->pool) {
- dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
- ddp->pool = NULL;
- }
-}
-
-/**
- * i40e_fcoe_ddp_clear - clear the given SW DDP context
- * @ddp - SW DDP context
- **/
-static inline void i40e_fcoe_ddp_clear(struct i40e_fcoe_ddp *ddp)
-{
- memset(ddp, 0, sizeof(struct i40e_fcoe_ddp));
- ddp->xid = FC_XID_UNKNOWN;
- ddp->flags = __I40E_FCOE_DDP_NONE;
-}
-
-/**
- * i40e_fcoe_progid_is_fcoe - check if the prog_id is for FCoE
- * @id: the prog id for the programming status Rx descriptor write-back
- **/
-static inline bool i40e_fcoe_progid_is_fcoe(u8 id)
-{
- return (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
- (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS);
-}
-
-/**
- * i40e_fcoe_fc_get_xid - get xid from the frame header
- * @fh: the fc frame header
- *
- * In case the incoming frame's exchange is originated from
- * the initiator, then received frame's exchange id is ANDed
- * with fc_cpu_mask bits to get the same cpu on which exchange
- * was originated, otherwise just use the current cpu.
- *
- * Returns ox_id if exchange originator, rx_id if responder
- **/
-static inline u16 i40e_fcoe_fc_get_xid(struct fc_frame_header *fh)
-{
- u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
- return (f_ctl & FC_FC_EX_CTX) ?
- be16_to_cpu(fh->fh_ox_id) :
- be16_to_cpu(fh->fh_rx_id);
-}
-
-/**
- * i40e_fcoe_fc_frame_header - get fc frame header from skb
- * @skb: packet
- *
- * This checks if there is a VLAN header and returns the data
- * pointer to the start of the fc_frame_header.
- *
- * Returns pointer to the fc_frame_header
- **/
-static inline struct fc_frame_header *i40e_fcoe_fc_frame_header(
- struct sk_buff *skb)
-{
- void *fh = skb->data + sizeof(struct fcoe_hdr);
-
- if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
- fh += sizeof(struct vlan_hdr);
-
- return (struct fc_frame_header *)fh;
-}
-
-/**
- * i40e_fcoe_ddp_put - release the DDP context for a given exchange id
- * @netdev: the corresponding net_device
- * @xid: the exchange id that corresponding DDP context will be released
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_done
- * and it is expected to be called by ULD, i.e., FCP layer of libfc
- * to release the corresponding ddp context when the I/O is done.
- *
- * Returns : data length already ddp-ed in bytes
- **/
-static int i40e_fcoe_ddp_put(struct net_device *netdev, u16 xid)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_pf *pf = np->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- int len = 0;
- struct i40e_fcoe_ddp *ddp = &fcoe->ddp[xid];
-
- if (!fcoe || !ddp)
- goto out;
-
- if (test_bit(__I40E_FCOE_DDP_DONE, &ddp->flags))
- len = ddp->len;
- i40e_fcoe_ddp_unmap(pf, ddp);
-out:
- return len;
-}
-
-/**
- * i40e_fcoe_sw_init - sets up the HW for FCoE
- * @pf: pointer to PF
- **/
-void i40e_init_pf_fcoe(struct i40e_pf *pf)
-{
- struct i40e_hw *hw = &pf->hw;
- u32 val;
-
- pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
- pf->num_fcoe_qps = 0;
- pf->fcoe_hmc_cntx_num = 0;
- pf->fcoe_hmc_filt_num = 0;
-
- if (!pf->hw.func_caps.fcoe) {
- dev_dbg(&pf->pdev->dev, "FCoE capability is disabled\n");
- return;
- }
-
- if (!pf->hw.func_caps.dcb) {
- dev_warn(&pf->pdev->dev,
- "Hardware is not DCB capable not enabling FCoE.\n");
- return;
- }
-
- /* enable FCoE hash filter */
- val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1));
- val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32);
- val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32);
- val &= I40E_PFQF_HENA_PTYPE_ENA_MASK;
- i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val);
-
- /* enable flag */
- pf->flags |= I40E_FLAG_FCOE_ENABLED;
- pf->num_fcoe_qps = I40E_DEFAULT_FCOE;
-
- /* Reserve 4K DDP contexts and 20K filter size for FCoE */
- pf->fcoe_hmc_cntx_num = BIT(I40E_DMA_CNTX_SIZE_4K) *
- I40E_DMA_CNTX_BASE_SIZE;
- pf->fcoe_hmc_filt_num = pf->fcoe_hmc_cntx_num +
- BIT(I40E_HASH_FILTER_SIZE_16K) *
- I40E_HASH_FILTER_BASE_SIZE;
-
- /* FCoE object: max 16K filter buckets and 4K DMA contexts */
- pf->filter_settings.fcoe_filt_num = I40E_HASH_FILTER_SIZE_16K;
- pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K;
-
- /* Setup max frame with FCoE_MTU plus L2 overheads */
- val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL);
- val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK;
- val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
- << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT);
- i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val);
-
- dev_info(&pf->pdev->dev, "FCoE is supported.\n");
-}
-
-/**
- * i40e_get_fcoe_tc_map - Return TC map for FCoE APP
- * @pf: pointer to PF
- *
- **/
-u8 i40e_get_fcoe_tc_map(struct i40e_pf *pf)
-{
- struct i40e_dcb_app_priority_table app;
- struct i40e_hw *hw = &pf->hw;
- u8 enabled_tc = 0;
- u8 tc, i;
- /* Get the FCoE APP TLV */
- struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
-
- for (i = 0; i < dcbcfg->numapps; i++) {
- app = dcbcfg->app[i];
- if (app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
- app.protocolid == ETH_P_FCOE) {
- tc = dcbcfg->etscfg.prioritytable[app.priority];
- enabled_tc |= BIT(tc);
- break;
- }
- }
-
- /* TC0 if there is no TC defined for FCoE APP TLV */
- enabled_tc = enabled_tc ? enabled_tc : 0x1;
-
- return enabled_tc;
-}
-
-/**
- * i40e_fcoe_vsi_init - prepares the VSI context for creating a FCoE VSI
- * @vsi: pointer to the associated VSI struct
- * @ctxt: pointer to the associated VSI context to be passed to HW
- *
- * Returns 0 on success or < 0 on error
- **/
-int i40e_fcoe_vsi_init(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt)
-{
- struct i40e_aqc_vsi_properties_data *info = &ctxt->info;
- struct i40e_pf *pf = vsi->back;
- struct i40e_hw *hw = &pf->hw;
- u8 enabled_tc = 0;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
- dev_err(&pf->pdev->dev,
- "FCoE is not enabled for this device\n");
- return -EPERM;
- }
-
- /* initialize the hardware for FCoE */
- ctxt->pf_num = hw->pf_id;
- ctxt->vf_num = 0;
- ctxt->uplink_seid = vsi->uplink_seid;
- ctxt->connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
- ctxt->flags = I40E_AQ_VSI_TYPE_PF;
-
- /* FCoE VSI would need the following sections */
- info->valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
-
- /* FCoE VSI does not need these sections */
- info->valid_sections &= cpu_to_le16(~(I40E_AQ_VSI_PROP_SECURITY_VALID |
- I40E_AQ_VSI_PROP_VLAN_VALID |
- I40E_AQ_VSI_PROP_CAS_PV_VALID |
- I40E_AQ_VSI_PROP_INGRESS_UP_VALID |
- I40E_AQ_VSI_PROP_EGRESS_UP_VALID));
-
- if (i40e_is_vsi_uplink_mode_veb(vsi)) {
- info->valid_sections |=
- cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
- info->switch_id =
- cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
- }
- enabled_tc = i40e_get_fcoe_tc_map(pf);
- i40e_vsi_setup_queue_map(vsi, ctxt, enabled_tc, true);
-
- /* set up queue option section: only enable FCoE */
- info->queueing_opt_flags = I40E_AQ_VSI_QUE_OPT_FCOE_ENA;
-
- return 0;
-}
-
-/**
- * i40e_fcoe_enable - this is the implementation of ndo_fcoe_enable,
- * indicating the upper FCoE protocol stack is ready to use FCoE
- * offload features.
- *
- * @netdev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- * in RTNL
- *
- **/
-int i40e_fcoe_enable(struct net_device *netdev)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
- struct i40e_pf *pf = vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
- netdev_err(netdev, "HW does not support FCoE.\n");
- return -ENODEV;
- }
-
- if (vsi->type != I40E_VSI_FCOE) {
- netdev_err(netdev, "interface does not support FCoE.\n");
- return -EBUSY;
- }
-
- atomic_inc(&fcoe->refcnt);
-
- return 0;
-}
-
-/**
- * i40e_fcoe_disable- disables FCoE for upper FCoE protocol stack.
- * @dev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- **/
-int i40e_fcoe_disable(struct net_device *netdev)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
- struct i40e_pf *pf = vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
- netdev_err(netdev, "device does not support FCoE\n");
- return -ENODEV;
- }
- if (vsi->type != I40E_VSI_FCOE)
- return -EBUSY;
-
- if (!atomic_dec_and_test(&fcoe->refcnt))
- return -EINVAL;
-
- netdev_info(netdev, "FCoE disabled\n");
-
- return 0;
-}
-
-/**
- * i40e_fcoe_dma_pool_free - free the per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- **/
-static void i40e_fcoe_dma_pool_free(struct i40e_fcoe *fcoe,
- struct device *dev,
- unsigned int cpu)
-{
- struct i40e_fcoe_ddp_pool *ddp_pool;
-
- ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
- if (!ddp_pool->pool) {
- dev_warn(dev, "DDP pool already freed for cpu %d\n", cpu);
- return;
- }
- dma_pool_destroy(ddp_pool->pool);
- ddp_pool->pool = NULL;
-}
-
-/**
- * i40e_fcoe_dma_pool_create - per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-static int i40e_fcoe_dma_pool_create(struct i40e_fcoe *fcoe,
- struct device *dev,
- unsigned int cpu)
-{
- struct i40e_fcoe_ddp_pool *ddp_pool;
- struct dma_pool *pool;
- char pool_name[32];
-
- ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
- if (ddp_pool && ddp_pool->pool) {
- dev_warn(dev, "DDP pool already allocated for cpu %d\n", cpu);
- return 0;
- }
- snprintf(pool_name, sizeof(pool_name), "i40e_fcoe_ddp_%d", cpu);
- pool = dma_pool_create(pool_name, dev, I40E_FCOE_DDP_PTR_MAX,
- I40E_FCOE_DDP_PTR_ALIGN, PAGE_SIZE);
- if (!pool) {
- dev_err(dev, "dma_pool_create %s failed\n", pool_name);
- return -ENOMEM;
- }
- ddp_pool->pool = pool;
- return 0;
-}
-
-/**
- * i40e_fcoe_free_ddp_resources - release FCoE DDP resources
- * @vsi: the vsi FCoE is associated with
- *
- **/
-void i40e_fcoe_free_ddp_resources(struct i40e_vsi *vsi)
-{
- struct i40e_pf *pf = vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- int cpu, i;
-
- /* do nothing if not FCoE VSI */
- if (vsi->type != I40E_VSI_FCOE)
- return;
-
- /* do nothing if no DDP pools were allocated */
- if (!fcoe->ddp_pool)
- return;
-
- for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
- i40e_fcoe_ddp_put(vsi->netdev, i);
-
- for_each_possible_cpu(cpu)
- i40e_fcoe_dma_pool_free(fcoe, &pf->pdev->dev, cpu);
-
- free_percpu(fcoe->ddp_pool);
- fcoe->ddp_pool = NULL;
-
- netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources released\n",
- vsi->id, vsi->seid);
-}
-
-/**
- * i40e_fcoe_setup_ddp_resources - allocate per cpu DDP resources
- * @vsi: the VSI FCoE is associated with
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-int i40e_fcoe_setup_ddp_resources(struct i40e_vsi *vsi)
-{
- struct i40e_pf *pf = vsi->back;
- struct device *dev = &pf->pdev->dev;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- unsigned int cpu;
- int i;
-
- if (vsi->type != I40E_VSI_FCOE)
- return -ENODEV;
-
- /* do nothing if no DDP pools were allocated */
- if (fcoe->ddp_pool)
- return -EEXIST;
-
- /* allocate per CPU memory to track DDP pools */
- fcoe->ddp_pool = alloc_percpu(struct i40e_fcoe_ddp_pool);
- if (!fcoe->ddp_pool) {
- dev_err(&pf->pdev->dev, "failed to allocate percpu DDP\n");
- return -ENOMEM;
- }
-
- /* allocate pci pool for each cpu */
- for_each_possible_cpu(cpu) {
- if (!i40e_fcoe_dma_pool_create(fcoe, dev, cpu))
- continue;
-
- dev_err(dev, "failed to alloc DDP pool on cpu:%d\n", cpu);
- i40e_fcoe_free_ddp_resources(vsi);
- return -ENOMEM;
- }
-
- /* initialize the sw context */
- for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
- i40e_fcoe_ddp_clear(&fcoe->ddp[i]);
-
- netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources allocated\n",
- vsi->id, vsi->seid);
-
- return 0;
-}
-
-/**
- * i40e_fcoe_handle_status - check the Programming Status for FCoE
- * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for Programming Status, not a packet descriptor.
- *
- * Check if this is the Rx Programming Status descriptor write-back for FCoE.
- * This is used to verify if the context/filter programming or invalidation
- * requested by SW to the HW is successful or not and take actions accordingly.
- **/
-void i40e_fcoe_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id)
-{
- struct i40e_pf *pf = rx_ring->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- struct i40e_fcoe_ddp *ddp;
- u32 error;
- u16 xid;
- u64 qw;
-
- /* we only care for FCoE here */
- if (!i40e_fcoe_progid_is_fcoe(prog_id))
- return;
-
- xid = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param) &
- (I40E_FCOE_DDP_MAX - 1);
-
- if (!i40e_fcoe_xid_is_valid(xid))
- return;
-
- ddp = &fcoe->ddp[xid];
- WARN_ON(xid != ddp->xid);
-
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
- I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
-
- /* DDP context programming status: failure or success */
- if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) {
- if (I40E_RX_PROG_FCOE_ERROR_TBL_FULL(error)) {
- dev_err(&pf->pdev->dev, "xid %x ddp->xid %x TABLE FULL\n",
- xid, ddp->xid);
- ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT;
- }
- if (I40E_RX_PROG_FCOE_ERROR_CONFLICT(error)) {
- dev_err(&pf->pdev->dev, "xid %x ddp->xid %x CONFLICT\n",
- xid, ddp->xid);
- ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT;
- }
- }
-
- /* DDP context invalidation status: failure or success */
- if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS) {
- if (I40E_RX_PROG_FCOE_ERROR_INVLFAIL(error)) {
- dev_err(&pf->pdev->dev, "xid %x ddp->xid %x INVALIDATION FAILURE\n",
- xid, ddp->xid);
- ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT;
- }
- /* clear the flag so we can retry invalidation */
- clear_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags);
- }
-
- /* unmap DMA */
- i40e_fcoe_ddp_unmap(pf, ddp);
- i40e_fcoe_ddp_clear(ddp);
-}
-
-/**
- * i40e_fcoe_handle_offload - check ddp status and mark it done
- * @adapter: i40e adapter
- * @rx_desc: advanced rx descriptor
- * @skb: the skb holding the received data
- *
- * This checks ddp status.
- *
- * Returns : < 0 indicates an error or not a FCOE ddp, 0 indicates
- * not passing the skb to ULD, > 0 indicates is the length of data
- * being ddped.
- *
- **/
-int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct i40e_pf *pf = rx_ring->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- struct fc_frame_header *fh = NULL;
- struct i40e_fcoe_ddp *ddp = NULL;
- u32 status, fltstat;
- u32 error, fcerr;
- int rc = -EINVAL;
- u16 ptype;
- u16 xid;
- u64 qw;
-
- /* check this rxd is for programming status */
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- /* packet descriptor, check packet type */
- ptype = (qw & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
- if (!i40e_rx_is_fcoe(ptype))
- goto out_no_ddp;
-
- error = (qw & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT;
- fcerr = (error >> I40E_RX_DESC_ERROR_L3L4E_SHIFT) &
- I40E_RX_DESC_FCOE_ERROR_MASK;
-
- /* check stateless offload error */
- if (unlikely(fcerr == I40E_RX_DESC_ERROR_L3L4E_PROT)) {
- dev_err(&pf->pdev->dev, "Protocol Error\n");
- skb->ip_summed = CHECKSUM_NONE;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
-
- /* check hw status on ddp */
- status = (qw & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT;
- fltstat = (status >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
- I40E_RX_DESC_FLTSTAT_FCMASK;
-
- /* now we are ready to check DDP */
- fh = i40e_fcoe_fc_frame_header(skb);
- xid = i40e_fcoe_fc_get_xid(fh);
- if (!i40e_fcoe_xid_is_valid(xid))
- goto out_no_ddp;
-
- /* non DDP normal receive, return to the protocol stack */
- if (fltstat == I40E_RX_DESC_FLTSTAT_NOMTCH)
- goto out_no_ddp;
-
- /* do we have a sw ddp context setup ? */
- ddp = &fcoe->ddp[xid];
- if (!ddp->sgl)
- goto out_no_ddp;
-
- /* fetch xid from hw rxd wb, which should match up the sw ctxt */
- xid = le16_to_cpu(rx_desc->wb.qword0.lo_dword.mirr_fcoe.fcoe_ctx_id);
- if (ddp->xid != xid) {
- dev_err(&pf->pdev->dev, "xid 0x%x does not match ctx_xid 0x%x\n",
- ddp->xid, xid);
- goto out_put_ddp;
- }
-
- /* the same exchange has already errored out */
- if (ddp->fcerr) {
- dev_err(&pf->pdev->dev, "xid 0x%x fcerr 0x%x reported fcer 0x%x\n",
- xid, ddp->fcerr, fcerr);
- goto out_put_ddp;
- }
-
- /* fcoe param is valid by now with correct DDPed length */
- ddp->len = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param);
- ddp->fcerr = fcerr;
- /* header posting only, useful only for target mode and debugging */
- if (fltstat == I40E_RX_DESC_FLTSTAT_DDP) {
- /* For target mode, we get header of the last packet but it
- * does not have the FCoE trailer field, i.e., CRC and EOF
- * Ordered Set since they are offloaded by the HW, so fill
- * it up correspondingly to allow the packet to pass through
- * to the upper protocol stack.
- */
- u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
- if ((f_ctl & FC_FC_END_SEQ) &&
- (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA)) {
- struct fcoe_crc_eof *crc = NULL;
-
- crc = skb_put(skb, sizeof(*crc));
- crc->fcoe_eof = FC_EOF_T;
- } else {
- /* otherwise, drop the header only frame */
- rc = 0;
- goto out_no_ddp;
- }
- }
-
-out_put_ddp:
- /* either we got RSP or we have an error, unmap DMA in both cases */
- i40e_fcoe_ddp_unmap(pf, ddp);
- if (ddp->len && !ddp->fcerr) {
- int pkts;
-
- rc = ddp->len;
- i40e_fcoe_ddp_clear(ddp);
- ddp->len = rc;
- pkts = DIV_ROUND_UP(rc, 2048);
- rx_ring->stats.bytes += rc;
- rx_ring->stats.packets += pkts;
- rx_ring->q_vector->rx.total_bytes += rc;
- rx_ring->q_vector->rx.total_packets += pkts;
- set_bit(__I40E_FCOE_DDP_DONE, &ddp->flags);
- }
-
-out_no_ddp:
- return rc;
-}
-
-/**
- * i40e_fcoe_ddp_setup - called to set up ddp context
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- * @target_mode: indicates this is a DDP request for target
- *
- * Returns : 1 for success and 0 for no DDP on this I/O
- **/
-static int i40e_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc,
- int target_mode)
-{
- static const unsigned int bufflen = I40E_FCOE_DDP_BUF_MIN;
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_fcoe_ddp_pool *ddp_pool;
- struct i40e_pf *pf = np->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- unsigned int i, j, dmacount;
- struct i40e_fcoe_ddp *ddp;
- unsigned int firstoff = 0;
- unsigned int thisoff = 0;
- unsigned int thislen = 0;
- struct scatterlist *sg;
- dma_addr_t addr = 0;
- unsigned int len;
-
- if (xid >= I40E_FCOE_DDP_MAX) {
- dev_warn(&pf->pdev->dev, "xid=0x%x out-of-range\n", xid);
- return 0;
- }
-
- /* no DDP if we are already down or resetting */
- if (test_bit(__I40E_DOWN, &pf->state) ||
- test_bit(__I40E_NEEDS_RESTART, &pf->state)) {
- dev_info(&pf->pdev->dev, "xid=0x%x device in reset/down\n",
- xid);
- return 0;
- }
-
- ddp = &fcoe->ddp[xid];
- if (ddp->sgl) {
- dev_info(&pf->pdev->dev, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
- xid, ddp->sgl, ddp->sgc);
- return 0;
- }
- i40e_fcoe_ddp_clear(ddp);
-
- if (!fcoe->ddp_pool) {
- dev_info(&pf->pdev->dev, "No DDP pool, xid 0x%x\n", xid);
- return 0;
- }
-
- ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu());
- if (!ddp_pool->pool) {
- dev_info(&pf->pdev->dev, "No percpu ddp pool, xid 0x%x\n", xid);
- goto out_noddp;
- }
-
- /* setup dma from scsi command sgl */
- dmacount = dma_map_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
- if (dmacount == 0) {
- dev_info(&pf->pdev->dev, "dma_map_sg for sgl %p, sgc %d failed\n",
- sgl, sgc);
- goto out_noddp_unmap;
- }
-
- /* alloc the udl from our ddp pool */
- ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
- if (!ddp->udl) {
- dev_info(&pf->pdev->dev,
- "Failed allocated ddp context, xid 0x%x\n", xid);
- goto out_noddp_unmap;
- }
-
- j = 0;
- ddp->len = 0;
- for_each_sg(sgl, sg, dmacount, i) {
- addr = sg_dma_address(sg);
- len = sg_dma_len(sg);
- ddp->len += len;
- while (len) {
- /* max number of buffers allowed in one DDP context */
- if (j >= I40E_FCOE_DDP_BUFFCNT_MAX) {
- dev_info(&pf->pdev->dev,
- "xid=%x:%d,%d,%d:addr=%llx not enough descriptors\n",
- xid, i, j, dmacount, (u64)addr);
- goto out_noddp_free;
- }
-
- /* get the offset of length of current buffer */
- thisoff = addr & ((dma_addr_t)bufflen - 1);
- thislen = min_t(unsigned int, (bufflen - thisoff), len);
- /* all but the 1st buffer (j == 0)
- * must be aligned on bufflen
- */
- if ((j != 0) && (thisoff))
- goto out_noddp_free;
-
- /* all but the last buffer
- * ((i == (dmacount - 1)) && (thislen == len))
- * must end at bufflen
- */
- if (((i != (dmacount - 1)) || (thislen != len)) &&
- ((thislen + thisoff) != bufflen))
- goto out_noddp_free;
-
- ddp->udl[j] = (u64)(addr - thisoff);
- /* only the first buffer may have none-zero offset */
- if (j == 0)
- firstoff = thisoff;
- len -= thislen;
- addr += thislen;
- j++;
- }
- }
- /* only the last buffer may have non-full bufflen */
- ddp->lastsize = thisoff + thislen;
- ddp->firstoff = firstoff;
- ddp->list_len = j;
- ddp->pool = ddp_pool->pool;
- ddp->sgl = sgl;
- ddp->sgc = sgc;
- ddp->xid = xid;
- if (target_mode)
- set_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
- set_bit(__I40E_FCOE_DDP_INITALIZED, &ddp->flags);
-
- put_cpu();
- return 1; /* Success */
-
-out_noddp_free:
- dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
- i40e_fcoe_ddp_clear(ddp);
-
-out_noddp_unmap:
- dma_unmap_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
-out_noddp:
- put_cpu();
- return 0;
-}
-
-/**
- * i40e_fcoe_ddp_get - called to set up ddp context in initiator mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_setup
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_get(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc)
-{
- return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 0);
-}
-
-/**
- * i40e_fcoe_ddp_target - called to set up ddp context in target mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_target
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O. The DDP in target mode is a write I/O request
- * from the initiator.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_target(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc)
-{
- return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 1);
-}
-
-/**
- * i40e_fcoe_program_ddp - programs the HW DDP related descriptors
- * @tx_ring: transmit ring for this packet
- * @skb: the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determine whether this is a READ/WRITE command, and find out if there is
- * a matching SW DDP context for this command. DDP is applicable
- * only for READ when acting as initiator, or for WRITE when acting as
- * responder (checked via XFER_RDY).
- *
- * Note: caller checks sof and ddp sw context
- *
- * Returns : none
- *
- **/
-static void i40e_fcoe_program_ddp(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- struct i40e_fcoe_ddp *ddp, u8 sof)
-{
- struct i40e_fcoe_filter_context_desc *filter_desc = NULL;
- struct i40e_fcoe_queue_context_desc *queue_desc = NULL;
- struct i40e_fcoe_ddp_context_desc *ddp_desc = NULL;
- struct i40e_pf *pf = tx_ring->vsi->back;
- u16 i = tx_ring->next_to_use;
- struct fc_frame_header *fh;
- u64 flags_rsvd_lanq = 0;
- bool target_mode;
-
- /* check if abort is still pending */
- if (test_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags)) {
- dev_warn(&pf->pdev->dev,
- "DDP abort is still pending xid:%hx and ddp->flags:%lx:\n",
- ddp->xid, ddp->flags);
- return;
- }
-
- /* set the flag to indicate this is programmed */
- if (test_and_set_bit(__I40E_FCOE_DDP_PROGRAMMED, &ddp->flags)) {
- dev_warn(&pf->pdev->dev,
- "DDP is already programmed for xid:%hx and ddp->flags:%lx:\n",
- ddp->xid, ddp->flags);
- return;
- }
-
- /* Prepare the DDP context descriptor */
- ddp_desc = I40E_DDP_CONTEXT_DESC(tx_ring, i);
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- ddp_desc->type_cmd_foff_lsize =
- cpu_to_le64(I40E_TX_DESC_DTYPE_DDP_CTX |
- ((u64)I40E_FCOE_DDP_CTX_DESC_BSIZE_4K <<
- I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT) |
- ((u64)ddp->firstoff <<
- I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT) |
- ((u64)ddp->lastsize <<
- I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT));
- ddp_desc->rsvd = cpu_to_le64(0);
-
- /* target mode needs last packet in the sequence */
- target_mode = test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
- if (target_mode)
- ddp_desc->type_cmd_foff_lsize |=
- cpu_to_le64(I40E_FCOE_DDP_CTX_DESC_LASTSEQH);
-
- /* Prepare queue_context descriptor */
- queue_desc = I40E_QUEUE_CONTEXT_DESC(tx_ring, i++);
- if (i == tx_ring->count)
- i = 0;
- queue_desc->dmaindx_fbase = cpu_to_le64(ddp->xid | ((u64)ddp->udp));
- queue_desc->flen_tph = cpu_to_le64(ddp->list_len |
- ((u64)(I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC |
- I40E_FCOE_QUEUE_CTX_DESC_TPHDATA) <<
- I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT));
-
- /* Prepare filter_context_desc */
- filter_desc = I40E_FILTER_CONTEXT_DESC(tx_ring, i);
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- fh = (struct fc_frame_header *)skb_transport_header(skb);
- filter_desc->param = cpu_to_le32(ntohl(fh->fh_parm_offset));
- filter_desc->seqn = cpu_to_le16(ntohs(fh->fh_seq_cnt));
- filter_desc->rsvd_dmaindx = cpu_to_le16(ddp->xid <<
- I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT);
-
- flags_rsvd_lanq = I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP;
- flags_rsvd_lanq |= (u64)(target_mode ?
- I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP :
- I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT);
-
- flags_rsvd_lanq |= (u64)((sof == FC_SOF_I2 || sof == FC_SOF_N2) ?
- I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 :
- I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3);
-
- flags_rsvd_lanq |= ((u64)skb->queue_mapping <<
- I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT);
- filter_desc->flags_rsvd_lanq = cpu_to_le64(flags_rsvd_lanq);
-
- /* By this time, all offload related descriptors have been programmed */
- tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_invalidate_ddp - invalidates DDP in case of abort
- * @tx_ring: transmit ring for this packet
- * @skb: the packet associated w/ this DDP invalidation, i.e., ABTS
- * @ddp: the SW DDP context for this DDP
- *
- * Programs the Tx context descriptor to do DDP invalidation.
- **/
-static void i40e_fcoe_invalidate_ddp(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- struct i40e_fcoe_ddp *ddp)
-{
- struct i40e_tx_context_desc *context_desc;
- int i;
-
- if (test_and_set_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags))
- return;
-
- i = tx_ring->next_to_use;
- context_desc = I40E_TX_CTXTDESC(tx_ring, i);
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- context_desc->tunneling_params = cpu_to_le32(0);
- context_desc->l2tag2 = cpu_to_le16(0);
- context_desc->rsvd = cpu_to_le16(0);
- context_desc->type_cmd_tso_mss = cpu_to_le64(
- I40E_TX_DESC_DTYPE_FCOE_CTX |
- (I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL <<
- I40E_TXD_CTX_QW1_CMD_SHIFT) |
- (I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND <<
- I40E_TXD_CTX_QW1_CMD_SHIFT));
- tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_handle_ddp - check we should setup or invalidate DDP
- * @tx_ring: transmit ring for this packet
- * @skb: the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determine whether this is an ABTS/READ/XFER_RDY, and find out if there is
- * a matching SW DDP context for this command. DDP is applicable
- * only for READ when acting as initiator, or for WRITE when acting as
- * responder (checked via XFER_RDY). If this is an ABTS, just
- * invalidate the context.
- **/
-static void i40e_fcoe_handle_ddp(struct i40e_ring *tx_ring,
- struct sk_buff *skb, u8 sof)
-{
- struct i40e_pf *pf = tx_ring->vsi->back;
- struct i40e_fcoe *fcoe = &pf->fcoe;
- struct fc_frame_header *fh;
- struct i40e_fcoe_ddp *ddp;
- u32 f_ctl;
- u8 r_ctl;
- u16 xid;
-
- fh = (struct fc_frame_header *)skb_transport_header(skb);
- f_ctl = ntoh24(fh->fh_f_ctl);
- r_ctl = fh->fh_r_ctl;
- ddp = NULL;
-
- if ((r_ctl == FC_RCTL_DD_DATA_DESC) && (f_ctl & FC_FC_EX_CTX)) {
- /* exchange responder? if so, XFER_RDY for write */
- xid = ntohs(fh->fh_rx_id);
- if (i40e_fcoe_xid_is_valid(xid)) {
- ddp = &fcoe->ddp[xid];
- if ((ddp->xid == xid) &&
- (test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
- i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
- }
- } else if (r_ctl == FC_RCTL_DD_UNSOL_CMD) {
- /* exchange originator, check READ cmd */
- xid = ntohs(fh->fh_ox_id);
- if (i40e_fcoe_xid_is_valid(xid)) {
- ddp = &fcoe->ddp[xid];
- if ((ddp->xid == xid) &&
- (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
- i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
- }
- } else if (r_ctl == FC_RCTL_BA_ABTS) {
- /* exchange originator, check ABTS */
- xid = ntohs(fh->fh_ox_id);
- if (i40e_fcoe_xid_is_valid(xid)) {
- ddp = &fcoe->ddp[xid];
- if ((ddp->xid == xid) &&
- (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
- i40e_fcoe_invalidate_ddp(tx_ring, skb, ddp);
- }
- }
-}
-
-/**
- * i40e_fcoe_tso - set up FCoE TSO
- * @tx_ring: ring to send buffer on
- * @skb: send buffer
- * @tx_flags: collected send information
- * @hdr_len: the tso header length
- * @sof: the SOF to indicate class of service
- *
- * Note: sof must already have been checked to be either class 2 or class 3
- * before calling this function.
- *
- * Returns 1 to indicate sequence segmentation offload is properly setup
- * or returns 0 to indicate no tso is needed, otherwise returns error
- * code to drop the frame.
- **/
-static int i40e_fcoe_tso(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- u32 tx_flags, u8 *hdr_len, u8 sof)
-{
- struct i40e_tx_context_desc *context_desc;
- u32 cd_type, cd_cmd, cd_tso_len, cd_mss;
- struct fc_frame_header *fh;
- u64 cd_type_cmd_tso_mss;
-
- /* must match gso type as FCoE */
- if (!skb_is_gso(skb))
- return 0;
-
- /* is it the expected gso type for FCoE? */
- if (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE) {
- netdev_err(skb->dev,
- "wrong gso type %d:expecting SKB_GSO_FCOE\n",
- skb_shinfo(skb)->gso_type);
- return -EINVAL;
- }
-
- /* header and trailer are inserted by hw */
- *hdr_len = skb_transport_offset(skb) + sizeof(struct fc_frame_header) +
- sizeof(struct fcoe_crc_eof);
-
- /* check sof to decide a class 2 or 3 TSO */
- if (likely(i40e_fcoe_sof_is_class3(sof)))
- cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3;
- else
- cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2;
-
- /* param field valid? */
- fh = (struct fc_frame_header *)skb_transport_header(skb);
- if (fh->fh_f_ctl[2] & FC_FC_REL_OFF)
- cd_cmd |= I40E_FCOE_TX_CTX_DESC_RELOFF;
-
- /* fill the field values */
- cd_type = I40E_TX_DESC_DTYPE_FCOE_CTX;
- cd_tso_len = skb->len - *hdr_len;
- cd_mss = skb_shinfo(skb)->gso_size;
- cd_type_cmd_tso_mss =
- ((u64)cd_type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
- ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
- ((u64)cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
- ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
-
- /* grab the next descriptor */
- context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use);
- tx_ring->next_to_use++;
- if (tx_ring->next_to_use == tx_ring->count)
- tx_ring->next_to_use = 0;
-
- context_desc->tunneling_params = 0;
- context_desc->l2tag2 = cpu_to_le16((tx_flags & I40E_TX_FLAGS_VLAN_MASK)
- >> I40E_TX_FLAGS_VLAN_SHIFT);
- context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
-
- return 1;
-}
-
-/**
- * i40e_fcoe_tx_map - build the tx descriptor
- * @tx_ring: ring to send buffer on
- * @skb: send buffer
- * @first: first buffer info buffer to use
- * @tx_flags: collected send information
- * @hdr_len: ptr to the size of the packet header
- * @eof: the frame eof value
- *
- * Note, for FCoE, sof and eof are already checked
- **/
-static void i40e_fcoe_tx_map(struct i40e_ring *tx_ring,
- struct sk_buff *skb,
- struct i40e_tx_buffer *first,
- u32 tx_flags, u8 hdr_len, u8 eof)
-{
- u32 td_offset = 0;
- u32 td_cmd = 0;
- u32 maclen;
-
- /* insert CRC */
- td_cmd = I40E_TX_DESC_CMD_ICRC;
-
- /* setup MACLEN */
- maclen = skb_network_offset(skb);
- if (tx_flags & I40E_TX_FLAGS_SW_VLAN)
- maclen += sizeof(struct vlan_hdr);
-
- if (skb->protocol == htons(ETH_P_FCOE)) {
- /* for FCoE, maclen should exclude ether type */
- maclen -= 2;
- /* setup type as FCoE and EOF insertion */
- td_cmd |= (I40E_TX_DESC_CMD_FCOET | i40e_fcoe_ctxt_eof(eof));
- /* setup FCoELEN and FCLEN */
- td_offset |= ((((sizeof(struct fcoe_hdr) + 2) >> 2) <<
- I40E_TX_DESC_LENGTH_IPLEN_SHIFT) |
- ((sizeof(struct fc_frame_header) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT));
- /* trim to exclude trailer */
- pskb_trim(skb, skb->len - sizeof(struct fcoe_crc_eof));
- }
-
- /* MACLEN is ether header length in words not bytes */
- td_offset |= (maclen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
-
- i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset);
-}
-
-/**
- * i40e_fcoe_set_skb_header - adjust skb header pointers for FIP/FCoE/FC
- * @skb: the skb to be adjusted
- *
- * Returns 0 if this skb is FCoE/FIP (including VLAN carried FCoE/FIP) and
- * adjusts the skb header pointers accordingly. Otherwise, returns -EINVAL.
- **/
-static inline int i40e_fcoe_set_skb_header(struct sk_buff *skb)
-{
- __be16 protocol = skb->protocol;
-
- skb_reset_mac_header(skb);
- skb->mac_len = sizeof(struct ethhdr);
- if (protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)eth_hdr(skb);
-
- protocol = veth->h_vlan_encapsulated_proto;
- skb->mac_len += sizeof(struct vlan_hdr);
- }
-
- /* FCoE or FIP only */
- if ((protocol != htons(ETH_P_FIP)) &&
- (protocol != htons(ETH_P_FCOE)))
- return -EINVAL;
-
- /* set header to L2 of FCoE/FIP */
- skb_set_network_header(skb, skb->mac_len);
- if (protocol == htons(ETH_P_FIP))
- return 0;
-
- /* set header to L3 of FC */
- skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));
- return 0;
-}
-
-/**
- * i40e_fcoe_xmit_frame - transmit buffer
- * @skb: send buffer
- * @netdev: the fcoe netdev
- *
- * Returns 0 if sent, else an error code
- **/
-static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb,
- struct net_device *netdev)
-{
- struct i40e_netdev_priv *np = netdev_priv(skb->dev);
- struct i40e_vsi *vsi = np->vsi;
- struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
- struct i40e_tx_buffer *first;
- u32 tx_flags = 0;
- int fso, count;
- u8 hdr_len = 0;
- u8 sof = 0;
- u8 eof = 0;
-
- if (i40e_fcoe_set_skb_header(skb))
- goto out_drop;
-
- count = i40e_xmit_descriptor_count(skb);
- if (i40e_chk_linearize(skb, count)) {
- if (__skb_linearize(skb))
- goto out_drop;
- count = i40e_txd_use_count(skb->len);
- tx_ring->tx_stats.tx_linearize++;
- }
-
- /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
- * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
- * + 4 desc gap to avoid the cache line where head is,
- * + 1 desc for context descriptor,
- * otherwise try next time
- */
- if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
- tx_ring->tx_stats.tx_busy++;
- return NETDEV_TX_BUSY;
- }
-
- /* prepare the xmit flags */
- if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
- goto out_drop;
-
- /* record the location of the first descriptor for this packet */
- first = &tx_ring->tx_bi[tx_ring->next_to_use];
-
- /* FIP is regular L2 traffic w/o offload */
- if (skb->protocol == htons(ETH_P_FIP))
- goto out_send;
-
- /* check sof and eof, only supports FC Class 2 or 3 */
- if (i40e_fcoe_fc_sof(skb, &sof) || i40e_fcoe_fc_eof(skb, &eof)) {
- netdev_err(netdev, "SOF/EOF error:%02x - %02x\n", sof, eof);
- goto out_drop;
- }
-
- /* always do FCCRC for FCoE */
- tx_flags |= I40E_TX_FLAGS_FCCRC;
-
- /* check we should do sequence offload */
- fso = i40e_fcoe_tso(tx_ring, skb, tx_flags, &hdr_len, sof);
- if (fso < 0)
- goto out_drop;
- else if (fso)
- tx_flags |= I40E_TX_FLAGS_FSO;
- else
- i40e_fcoe_handle_ddp(tx_ring, skb, sof);
-
-out_send:
- /* send out the packet */
- i40e_fcoe_tx_map(tx_ring, skb, first, tx_flags, hdr_len, eof);
-
- i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
- return NETDEV_TX_OK;
-
-out_drop:
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
-}
-
-/**
- * i40e_fcoe_change_mtu - NDO callback to change the Maximum Transmission Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns error as operation not permitted
- *
- **/
-static int i40e_fcoe_change_mtu(struct net_device *netdev, int new_mtu)
-{
- netdev_warn(netdev, "MTU change is not supported on FCoE interfaces\n");
- return -EPERM;
-}
-
-/**
- * i40e_fcoe_set_features - set the netdev feature flags
- * @netdev: ptr to the netdev being adjusted
- * @features: the feature set that the stack is suggesting
- *
- **/
-static int i40e_fcoe_set_features(struct net_device *netdev,
- netdev_features_t features)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
-
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- i40e_vlan_stripping_enable(vsi);
- else
- i40e_vlan_stripping_disable(vsi);
-
- return 0;
-}
-
-static const struct net_device_ops i40e_fcoe_netdev_ops = {
- .ndo_open = i40e_open,
- .ndo_stop = i40e_close,
- .ndo_get_stats64 = i40e_get_netdev_stats_struct,
- .ndo_set_rx_mode = i40e_set_rx_mode,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = i40e_set_mac,
- .ndo_change_mtu = i40e_fcoe_change_mtu,
- .ndo_do_ioctl = i40e_ioctl,
- .ndo_tx_timeout = i40e_tx_timeout,
- .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid,
- .ndo_setup_tc = __i40e_setup_tc,
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = i40e_netpoll,
-#endif
- .ndo_start_xmit = i40e_fcoe_xmit_frame,
- .ndo_fcoe_enable = i40e_fcoe_enable,
- .ndo_fcoe_disable = i40e_fcoe_disable,
- .ndo_fcoe_ddp_setup = i40e_fcoe_ddp_get,
- .ndo_fcoe_ddp_done = i40e_fcoe_ddp_put,
- .ndo_fcoe_ddp_target = i40e_fcoe_ddp_target,
- .ndo_set_features = i40e_fcoe_set_features,
-};
-
-/* fcoe network device type */
-static struct device_type fcoe_netdev_type = {
- .name = "fcoe",
-};
-
-/**
- * i40e_fcoe_config_netdev - prepares the netdev for use with a FCoE VSI
- * @netdev: pointer to the associated net_device
- * @vsi: pointer to the associated VSI struct
- **/
-void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi)
-{
- struct i40e_hw *hw = &vsi->back->hw;
- struct i40e_pf *pf = vsi->back;
-
- if (vsi->type != I40E_VSI_FCOE)
- return;
-
- netdev->features = (NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER);
-
- netdev->vlan_features = netdev->features;
- netdev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER);
- netdev->fcoe_ddp_xid = I40E_FCOE_DDP_MAX - 1;
- netdev->features |= NETIF_F_ALL_FCOE;
- netdev->vlan_features |= NETIF_F_ALL_FCOE;
- netdev->hw_features |= netdev->features;
- netdev->priv_flags |= IFF_UNICAST_FLT;
- netdev->priv_flags |= IFF_SUPP_NOFCS;
-
- strlcpy(netdev->name, "fcoe%d", IFNAMSIZ-1);
- netdev->mtu = FCOE_MTU;
- SET_NETDEV_DEV(netdev, &pf->pdev->dev);
- SET_NETDEV_DEVTYPE(netdev, &fcoe_netdev_type);
- /* Use dev_port value 1 for the FCoE netdev instead of the default
- * zero used by the PF netdev; this helps the biosdevname user tool
- * tell them apart when both are attached to the same PCI function.
- */
- netdev->dev_port = 1;
- spin_lock_bh(&vsi->mac_filter_hash_lock);
- i40e_add_filter(vsi, hw->mac.san_addr, 0);
- i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0);
- i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0);
- i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0);
- spin_unlock_bh(&vsi->mac_filter_hash_lock);
-
- /* use san mac */
- ether_addr_copy(netdev->dev_addr, hw->mac.san_addr);
- ether_addr_copy(netdev->perm_addr, hw->mac.san_addr);
- /* fcoe netdev ops */
- netdev->netdev_ops = &i40e_fcoe_netdev_ops;
-}
-
-/**
- * i40e_fcoe_vsi_setup - allocate and set up FCoE VSI
- * @pf: the PF that VSI is associated with
- *
- **/
-void i40e_fcoe_vsi_setup(struct i40e_pf *pf)
-{
- struct i40e_vsi *vsi;
- u16 seid;
- int i;
-
- if (!(pf->flags & I40E_FLAG_FCOE_ENABLED))
- return;
-
- for (i = 0; i < pf->num_alloc_vsi; i++) {
- vsi = pf->vsi[i];
- if (vsi && vsi->type == I40E_VSI_FCOE) {
- dev_warn(&pf->pdev->dev,
- "FCoE VSI already created\n");
- return;
- }
- }
-
- seid = pf->vsi[pf->lan_vsi]->seid;
- vsi = i40e_vsi_setup(pf, I40E_VSI_FCOE, seid, 0);
- if (vsi) {
- dev_dbg(&pf->pdev->dev,
- "Successfully created FCoE VSI seid %d id %d uplink_seid %d PF seid %d\n",
- vsi->seid, vsi->id, vsi->uplink_seid, seid);
- } else {
- dev_info(&pf->pdev->dev, "Failed to create FCoE VSI\n");
- }
-}
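
The DDP setup path removed above imposes a strict layout rule on the DMA-mapped
scatter-gather list: every element except the first must start on a DDP buffer
boundary, and every element except the last must end exactly on one. A minimal
sketch of that rule (illustrative only; the helper and parameter names below are
not part of the driver) could look like:

	/* Return 1 if a scatter-gather element satisfies the DDP layout rule
	 * for a given buffer size (e.g. 4096 for I40E_FCOE_DDP_BUF_MIN),
	 * 0 otherwise.
	 */
	static int ddp_sg_element_ok(int is_first, int is_last,
				     unsigned long long addr, unsigned int len,
				     unsigned int bufflen)
	{
		unsigned int off = addr & (bufflen - 1);

		if (!is_first && off)			/* must start on a boundary */
			return 0;
		if (!is_last && (off + len) % bufflen)	/* must end on a boundary */
			return 0;
		return 1;
	}
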
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h b/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
deleted file mode 100644
index a93174ddeaba..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#ifndef _I40E_FCOE_H_
-#define _I40E_FCOE_H_
-
-/* FCoE HW context helper macros */
-#define I40E_DDP_CONTEXT_DESC(R, i) \
- (&(((struct i40e_fcoe_ddp_context_desc *)((R)->desc))[i]))
-
-#define I40E_QUEUE_CONTEXT_DESC(R, i) \
- (&(((struct i40e_fcoe_queue_context_desc *)((R)->desc))[i]))
-
-#define I40E_FILTER_CONTEXT_DESC(R, i) \
- (&(((struct i40e_fcoe_filter_context_desc *)((R)->desc))[i]))
-
-/* receive queue descriptor filter status for FCoE */
-#define I40E_RX_DESC_FLTSTAT_FCMASK 0x3
-#define I40E_RX_DESC_FLTSTAT_NOMTCH 0x0 /* no ddp context match */
-#define I40E_RX_DESC_FLTSTAT_NODDP 0x1 /* no ddp due to error */
-#define I40E_RX_DESC_FLTSTAT_DDP 0x2 /* DDPed payload, post header */
-#define I40E_RX_DESC_FLTSTAT_FCPRSP 0x3 /* FCP_RSP */
-
-/* receive queue descriptor error codes for FCoE */
-#define I40E_RX_DESC_FCOE_ERROR_MASK \
- (I40E_RX_DESC_ERROR_L3L4E_PROT | \
- I40E_RX_DESC_ERROR_L3L4E_FC | \
- I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR | \
- I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN)
-
-/* receive queue descriptor programming error */
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL(e) \
- (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT(e) \
- (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT \
- BIT(I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT)
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT \
- BIT(I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT)
-
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL(e) \
- I40E_RX_PROG_FCOE_ERROR_CONFLICT(e)
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT \
- I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT
-
-/* FCoE DDP related definitions */
-#define I40E_FCOE_MIN_XID 0x0000 /* the min xid supported by fcoe_sw */
-#define I40E_FCOE_MAX_XID 0x0FFF /* the max xid supported by fcoe_sw */
-#define I40E_FCOE_DDP_BUFFCNT_MAX 512 /* 9 bits bufcnt */
-#define I40E_FCOE_DDP_PTR_ALIGN 16
-#define I40E_FCOE_DDP_PTR_MAX (I40E_FCOE_DDP_BUFFCNT_MAX * sizeof(dma_addr_t))
-#define I40E_FCOE_DDP_BUF_MIN 4096
-#define I40E_FCOE_DDP_MAX 2048
-#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT 8
-
-/* supported netdev features for FCoE */
-#define I40E_FCOE_NETIF_FEATURES (NETIF_F_ALL_FCOE | \
- NETIF_F_HW_VLAN_CTAG_TX | \
- NETIF_F_HW_VLAN_CTAG_RX | \
- NETIF_F_HW_VLAN_CTAG_FILTER)
-
-/* DDP context flags */
-enum i40e_fcoe_ddp_flags {
- __I40E_FCOE_DDP_NONE = 1,
- __I40E_FCOE_DDP_TARGET,
- __I40E_FCOE_DDP_INITALIZED,
- __I40E_FCOE_DDP_PROGRAMMED,
- __I40E_FCOE_DDP_DONE,
- __I40E_FCOE_DDP_ABORTED,
- __I40E_FCOE_DDP_UNMAPPED,
-};
-
-/* DDP SW context struct */
-struct i40e_fcoe_ddp {
- int len;
- u16 xid;
- u16 firstoff;
- u16 lastsize;
- u16 list_len;
- u8 fcerr;
- u8 prerr;
- unsigned long flags;
- unsigned int sgc;
- struct scatterlist *sgl;
- dma_addr_t udp;
- u64 *udl;
- struct dma_pool *pool;
-
-};
-
-struct i40e_fcoe_ddp_pool {
- struct dma_pool *pool;
-};
-
-struct i40e_fcoe {
- unsigned long mode;
- atomic_t refcnt;
- struct i40e_fcoe_ddp_pool __percpu *ddp_pool;
- struct i40e_fcoe_ddp ddp[I40E_FCOE_DDP_MAX];
-};
-
-#endif /* _I40E_FCOE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e31adbc75f9c..4a4401c61089 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf);
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
static void i40e_fdir_sb_setup(struct i40e_pf *pf);
static int i40e_veb_get_bw_info(struct i40e_veb *veb);
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add);
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add);
static int i40e_get_capabilities(struct i40e_pf *pf,
enum i40e_admin_queue_opc list_type);
@@ -215,8 +209,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
dev_info(&pf->pdev->dev,
- "param err: pile=%p needed=%d id=0x%04x\n",
- pile, needed, id);
+ "param err: pile=%s needed=%d id=0x%04x\n",
+ pile ? "<valid>" : "<null>", needed, id);
return -EINVAL;
}
@@ -1380,14 +1374,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
ether_addr_copy(f->macaddr, macaddr);
f->vlan = vlan;
- /* If we're in overflow promisc mode, set the state directly
- * to failed, so we don't bother to try sending the filter
- * to the hardware.
- */
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))
- f->state = I40E_FILTER_FAILED;
- else
- f->state = I40E_FILTER_NEW;
+ f->state = I40E_FILTER_NEW;
INIT_HLIST_NODE(&f->hlist);
key = i40e_addr_to_hkey(macaddr);
@@ -2116,17 +2103,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
* @list: the list of filters to send to firmware
* @add_head: Position in the add hlist
* @num_add: the number of filters to add
- * @promisc_change: set to true on exit if promiscuous mode was forced on
*
* Send a request to firmware via AdminQ to add a chunk of filters. Will set
- * promisc_changed to true if the firmware has run out of space for more
- * filters.
+ * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
+ * space for more filters.
*/
static
void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
struct i40e_aqc_add_macvlan_element_data *list,
struct i40e_new_mac_filter *add_head,
- int num_add, bool *promisc_changed)
+ int num_add)
{
struct i40e_hw *hw = &vsi->back->hw;
int aq_err, fcnt;
@@ -2136,7 +2122,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
fcnt = i40e_update_filter_state(num_add, list, add_head);
if (fcnt != num_add) {
- *promisc_changed = true;
set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
"Error %s adding RX filters on %s, promiscuous mode forced on\n",
@@ -2177,11 +2162,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
NULL);
}
- if (aq_ret)
+ if (aq_ret) {
+ set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev,
- "Error %s setting broadcast promiscuous mode on %s\n",
+ "Error %s, forcing overflow promiscuous on %s\n",
i40e_aq_str(hw, hw->aq.asq_last_status),
vsi_name);
+ }
return aq_ret;
}
@@ -2267,9 +2254,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
struct i40e_mac_filter *f;
struct i40e_new_mac_filter *new, *add_head = NULL;
struct i40e_hw *hw = &vsi->back->hw;
+ bool old_overflow, new_overflow;
unsigned int failed_filters = 0;
unsigned int vlan_filters = 0;
- bool promisc_changed = false;
char vsi_name[16] = "PF";
int filter_list_len = 0;
i40e_status aq_ret = 0;
@@ -2291,6 +2278,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
usleep_range(1000, 2000);
pf = vsi->back;
+ old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
if (vsi->netdev) {
changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
vsi->current_netdev_flags = vsi->netdev->flags;
@@ -2423,12 +2412,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
num_add = 0;
hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC,
- vsi->state)) {
- new->state = I40E_FILTER_FAILED;
- continue;
- }
-
/* handle broadcast filters by updating the broadcast
* promiscuous flag instead of adding a MAC filter.
*/
@@ -2464,15 +2447,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
/* flush a full buffer */
if (num_add == filter_list_len) {
i40e_aqc_add_filters(vsi, vsi_name, add_list,
- add_head, num_add,
- &promisc_changed);
+ add_head, num_add);
memset(add_list, 0, list_size);
num_add = 0;
}
}
if (num_add) {
i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
- num_add, &promisc_changed);
+ num_add);
}
/* Now move all of the filters from the temp add list back to
* the VSI's list.
@@ -2501,24 +2483,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
- /* If promiscuous mode has changed, we need to calculate a new
- * threshold for when we are safe to exit
- */
- if (promisc_changed)
- vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
-
/* Check if we are able to exit overflow promiscuous mode. We can
* safely exit if we didn't just enter, we no longer have any failed
* filters, and we have reduced filters below the threshold value.
*/
- if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) &&
- !promisc_changed && !failed_filters &&
- (vsi->active_filters < vsi->promisc_threshold)) {
+ if (old_overflow && !failed_filters &&
+ vsi->active_filters < vsi->promisc_threshold) {
dev_info(&pf->pdev->dev,
"filter logjam cleared on %s, leaving overflow promiscuous mode\n",
vsi_name);
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
- promisc_changed = true;
vsi->promisc_threshold = 0;
}
@@ -2528,6 +2502,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
goto out;
}
+ new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+ /* If we are entering overflow promiscuous, we need to calculate a new
+ * threshold for when we are safe to exit
+ */
+ if (!old_overflow && new_overflow)
+ vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
/* check for changes in promiscuous modes */
if (changed_flags & IFF_ALLMULTI) {
bool cur_multipromisc;
@@ -2548,12 +2530,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
}
}
- if ((changed_flags & IFF_PROMISC) || promisc_changed) {
+ if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
bool cur_promisc;
cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
- test_bit(__I40E_VSI_OVERFLOW_PROMISC,
- vsi->state));
+ new_overflow);
aq_ret = i40e_set_promiscuous(pf, cur_promisc);
if (aq_ret) {
retval = i40e_aq_rc_to_posix(aq_ret,
@@ -2738,22 +2719,6 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
}
/**
- * i40e_vlan_rx_register - Setup or shutdown vlan offload
- * @netdev: network interface to be adjusted
- * @features: netdev features to test if VLAN offload is enabled or not
- **/
-static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
-{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
- struct i40e_vsi *vsi = np->vsi;
-
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- i40e_vlan_stripping_enable(vsi);
- else
- i40e_vlan_stripping_disable(vsi);
-}
-
-/**
* i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
* @vsi: the vsi being configured
* @vid: vlan id to be added (0 = untagged only , -1 = any)
@@ -2928,7 +2893,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
if (!vsi->netdev)
return;
- i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features);
+ if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ i40e_vlan_stripping_enable(vsi);
+ else
+ i40e_vlan_stripping_disable(vsi);
for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q),
@@ -3449,15 +3417,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
struct i40e_q_vector *q_vector = vsi->q_vectors[i];
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr =
+ ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
- q_vector->rx.itr);
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr =
+ ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
- q_vector->tx.itr);
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
@@ -3558,13 +3531,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
u32 val;
/* set the ITR configuration */
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
- wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+ wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+ wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
i40e_enable_misc_int_causes(pf);
@@ -5375,7 +5349,7 @@ out:
* @vsi: VSI to be configured
*
**/
-int i40e_get_link_speed(struct i40e_vsi *vsi)
+static int i40e_get_link_speed(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
@@ -6560,6 +6534,75 @@ int i40e_up(struct i40e_vsi *vsi)
}
/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config = {0};
+ struct i40e_hw *hw = &pf->hw;
+ i40e_status err;
+ u64 mask;
+
+ /* Get the current phy config */
+ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "failed to get phy cap., ret = %s last_status = %s\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+
+ /* If link needs to go up, but was not forced to go down,
+ * no need for a flap
+ */
+ if (is_up && abilities.phy_type != 0)
+ return I40E_SUCCESS;
+
+ /* To force link we need to set bits for all supported PHY types,
+ * but there are now more than 32, so we need to split the bitmap
+ * across two fields.
+ */
+ mask = I40E_PHY_TYPES_BITMASK;
+ config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+ config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+ /* Copy the old settings, except of phy_type */
+ config.abilities = abilities.abilities;
+ config.link_speed = abilities.link_speed;
+ config.eee_capability = abilities.eee_capability;
+ config.eeer = abilities.eeer_val;
+ config.low_power_ctrl = abilities.d3_lpan;
+ err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "set phy config ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return err;
+ }
+
+ /* Update the link info */
+ err = i40e_update_link_info(hw);
+ if (err) {
+ /* Wait a little bit (on 40G cards it sometimes takes a really
+ * long time for link to come back from the atomic reset)
+ * and try once more
+ */
+ msleep(1000);
+ i40e_update_link_info(hw);
+ }
+
+ i40e_aq_set_link_restart_an(hw, true, NULL);
+
+ return I40E_SUCCESS;
+}
+
+/**
* i40e_down - Shutdown the connection processing
* @vsi: the VSI being stopped
**/
@@ -6576,6 +6619,9 @@ void i40e_down(struct i40e_vsi *vsi)
}
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi);
+ if (vsi->type == I40E_VSI_MAIN &&
+ vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+ i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -6848,8 +6894,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter,
* Add or delete a cloud filter for a specific flow spec.
* Returns 0 if the filter were successfully added.
**/
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter, bool add)
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter, bool add)
{
struct i40e_aqc_cloud_filters_element_data cld_filter;
struct i40e_pf *pf = vsi->back;
@@ -6915,9 +6961,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
* Add or delete a cloud filter for a specific flow spec using big buffer.
* Returns 0 if the filter were successfully added.
**/
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
- struct i40e_cloud_filter *filter,
- bool add)
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add)
{
struct i40e_aqc_cloud_filters_element_bb cld_filter;
struct i40e_pf *pf = vsi->back;
@@ -7537,6 +7583,9 @@ int i40e_open(struct net_device *netdev)
netif_carrier_off(netdev);
+ if (i40e_force_link_state(pf, true))
+ return -EAGAIN;
+
err = i40e_vsi_open(vsi);
if (err)
return err;
@@ -8087,6 +8136,88 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
}
/**
+ * i40e_reenable_fdir_sb - Restore FDir SB capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
+{
+ if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
+ pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
+ if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+ (I40E_DEBUG_FD & pf->hw.debug_mask))
+ dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
+ }
+}
+
+/**
+ * i40e_reenable_fdir_atr - Restore FDir ATR capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
+{
+ if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
+ /* ATR uses the same filtering logic as SB rules. It only
+ * functions properly if the input set mask is at the default
+ * settings. It is safe to restore the default input set
+ * because there are no active TCPv4 filter rules.
+ */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+ if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+ (I40E_DEBUG_FD & pf->hw.debug_mask))
+ dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
+ }
+}
+
+/**
+ * i40e_delete_invalid_filter - Delete an invalid FDIR filter
+ * @pf: board private structure
+ * @filter: FDir filter to remove
+ */
+static void i40e_delete_invalid_filter(struct i40e_pf *pf,
+ struct i40e_fdir_filter *filter)
+{
+ /* Update counters */
+ pf->fdir_pf_active_filters--;
+ pf->fd_inv = 0;
+
+ switch (filter->flow_type) {
+ case TCP_V4_FLOW:
+ pf->fd_tcp4_filter_cnt--;
+ break;
+ case UDP_V4_FLOW:
+ pf->fd_udp4_filter_cnt--;
+ break;
+ case SCTP_V4_FLOW:
+ pf->fd_sctp4_filter_cnt--;
+ break;
+ case IP_USER_FLOW:
+ switch (filter->ip4_proto) {
+ case IPPROTO_TCP:
+ pf->fd_tcp4_filter_cnt--;
+ break;
+ case IPPROTO_UDP:
+ pf->fd_udp4_filter_cnt--;
+ break;
+ case IPPROTO_SCTP:
+ pf->fd_sctp4_filter_cnt--;
+ break;
+ case IPPROTO_IP:
+ pf->fd_ip4_filter_cnt--;
+ break;
+ }
+ break;
+ }
+
+ /* Remove the filter from the list and free memory */
+ hlist_del(&filter->fdir_node);
+ kfree(filter);
+}
+
+/**
* i40e_fdir_check_and_reenable - Function to reenable FD ATR or SB if disabled
* @pf: board private structure
**/
@@ -8104,40 +8235,23 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
fcnt_avail = pf->fdir_pf_filter_count;
if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
(pf->fd_add_err == 0) ||
- (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
- if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
- pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
- if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
- (I40E_DEBUG_FD & pf->hw.debug_mask))
- dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
- }
- }
+ (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
+ i40e_reenable_fdir_sb(pf);
/* We should wait for even more space before re-enabling ATR.
* Additionally, we cannot enable ATR as long as we still have TCP SB
* rules active.
*/
if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
- (pf->fd_tcp4_filter_cnt == 0)) {
- if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
- pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
- if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
- (I40E_DEBUG_FD & pf->hw.debug_mask))
- dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
- }
- }
+ (pf->fd_tcp4_filter_cnt == 0))
+ i40e_reenable_fdir_atr(pf);
/* if hw had a problem adding a filter, delete it */
if (pf->fd_inv > 0) {
hlist_for_each_entry_safe(filter, node,
- &pf->fdir_filter_list, fdir_node) {
- if (filter->fd_id == pf->fd_inv) {
- hlist_del(&filter->fdir_node);
- kfree(filter);
- pf->fdir_pf_active_filters--;
- pf->fd_inv = 0;
- }
- }
+ &pf->fdir_filter_list, fdir_node)
+ if (filter->fd_id == pf->fd_inv)
+ i40e_delete_invalid_filter(pf, filter);
}
}
@@ -9215,6 +9329,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
}
i40e_get_oem_version(&pf->hw);
+ if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+ ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+ hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+ /* The following delay is necessary for 4.33 firmware and older
+ * to recover after EMP reset. 200 ms should suffice but we
+ * put here 300 ms to be sure that FW is ready to operate
+ * after reset.
+ */
+ mdelay(300);
+ }
+
/* re-verify the eeprom if we just had an EMP reset */
if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
i40e_verify_eeprom(pf);
@@ -9937,18 +10062,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
mutex_lock(&pf->switch_mutex);
if (!pf->vsi[vsi->idx]) {
- dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
- vsi->idx, vsi->idx, vsi, vsi->type);
+ dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
+ vsi->idx, vsi->idx, vsi->type);
goto unlock_vsi;
}
if (pf->vsi[vsi->idx] != vsi) {
dev_err(&pf->pdev->dev,
- "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n",
+ "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
pf->vsi[vsi->idx]->idx,
- pf->vsi[vsi->idx],
pf->vsi[vsi->idx]->type,
- vsi->idx, vsi, vsi->type);
+ vsi->idx, vsi->type);
goto unlock_vsi;
}
@@ -10018,7 +10142,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->dcb_tc = 0;
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
- ring->tx_itr_setting = pf->tx_itr_default;
+ ring->itr_setting = pf->tx_itr_default;
vsi->tx_rings[i] = ring++;
if (!i40e_enabled_xdp_vsi(vsi))
@@ -10036,7 +10160,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
set_ring_xdp(ring);
- ring->tx_itr_setting = pf->tx_itr_default;
+ ring->itr_setting = pf->tx_itr_default;
vsi->xdp_rings[i] = ring++;
setup_rx:
@@ -10049,7 +10173,7 @@ setup_rx:
ring->count = vsi->num_desc;
ring->size = 0;
ring->dcb_tc = 0;
- ring->rx_itr_setting = pf->rx_itr_default;
+ ring->itr_setting = pf->rx_itr_default;
vsi->rx_rings[i] = ring;
}
@@ -10328,9 +10452,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
netif_napi_add(vsi->netdev, &q_vector->napi,
i40e_napi_poll, NAPI_POLL_WEIGHT);
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
/* tie q_vector and vsi together */
vsi->q_vectors[v_idx] = q_vector;
@@ -10473,6 +10594,9 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
if (err)
goto err_unwind;
+ if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+ i40e_client_update_msix_info(pf);
+
return 0;
err_unwind:
@@ -11089,6 +11213,16 @@ static int i40e_sw_init(struct i40e_pf *pf)
/* IWARP needs one extra vector for CQP just like MISC.*/
pf->num_iwarp_msix = (int)num_online_cpus() + 1;
}
+ /* Stopping the FW LLDP engine is only supported on the
+ * XL710 with a FW ver >= 1.7. Also, stopping FW LLDP
+ * engine is not supported if NPAR is functioning on this
+ * part
+ */
+ if (pf->hw.mac.type == I40E_MAC_XL710 &&
+ !pf->hw.func_caps.npar_enable &&
+ (pf->hw.aq.api_maj_ver > 1 ||
+ (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
+ pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
#ifdef CONFIG_PCI_IOV
if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
@@ -14213,6 +14347,11 @@ static int __maybe_unused i40e_suspend(struct device *dev)
del_timer_sync(&pf->service_timer);
cancel_work_sync(&pf->service_task);
+ /* Client close must be called explicitly here because the timer
+ * has been stopped.
+ */
+ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
i40e_enable_mc_magic_wake(pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 83798b7841b9..eabb636f6a19 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -287,7 +287,7 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
-i40e_status
+enum i40e_status_code
i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count);
@@ -299,7 +299,7 @@ enum i40e_status_code
i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
struct i40e_aqc_cloud_filters_element_data *filters,
u8 filter_count);
-i40e_status
+enum i40e_status_code
i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_cloud_filters_element_bb *filters,
u8 filter_count);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e554aa6cf070..97cfe944b568 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
u32 head, tail;
- head = i40e_get_head(ring);
- tail = readl(ring->tail);
+ if (!in_sw) {
+ head = i40e_get_head(ring);
+ tail = readl(ring->tail);
+ } else {
+ head = ring->next_to_clean;
+ tail = ring->next_to_use;
+ }
if (head != tail)
return (head < tail) ?
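
Both the SW and HW index pairs wrap around the ring, so the pending count is
computed modulo the ring size. A small illustrative sketch of that arithmetic
(not driver code; the names are made up):

	/* Pending descriptors in a ring of 'count' entries given head/tail
	 * indices; e.g. count=512, head=500, tail=10 -> 22 still pending.
	 */
	static unsigned int ring_pending(unsigned int head, unsigned int tail,
					 unsigned int count)
	{
		if (head == tail)
			return 0;
		return (head < tail) ? tail - head : tail + count - head;
	}
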
@@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40e_get_tx_pending(tx_ring) ? packets : -1;
+ i40e_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
@@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
- unsigned int j = i40e_get_tx_pending(tx_ring);
+ unsigned int j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
@@ -995,99 +1001,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
}
}
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
+{
+ return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+ unsigned int divisor;
+
+ switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ case I40E_LINK_SPEED_20GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+ break;
+ default:
+ case I40E_LINK_SPEED_10GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ case I40E_LINK_SPEED_100MB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+ break;
+ }
+
+ return divisor;
+}
+
/**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data
*
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt. The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern. Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
{
- enum i40e_latency_range new_latency_range = rc->latency_range;
- u32 new_itr = rc->itr;
- int bytes_per_usec;
- unsigned int usecs, estimated_usecs;
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
- if (rc->total_packets == 0 || !rc->itr)
- return false;
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+ return;
+
+ /* For Rx we want to push the delay up and default to low latency.
+ * for Tx we want to pull the delay down and default to high latency.
+ */
+ itr = i40e_container_is_rx(q_vector, rc) ?
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+ I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slow there hasn't been
+ * any work, or that there is so much work that NAPI is dealing
+ * with interrupt moderation and we don't need to do anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last 4 interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ packets = rc->total_packets;
+ bytes = rc->total_bytes;
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_usec = rc->total_bytes / usecs;
+ if (i40e_container_is_rx(q_vector, rc)) {
+ /* If Rx there are 1 to 4 packets and bytes are less than
+ * 9000 assume insufficient data to use bulk rate limiting
+ * approach unless Tx is already in bulk rate limiting. We
+ * are likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+ itr = I40E_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+ * bulk mode and we are receiving 4 or fewer packets just
+ * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+ * that the Rx can relax.
+ */
+ if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+ I40E_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+ }
- /* The calculations in this algorithm depend on interrupts actually
- * firing at the ITR rate. This may not happen if the packet rate is
- * really low, or if we've been napi polling. Check to make sure
- * that's not the case before we continue.
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * Between 4 and 56 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
*/
- estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
- if (estimated_usecs > usecs) {
- new_latency_range = I40E_LOW_LATENCY;
- goto reset_latency;
+ if (packets < 56) {
+ itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
+ }
+
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= I40E_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* If packet count is 128 or greater we are likely looking
+ * at a slight overrun of the delay we want. Try halving
+ * our delay to see if that will cut the number of packets
+ * in half per interrupt.
+ */
+ itr /= 2;
+ itr &= I40E_ITR_MASK;
+ if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+ itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
}
- /* simple throttlerate management
- * 0-10MB/s lowest (50000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (18000 ints/s)
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * given the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
*
- * The math works out because the divisor is in 10^(-6) which
- * turns the bytes/us input value into MB/s values, but
- * make sure to use usecs, as the register values written
- * are in 2 usec increments in the ITR registers, and make sure
- * to use the smoothed values that the countdown timer gives us.
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for link speed
+ * differences as ITR scales based on this.
*/
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- if (bytes_per_usec > 10)
- new_latency_range = I40E_LOW_LATENCY;
- break;
- case I40E_LOW_LATENCY:
- if (bytes_per_usec > 20)
- new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_usec <= 10)
- new_latency_range = I40E_LOWEST_LATENCY;
- break;
- case I40E_BULK_LATENCY:
- default:
- if (bytes_per_usec <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
}
-reset_latency:
- rc->latency_range = new_latency_range;
+ /* If we are in low latency mode, halve our delay, which doubles the
+ * rate to somewhere between 100K and 16K ints/sec
+ */
+ if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size /= 2;
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_50K;
- break;
- case I40E_LOW_LATENCY:
- new_itr = I40E_ITR_20K;
- break;
- case I40E_BULK_LATENCY:
- new_itr = I40E_ITR_18K;
- break;
- default:
- break;
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to scale the value up or down based on the link
+ * speed (40G, 25G/20G, 10G, 1G, etc.) via i40e_itr_divisor().
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+ I40E_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
rc->total_bytes = 0;
rc->total_packets = 0;
- rc->last_itr_update = jiffies;
-
- if (new_itr != rc->itr) {
- rc->itr = new_itr;
- return true;
- }
- return false;
}
/**
@@ -1991,7 +2139,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
@@ -2274,29 +2422,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{
u32 val;
+ /* We don't bother with setting the CLEARPBA bit as the data sheet
+ * points out doing so is "meaningless since it was already
+ * auto-cleared". The auto-clearing happens when the interrupt is
+ * asserted.
+ *
+ * Hardware errata 28 also indicates that writing to a
+ * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+ * an event in the PBA anyway so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled.
+ *
+ * The itr value is reported in microseconds, and the register
+ * value is recorded in 2 microsecond units. For this reason we
+ * only need to shift by the interval shift - 1 instead of the
+ * full value.
+ */
+ itr &= I40E_ITR_MASK;
+
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
(type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+ (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
return val;
}
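As a hedged usage sketch of the encoding described above (the field position comes from the existing I40E_PFINT_DYN_CTLN_* defines): passing the new microsecond-based I40E_ITR_20K of 50 usec places 25 in the interval field, i.e. the same 2 usec hardware units the old register-based defines stored directly.

/* illustration only: build a DYN_CTLN value for a 50 usec Rx ITR */
u32 reg = i40e_buildreg_itr(I40E_RX_ITR, I40E_ITR_20K);
/* the interval field now holds 50 >> 1 == 25, because the usec value is
 * shifted by (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)
 */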
/* a small macro to shorten up some long lines */
#define INTREG I40E_PFINT_DYN_CTLN
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
- return vsi->rx_rings[idx]->rx_itr_setting;
-}
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
- return vsi->tx_rings[idx]->tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx is
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -2308,10 +2472,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- bool rx = false, tx = false;
- u32 rxval, txval;
- int idx = q_vector->v_idx;
- int rx_itr_setting, tx_itr_setting;
+ u32 intval;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2319,65 +2480,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
return;
}
- /* avoid dynamic calculation if in countdown mode OR if
- * all dynamic is disabled
- */
- rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
- rx_itr_setting = get_rx_itr(vsi, idx);
- tx_itr_setting = get_tx_itr(vsi, idx);
-
- if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(rx_itr_setting) &&
- !ITR_IS_DYNAMIC(tx_itr_setting))) {
- goto enable_int;
- }
-
- if (ITR_IS_DYNAMIC(rx_itr_setting)) {
- rx = i40e_set_new_dynamic_itr(&q_vector->rx);
- rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
- }
-
- if (ITR_IS_DYNAMIC(tx_itr_setting)) {
- tx = i40e_set_new_dynamic_itr(&q_vector->tx);
- txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
- }
+ /* These will do nothing if dynamic updates are not enabled */
+ i40e_update_itr(q_vector, &q_vector->tx);
+ i40e_update_itr(q_vector, &q_vector->rx);
- if (rx || tx) {
- /* get the higher of the two ITR adjustments and
- * use the same value for both ITR registers
- * when in adaptive mode (Rx and/or Tx)
- */
- u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
- q_vector->tx.itr = q_vector->rx.itr = itr;
- txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
- tx = true;
- rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
- rx = true;
- }
-
- /* only need to enable the interrupt once, but need
- * to possibly update both ITR values
+ /* This block of logic allows us to get away with only updating
+ * one ITR value with each interrupt. The idea is to perform a
+ * pseudo-lazy update with the following criteria.
+ *
+ * 1. Rx is given higher priority than Tx if both are in same state
+ * 2. If we must reduce an ITR, that reduction is given highest priority.
+ * 3. We then give priority to increasing ITR based on amount.
*/
- if (rx) {
- /* set the INTENA_MSK_MASK so that this first write
- * won't actually enable the interrupt, instead just
- * updating the ITR (it's bit 31 PF and VF)
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+ ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+ (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
*/
- rxval |= BIT(31);
- /* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(q_vector->reg_idx), rxval);
+ intval = i40e_buildreg_itr(I40E_TX_ITR,
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+ intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
-enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), txval);
-
- if (q_vector->itr_countdown)
- q_vector->itr_countdown--;
- else
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ wr32(hw, INTREG(q_vector->reg_idx), intval);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 701b708628b0..3c80ea784389 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -30,32 +30,37 @@
#include <net/xdp.h>
/* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
-#define I40E_ITR_100K 0x0005
-#define I40E_ITR_50K 0x000A
-#define I40E_ITR_20K 0x0019
-#define I40E_ITR_18K 0x001B
-#define I40E_ITR_8K 0x003E
-#define I40E_ITR_4K 0x007A
-#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
-#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
#define I40E_DEFAULT_IRQ_WORK 256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160 usec, which is called out as 0xFF0 with a 2 usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value, which is divided by 2, let's use the actual usec values
+ * and avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
+#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
+#define I40E_ITR_100K 10 /* all values below must be even */
+#define I40E_ITR_50K 20
+#define I40E_ITR_20K 50
+#define I40E_ITR_18K 60
+#define I40E_ITR_8K 122
+#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
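A short sketch of how the new microsecond-based defines relate to the old register-based ones they replace (values taken from the removed and added lines above); the local variable names are illustrative only.

u16 setting = I40E_ITR_RX_DEF;		/* 50 usec | I40E_ITR_DYNAMIC */
u16 usecs = ITR_TO_REG(setting);	/* 50: only the dynamic flag is stripped */
bool dynamic = ITR_IS_DYNAMIC(setting);	/* true */
/* the old scheme stored 0x0019 (25 register units of 2 usec) for the same
 * 20K ints/sec default; the divide-by-2 is now folded into the register
 * write paths such as i40e_buildreg_itr()
 */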
/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
* the value of the rate limit is non-zero
*/
#define INTRL_ENA BIT(6)
+#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+
/**
* i40e_intrl_usec_to_reg - convert interrupt rate limit to register
* @intrl: interrupt rate limit to convert
@@ -382,8 +387,7 @@ struct i40e_ring {
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
- u16 rx_itr_setting;
- u16 tx_itr_setting;
+ u16 itr_setting;
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
@@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
ring->flags |= I40E_TXR_FLAGS_XDP;
}
-enum i40e_latency_range {
- I40E_LOWEST_LATENCY = 0,
- I40E_LOW_LATENCY = 1,
- I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
+#define I40E_ITR_ADAPTIVE_BULK 0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
struct i40e_ring_container {
- /* array of pointers to rings */
- struct i40e_ring *ring;
+ struct i40e_ring *ring; /* pointer to linked list of ring(s) */
+ unsigned long next_update; /* jiffies value of next update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
- unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
- enum i40e_latency_range latency_range;
- u16 itr;
+ u16 target_itr; /* target ITR setting for ring(s) */
+ u16 current_itr; /* current ITR setting for ring(s) */
};
/* iterator for handling rings in ring container */
@@ -501,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index cd294e6a8587..69ea15892a5b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -39,7 +39,7 @@
#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
#define I40E_MAX_VSI_QP 16
-#define I40E_MAX_VF_VSI 3
+#define I40E_MAX_VF_VSI 4
#define I40E_MAX_CHAINED_RX_BUFFERS 5
#define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16
@@ -1336,6 +1336,9 @@ struct i40e_hw_port_stats {
#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024
#define I40E_SR_CONTROL_WORD_1_SHIFT 0x06
#define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_PTR_TYPE BIT(15)
+#define I40E_SR_OCP_CFG_WORD0 0x2B
+#define I40E_SR_OCP_ENABLED BIT(15)
/* Shadow RAM related */
#define I40E_SR_SECTOR_SIZE_IN_WORDS 0x800
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index e9309fb9084b..321ab4badb68 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -258,6 +258,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
}
/**
+ * i40e_get_real_pf_qid
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queue_id: queue number
+ *
+ * wrapper function to get pf_queue_id handling ADq code as well
+ **/
+static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
+{
+ int i;
+
+ if (vf->adq_enabled) {
+ /* Although the VF considers all the queues (1 to 16) as its
+ * own, they may actually belong to different VSIs (up to 4).
+ * We need to find which queues belong to which VSI.
+ */
+ for (i = 0; i < vf->num_tc; i++) {
+ if (queue_id < vf->ch[i].num_qps) {
+ vsi_id = vf->ch[i].vsi_id;
+ break;
+ }
+ /* find the right queue id, which is relative to
+ * the given VSI.
+ */
+ queue_id -= vf->ch[i].num_qps;
+ }
+ }
+
+ return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
+}
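A worked example of the queue-id walk above, with a hypothetical ADq layout of two traffic classes holding 4 queue pairs each; the starting vsi_id mirrors how the callers pass the LAN VSI id.

/* VF-relative queue 6: 6 >= ch[0].num_qps (4), so subtract 4 and land on
 * relative queue 2 of ch[1]'s VSI before the PF queue lookup
 */
u16 pf_qid = i40e_get_real_pf_qid(vf, vf->lan_vsi_id, 6);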
+
+/**
* i40e_config_irq_link_list
* @vf: pointer to the VF info
* @vsi_id: id of VSI as given by the FW
@@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+ pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
wr32(hw, reg_idx, reg);
@@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
if (next_q < size) {
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
- vsi_queue_id);
+ pf_queue_id = i40e_get_real_pf_qid(vf,
+ vsi_id,
+ vsi_queue_id);
} else {
pf_queue_id = I40E_QUEUE_END_OF_LIST;
qtype = 0;
@@ -669,18 +702,20 @@ error_param:
/**
* i40e_alloc_vsi_res
* @vf: pointer to the VF info
- * @type: type of VSI to allocate
+ * @idx: VSI index, applies only for ADq mode, zero otherwise
*
* alloc VF vsi context & resources
**/
-static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
{
struct i40e_mac_filter *f = NULL;
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi;
+ u64 max_tx_rate = 0;
int ret = 0;
- vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id);
+ vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
+ vf->vf_id);
if (!vsi) {
dev_err(&pf->pdev->dev,
@@ -689,7 +724,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
ret = -ENOENT;
goto error_alloc_vsi_res;
}
- if (type == I40E_VSI_SRIOV) {
+
+ if (!idx) {
u64 hena = i40e_pf_get_default_rss_hena(pf);
u8 broadcast[ETH_ALEN];
@@ -721,17 +757,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
spin_unlock_bh(&vsi->mac_filter_hash_lock);
wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
+ /* program MAC filter only for the primary VF VSI (idx 0) */
+ ret = i40e_sync_vsi_filters(vsi);
+ if (ret)
+ dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
}
- /* program mac filter */
- ret = i40e_sync_vsi_filters(vsi);
- if (ret)
- dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+ /* store the VSI index and id for ADq; no MAC filter is applied here */
+ if (vf->adq_enabled) {
+ vf->ch[idx].vsi_idx = vsi->idx;
+ vf->ch[idx].vsi_id = vsi->id;
+ }
/* Set VF bandwidth if specified */
if (vf->tx_rate) {
+ max_tx_rate = vf->tx_rate;
+ } else if (vf->ch[idx].max_tx_rate) {
+ max_tx_rate = vf->ch[idx].max_tx_rate;
+ }
+
+ if (max_tx_rate) {
+ max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
- vf->tx_rate / 50, 0, NULL);
+ max_tx_rate, 0, NULL);
if (ret)
dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
vf->vf_id, ret);
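For reference, a small sketch of the rate-to-credits conversion introduced above, assuming I40E_BW_CREDIT_DIVISOR keeps the 50 Mbps-per-credit meaning of the literal "/ 50" it replaces.

/* illustration only: a 1000 Mbps cap becomes 20 scheduler credits */
u64 credits = div_u64(1000, I40E_BW_CREDIT_DIVISOR);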
@@ -742,6 +790,92 @@ error_alloc_vsi_res:
}
/**
+ * i40e_map_pf_queues_to_vsi
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the first part, VSILAN_QTABLE, mapping PF queues to the VSI.
+ **/
+static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, num_tc = 1; /* VF has at least one traffic class */
+ u16 vsi_id, qps;
+ int i, j;
+
+ if (vf->adq_enabled)
+ num_tc = vf->num_tc;
+
+ for (i = 0; i < num_tc; i++) {
+ if (vf->adq_enabled) {
+ qps = vf->ch[i].num_qps;
+ vsi_id = vf->ch[i].vsi_id;
+ } else {
+ qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ vsi_id = vf->lan_vsi_id;
+ }
+
+ for (j = 0; j < 7; j++) {
+ if (j * 2 >= qps) {
+ /* end of list */
+ reg = 0x07FF07FF;
+ } else {
+ u16 qid = i40e_vc_get_pf_queue_id(vf,
+ vsi_id,
+ j * 2);
+ reg = qid;
+ qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
+ (j * 2) + 1);
+ reg |= qid << 16;
+ }
+ i40e_write_rx_ctl(hw,
+ I40E_VSILAN_QTABLE(j, vsi_id),
+ reg);
+ }
+ }
+}
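To make the register packing concrete, a hypothetical VSI with 4 queue pairs mapped to PF queues 32..35 would be programmed as follows (queue numbers are made up for illustration).

u32 qtable0 = (33 << 16) | 32;	/* VSILAN_QTABLE(0): queue pairs 0 and 1 */
u32 qtable1 = (35 << 16) | 34;	/* VSILAN_QTABLE(1): queue pairs 2 and 3 */
u32 qtable_end = 0x07FF07FF;	/* VSILAN_QTABLE(2..6): end of list */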
+
+/**
+ * i40e_map_pf_to_vf_queues
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the second part, VPLAN_QTABLE, and completes the VF mappings.
+ **/
+static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, total_qps = 0;
+ u32 qps, num_tc = 1; /* VF has at least one traffic class */
+ u16 vsi_id, qid;
+ int i, j;
+
+ if (vf->adq_enabled)
+ num_tc = vf->num_tc;
+
+ for (i = 0; i < num_tc; i++) {
+ if (vf->adq_enabled) {
+ qps = vf->ch[i].num_qps;
+ vsi_id = vf->ch[i].vsi_id;
+ } else {
+ qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ vsi_id = vf->lan_vsi_id;
+ }
+
+ for (j = 0; j < qps; j++) {
+ qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
+
+ reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+ wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
+ reg);
+ total_qps++;
+ }
+ }
+}
+
+/**
* i40e_enable_vf_mappings
* @vf: pointer to the VF info
*
@@ -751,8 +885,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
{
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
- u32 reg, total_queue_pairs = 0;
- int j;
+ u32 reg;
/* Tell the hardware we're using noncontiguous mapping. HW requires
* that VF queues be mapped using this method, even when they are
@@ -765,30 +898,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
- /* map PF queues to VF queues */
- for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) {
- u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j);
-
- reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
- wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
- total_queue_pairs++;
- }
-
- /* map PF queues to VSI */
- for (j = 0; j < 7; j++) {
- if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) {
- reg = 0x07FF07FF; /* unused */
- } else {
- u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
- j * 2);
- reg = qid;
- qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
- (j * 2) + 1);
- reg |= qid << 16;
- }
- i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
- reg);
- }
+ i40e_map_pf_to_vf_queues(vf);
+ i40e_map_pf_queues_to_vsi(vf);
i40e_flush(hw);
}
@@ -824,7 +935,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
u32 reg_idx, reg;
- int i, msix_vf;
+ int i, j, msix_vf;
/* Start by disabling VF's configuration API to prevent the OS from
* accessing the VF's VSI after it's freed / invalidated.
@@ -846,6 +957,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
vf->lan_vsi_id = 0;
vf->num_mac = 0;
}
+
+ /* do the accounting and remove additional ADq VSIs */
+ if (vf->adq_enabled && vf->ch[0].vsi_idx) {
+ for (j = 0; j < vf->num_tc; j++) {
+ /* At this point VSI0 has already been released, so don't
+ * release it again; just clear the stored values in the
+ * structure variables
+ */
+ if (j)
+ i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
+ vf->ch[j].vsi_idx = 0;
+ vf->ch[j].vsi_id = 0;
+ }
+ }
msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
/* disable interrupts so the VF starts in a known state */
@@ -891,7 +1016,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
{
struct i40e_pf *pf = vf->pf;
int total_queue_pairs = 0;
- int ret;
+ int ret, idx;
if (vf->num_req_queues &&
vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
@@ -900,11 +1025,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
/* allocate hw vsi context & associated resources */
- ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
+ ret = i40e_alloc_vsi_res(vf, 0);
if (ret)
goto error_alloc;
total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ /* allocate additional VSIs based on tc information for ADq */
+ if (vf->adq_enabled) {
+ if (pf->queues_left >=
+ (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
+ /* TC 0 always belongs to VF VSI */
+ for (idx = 1; idx < vf->num_tc; idx++) {
+ ret = i40e_alloc_vsi_res(vf, idx);
+ if (ret)
+ goto error_alloc;
+ }
+ /* send correct number of queues */
+ total_queue_pairs = I40E_MAX_VF_QUEUES;
+ } else {
+ dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
+ vf->vf_id);
+ vf->adq_enabled = false;
+ }
+ }
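A quick sketch of the queue accounting behind the check above, assuming the driver's usual I40E_MAX_VF_QUEUES of 16 and I40E_DEFAULT_QUEUES_PER_VF of 4.

/* the extra ADq VSIs need 12 more queue pairs beyond the default VSI */
int extra_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
bool can_enable_adq = pf->queues_left >= extra_qps;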
+
/* We account for each VF to get a default number of queue pairs. If
* the VF has now requested more, we need to account for that to make
* certain we never request more queues than we actually have left in
@@ -1537,6 +1681,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
}
/**
+ * i40e_del_qch - delete all the additional VSIs created as a part of ADq
+ * @vf: pointer to VF structure
+ **/
+static void i40e_del_qch(struct i40e_vf *vf)
+{
+ struct i40e_pf *pf = vf->pf;
+ int i;
+
+ /* The first element in the array belongs to the primary VF VSI and we
+ * shouldn't delete it. We should, however, delete the rest of the VSIs
+ * created.
+ */
+ for (i = 1; i < vf->num_tc; i++) {
+ if (vf->ch[i].vsi_idx) {
+ i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
+ vf->ch[i].vsi_idx = 0;
+ vf->ch[i].vsi_id = 0;
+ }
+ }
+}
+
+/**
* i40e_vc_get_vf_resources_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -1631,6 +1796,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
+
vfres->num_vsis = num_vsis;
vfres->num_queue_pairs = vf->num_queue_pairs;
vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1855,27 +2023,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
(struct virtchnl_vsi_queue_config_info *)msg;
struct virtchnl_queue_pair_info *qpi;
struct i40e_pf *pf = vf->pf;
- u16 vsi_id, vsi_queue_id;
+ u16 vsi_id, vsi_queue_id = 0;
i40e_status aq_ret = 0;
- int i;
+ int i, j = 0, idx = 0;
+
+ vsi_id = qci->vsi_id;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
- vsi_id = qci->vsi_id;
if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+
for (i = 0; i < qci->num_queue_pairs; i++) {
qpi = &qci->qpair[i];
- vsi_queue_id = qpi->txq.queue_id;
- if ((qpi->txq.vsi_id != vsi_id) ||
- (qpi->rxq.vsi_id != vsi_id) ||
- (qpi->rxq.queue_id != vsi_queue_id) ||
- !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
+
+ if (!vf->adq_enabled) {
+ vsi_queue_id = qpi->txq.queue_id;
+
+ if (qpi->txq.vsi_id != qci->vsi_id ||
+ qpi->rxq.vsi_id != qci->vsi_id ||
+ qpi->rxq.queue_id != vsi_queue_id) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+ }
+
+ if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
@@ -1887,9 +2065,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
aq_ret = I40E_ERR_PARAM;
goto error_param;
}
+
+ /* For ADq there can be up to 4 VSIs with max 4 queues each.
+ * The VF does not know about these additional VSIs and all
+ * it cares about is its own queues. The PF configures these queues
+ * to the appropriate VSIs based on the TC mapping.
+ */
+ if (vf->adq_enabled) {
+ if (j == (vf->ch[idx].num_qps - 1)) {
+ idx++;
+ j = 0; /* resetting the queue count */
+ vsi_queue_id = 0;
+ } else {
+ j++;
+ vsi_queue_id++;
+ }
+ vsi_id = vf->ch[idx].vsi_id;
+ }
}
/* set vsi num_queue_pairs in use to num configured by VF */
- pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs;
+ if (!vf->adq_enabled) {
+ pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+ qci->num_queue_pairs;
+ } else {
+ for (i = 0; i < vf->num_tc; i++)
+ pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
+ vf->ch[i].num_qps;
+ }
error_param:
/* send the response to the VF */
@@ -1898,6 +2100,33 @@ error_param:
}
/**
+ * i40e_validate_queue_map
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queuemap: Tx or Rx queue map
+ *
+ * check if Tx or Rx queue map is valid
+ **/
+static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
+ unsigned long queuemap)
+{
+ u16 vsi_queue_id, queue_id;
+
+ for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
+ if (vf->adq_enabled) {
+ vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+ queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
+ } else {
+ queue_id = vsi_queue_id;
+ }
+
+ if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
+ return -EINVAL;
+ }
+
+ return 0;
+}
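A hypothetical usage example mirroring the callers below, assuming I40E_MAX_VF_VSI and I40E_DEFAULT_QUEUES_PER_VF are both 4: bit 9 of a queue map is checked as relative queue 9 % 4 == 1 of the VSI behind ch[9 / 4], i.e. ch[2], when ADq is enabled.

unsigned long example_map = BIT(9);	/* VF-relative queue 9 */

if (i40e_validate_queue_map(vf, vsi_id, example_map))
	aq_ret = I40E_ERR_PARAM;	/* queue 9 not valid for this VF */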
+
+/**
* i40e_vc_config_irq_map_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -1911,9 +2140,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
struct virtchnl_irq_map_info *irqmap_info =
(struct virtchnl_irq_map_info *)msg;
struct virtchnl_vector_map *map;
- u16 vsi_id, vsi_queue_id, vector_id;
+ u16 vsi_id, vector_id;
i40e_status aq_ret = 0;
- unsigned long tempmap;
int i;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
@@ -1923,7 +2151,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
for (i = 0; i < irqmap_info->num_vectors; i++) {
map = &irqmap_info->vecmap[i];
-
vector_id = map->vector_id;
vsi_id = map->vsi_id;
/* validate msg params */
@@ -1933,23 +2160,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
goto error_param;
}
- /* lookout for the invalid queue index */
- tempmap = map->rxq_map;
- for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
- if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
- vsi_queue_id)) {
- aq_ret = I40E_ERR_PARAM;
- goto error_param;
- }
+ if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
}
- tempmap = map->txq_map;
- for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
- if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
- vsi_queue_id)) {
- aq_ret = I40E_ERR_PARAM;
- goto error_param;
- }
+ if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
}
i40e_config_irq_link_list(vf, vsi_id, map);
@@ -1975,6 +2193,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
struct i40e_pf *pf = vf->pf;
u16 vsi_id = vqs->vsi_id;
i40e_status aq_ret = 0;
+ int i;
if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
aq_ret = I40E_ERR_PARAM;
@@ -1993,6 +2212,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
aq_ret = I40E_ERR_TIMEOUT;
+
+ /* need to start the rings for additional ADq VSI's as well */
+ if (vf->adq_enabled) {
+ /* zero belongs to LAN VSI */
+ for (i = 1; i < vf->num_tc; i++) {
+ if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
+ aq_ret = I40E_ERR_TIMEOUT;
+ }
+ }
+
error_param:
/* send the response to the VF */
return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
@@ -2139,25 +2368,47 @@ error_param:
/**
* i40e_check_vf_permission
* @vf: pointer to the VF info
- * @macaddr: pointer to the MAC Address being checked
+ * @al: MAC address list from virtchnl
*
- * Check if the VF has permission to add or delete unicast MAC address
- * filters and return error code -EPERM if not. Then check if the
- * address filter requested is broadcast or zero and if so return
- * an invalid MAC address error code.
+ * Check that the given list of MAC addresses is allowed. Will return an
+ * error if any address in the list is not valid. Checks the following
+ * conditions:
+ *
+ * 1) broadcast and zero addresses are never valid
+ * 2) unicast addresses are not allowed if the VMM has administratively set
+ * the VF MAC address, unless the VF is marked as privileged.
+ * 3) There is enough space to add all the addresses.
+ *
+ * Note that to guarantee consistency, it is expected this function be called
+ * while holding the mac_filter_hash_lock, as otherwise the current number of
+ * addresses might not be accurate.
**/
-static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
+static inline int i40e_check_vf_permission(struct i40e_vf *vf,
+ struct virtchnl_ether_addr_list *al)
{
struct i40e_pf *pf = vf->pf;
- int ret = 0;
+ int i;
+
+ /* If this VF is not privileged, then we can't add more than a limited
+ * number of addresses. Check to make sure that the additions do not
+ * push us over the limit.
+ */
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+ (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) {
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+ return -EPERM;
+ }
+
+ for (i = 0; i < al->num_elements; i++) {
+ u8 *addr = al->list[i].addr;
+
+ if (is_broadcast_ether_addr(addr) ||
+ is_zero_ether_addr(addr)) {
+ dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n",
+ addr);
+ return I40E_ERR_INVALID_MAC_ADDR;
+ }
- if (is_broadcast_ether_addr(macaddr) ||
- is_zero_ether_addr(macaddr)) {
- dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr);
- ret = I40E_ERR_INVALID_MAC_ADDR;
- } else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) &&
- !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
- !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) {
/* If the host VMM administrator has set the VF MAC address
* administratively via the ndo_set_vf_mac command then deny
* permission to the VF to add or delete unicast MAC addresses.
@@ -2165,16 +2416,16 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- dev_err(&pf->pdev->dev,
- "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
- ret = -EPERM;
- } else if ((vf->num_mac >= I40E_VC_MAX_MAC_ADDR_PER_VF) &&
- !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
- dev_err(&pf->pdev->dev,
- "VF is not trusted, switch the VF to trusted to add more functionality\n");
- ret = -EPERM;
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+ !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+ !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ dev_err(&pf->pdev->dev,
+ "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
+ return -EPERM;
+ }
}
- return ret;
+
+ return 0;
}
/**
@@ -2201,11 +2452,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
goto error_param;
}
- for (i = 0; i < al->num_elements; i++) {
- ret = i40e_check_vf_permission(vf, al->list[i].addr);
- if (ret)
- goto error_param;
- }
vsi = pf->vsi[vf->lan_vsi_idx];
/* Lock once, because all function inside for loop accesses VSI's
@@ -2213,6 +2459,12 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
*/
spin_lock_bh(&vsi->mac_filter_hash_lock);
+ ret = i40e_check_vf_permission(vf, al);
+ if (ret) {
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+ goto error_param;
+ }
+
/* add new addresses to the list */
for (i = 0; i < al->num_elements; i++) {
struct i40e_mac_filter *f;
@@ -2688,6 +2940,618 @@ err:
}
/**
+ * i40e_validate_cloud_filter
+ * @vf: pointer to the VF info
+ * @tc_filter: virtchnl filter (mask and data) to validate
+ *
+ * This function validates cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+ struct virtchnl_filter *tc_filter)
+{
+ struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
+ struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ bool found = false;
+ int bkt;
+
+ if (!tc_filter->action) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Currently ADq doesn't support Drop Action\n",
+ vf->vf_id);
+ goto err;
+ }
+
+ /* action_meta is TC number here to which the filter is applied */
+ if (!tc_filter->action_meta ||
+ tc_filter->action_meta > I40E_MAX_VF_VSI) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+ vf->vf_id, tc_filter->action_meta);
+ goto err;
+ }
+
+ /* Check filter if it's programmed for advanced mode or basic mode.
+ * There are two ADq modes (for VF only),
+ * 1. Basic mode: intended to allow as many filter options as possible
+ * to be added to a VF in Non-trusted mode. Main goal is
+ * to add filters to its own MAC and VLAN id.
+ * 2. Advanced mode: is for allowing filters to be applied other than
+ * its own MAC or VLAN. This mode requires the VF to be
+ * Trusted.
+ */
+ if (mask.dst_mac[0] && !mask.dst_ip[0]) {
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ f = i40e_find_mac(vsi, data.dst_mac);
+
+ if (!f) {
+ dev_info(&pf->pdev->dev,
+ "Destination MAC %pM doesn't belong to VF %d\n",
+ data.dst_mac, vf->vf_id);
+ goto err;
+ }
+
+ if (mask.vlan_id) {
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+ hlist) {
+ if (f->vlan == ntohs(data.vlan_id)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ dev_info(&pf->pdev->dev,
+ "VF %d doesn't have any VLAN id %u\n",
+ vf->vf_id, ntohs(data.vlan_id));
+ goto err;
+ }
+ }
+ } else {
+ /* Check if VF is trusted */
+ if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+ dev_err(&pf->pdev->dev,
+ "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
+ vf->vf_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask.dst_mac[0] & data.dst_mac[0]) {
+ if (is_broadcast_ether_addr(data.dst_mac) ||
+ is_zero_ether_addr(data.dst_mac)) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
+ vf->vf_id, data.dst_mac);
+ goto err;
+ }
+ }
+
+ if (mask.src_mac[0] & data.src_mac[0]) {
+ if (is_broadcast_ether_addr(data.src_mac) ||
+ is_zero_ether_addr(data.src_mac)) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
+ vf->vf_id, data.src_mac);
+ goto err;
+ }
+ }
+
+ if (mask.dst_port & data.dst_port) {
+ if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ if (mask.src_port & data.src_port) {
+ if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
+ tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
+ dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
+ vf->vf_id);
+ goto err;
+ }
+
+ if (mask.vlan_id & data.vlan_id) {
+ if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
+ dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
+ vf->vf_id);
+ goto err;
+ }
+ }
+
+ return I40E_SUCCESS;
+err:
+ return I40E_ERR_CONFIG;
+}
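A hedged sketch of a filter that would pass the basic-mode checks above for a non-trusted VF: steer TCP traffic for the VF's own MAC and destination port 80 into TC 1. The field names are the ones this function dereferences; VIRTCHNL_ACTION_TC_REDIRECT is assumed to be the non-drop virtchnl action.

struct virtchnl_filter f = {};

f.flow_type = VIRTCHNL_TCP_V4_FLOW;
f.action = VIRTCHNL_ACTION_TC_REDIRECT;		/* assumed action enum value */
f.action_meta = 1;				/* target TC */
eth_broadcast_addr(f.mask.tcp_spec.dst_mac);	/* match the full MAC */
ether_addr_copy(f.data.tcp_spec.dst_mac, vf->default_lan_addr.addr);
f.mask.tcp_spec.dst_port = cpu_to_be16(0xFFFF);
f.data.tcp_spec.dst_port = cpu_to_be16(80);

/* i40e_validate_cloud_filter(vf, &f) should return I40E_SUCCESS here */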
+
+/**
+ * i40e_find_vsi_from_seid - searches for the vsi with the given seid
+ * @vf: pointer to the VF info
+ * @seid: seid of the VSI it is searching for
+ **/
+static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
+{
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ int i;
+
+ for (i = 0; i < vf->num_tc; i++) {
+ vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
+ if (vsi && vsi->seid == seid)
+ return vsi;
+ }
+ return NULL;
+}
+
+/**
+ * i40e_del_all_cloud_filters
+ * @vf: pointer to the VF info
+ *
+ * This function deletes all cloud filters
+ **/
+static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+{
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+ int ret;
+
+ hlist_for_each_entry_safe(cfilter, node,
+ &vf->cloud_filter_list, cloud_node) {
+ vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
+
+ if (!vsi) {
+ dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n",
+ vf->vf_id, cfilter->seid);
+ continue;
+ }
+
+ if (cfilter->dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+ false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+
+ hlist_del(&cfilter->cloud_node);
+ kfree(cfilter);
+ vf->num_cloud_filters--;
+ }
+}
+
+/**
+ * i40e_vc_del_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function deletes a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+ struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+ struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+ struct i40e_cloud_filter cfilter, *cf = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+ i40e_status aq_ret = 0;
+ int i, ret;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!vf->adq_enabled) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: ADq not enabled, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (i40e_validate_cloud_filter(vf, vcf)) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Invalid input, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ memset(&cfilter, 0, sizeof(cfilter));
+ /* parse destination mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+ /* parse source mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+ cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
+ cfilter.dst_port = mask.dst_port & tcf.dst_port;
+ cfilter.src_port = mask.src_port & tcf.src_port;
+
+ switch (vcf->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ cfilter.n_proto = ETH_P_IP;
+ if (mask.dst_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ else if (mask.src_ip[0] & tcf.src_ip[0])
+ memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ cfilter.n_proto = ETH_P_IPV6;
+ if (mask.dst_ip[3] & tcf.dst_ip[3])
+ memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
+ sizeof(cfilter.ip.v6.dst_ip6));
+ if (mask.src_ip[3] & tcf.src_ip[3])
+ memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
+ sizeof(cfilter.ip.v6.src_ip6));
+ break;
+ default:
+ /* TC filter can be configured based on different combinations
+ * and in this case IP is not a part of filter config
+ */
+ dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+ vf->vf_id);
+ }
+
+ /* get the VSI to which the TC belongs */
+ vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+ cfilter.seid = vsi->seid;
+ cfilter.flags = vcf->field_flags;
+
+ /* Deleting TC filter */
+ if (tcf.dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+
+ hlist_for_each_entry_safe(cf, node,
+ &vf->cloud_filter_list, cloud_node) {
+ if (cf->seid != cfilter.seid)
+ continue;
+ if (mask.dst_port)
+ if (cfilter.dst_port != cf->dst_port)
+ continue;
+ if (mask.dst_mac[0])
+ if (!ether_addr_equal(cf->src_mac, cfilter.src_mac))
+ continue;
+ /* for ipv4 data to be valid, only the first word of the mask is set */
+ if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
+ if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip)))
+ continue;
+ /* for ipv6, mask is set for all sixteen bytes (4 words) */
+ if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
+ if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
+ sizeof(cfilter.ip.v6.src_ip6)))
+ continue;
+ if (mask.vlan_id)
+ if (cfilter.vlan_id != cf->vlan_id)
+ continue;
+
+ hlist_del(&cf->cloud_node);
+ kfree(cf);
+ vf->num_cloud_filters--;
+ }
+
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_add_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function adds a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+ struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+ struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ i40e_status aq_ret = 0;
+ int i, ret;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!vf->adq_enabled) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: ADq is not enabled, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (i40e_validate_cloud_filter(vf, vcf)) {
+ dev_info(&pf->pdev->dev,
+ "VF %d: Invalid input/s, can't apply cloud filter\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
+ if (!cfilter)
+ return -ENOMEM;
+
+ /* parse destination mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+ /* parse source mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+ cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
+ cfilter->dst_port = mask.dst_port & tcf.dst_port;
+ cfilter->src_port = mask.src_port & tcf.src_port;
+
+ switch (vcf->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ cfilter->n_proto = ETH_P_IP;
+ if (mask.dst_ip[0] & tcf.dst_ip[0])
+ memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ else if (mask.src_ip[0] & tcf.src_ip[0])
+ memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
+ ARRAY_SIZE(tcf.dst_ip));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ cfilter->n_proto = ETH_P_IPV6;
+ if (mask.dst_ip[3] & tcf.dst_ip[3])
+ memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
+ sizeof(cfilter->ip.v6.dst_ip6));
+ if (mask.src_ip[3] & tcf.src_ip[3])
+ memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
+ sizeof(cfilter->ip.v6.src_ip6));
+ break;
+ default:
+ /* TC filter can be configured based on different combinations
+ * and in this case IP is not a part of filter config
+ */
+ dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+ vf->vf_id);
+ }
+
+ /* get the VSI to which the TC belongs */
+ vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+ cfilter->seid = vsi->seid;
+ cfilter->flags = vcf->field_flags;
+
+ /* Adding cloud filter programmed as TC filter */
+ if (tcf.dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: Failed to add cloud filter, err %s aq_err %s\n",
+ vf->vf_id, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+
+ INIT_HLIST_NODE(&cfilter->cloud_node);
+ hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+ vf->num_cloud_filters++;
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_add_qch_msg: Add queue channel and enable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+ struct virtchnl_tc_info *tci =
+ (struct virtchnl_tc_info *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_link_status *ls = &pf->hw.phy.link_info;
+ int i, adq_request_qps = 0, speed = 0;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* ADq cannot be applied if spoof check is ON */
+ if (vf->spoofchk) {
+ dev_err(&pf->pdev->dev,
+ "Spoof check is ON, turn it OFF to enable ADq\n");
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+ dev_err(&pf->pdev->dev,
+ "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* max number of traffic classes for VF currently capped at 4 */
+ if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
+ dev_err(&pf->pdev->dev,
+ "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
+ vf->vf_id, tci->num_tc);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* validate queues for each TC */
+ for (i = 0; i < tci->num_tc; i++)
+ if (!tci->list[i].count ||
+ tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
+ dev_err(&pf->pdev->dev,
+ "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
+ vf->vf_id, i, tci->list[i].count);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* need Max VF queues but already have default number of queues */
+ adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
+
+ if (pf->queues_left < adq_request_qps) {
+ dev_err(&pf->pdev->dev,
+ "No queues left to allocate to VF %d\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ } else {
+ /* we need to allocate max VF queues to enable ADq so as to
+ * make sure an ADq-enabled VF always gets back its queues
+ * when it goes through a reset.
+ */
+ vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
+ }
+
+ /* get link speed in Mbps to validate the rate limit */
+ switch (ls->link_speed) {
+ case VIRTCHNL_LINK_SPEED_100MB:
+ speed = SPEED_100;
+ break;
+ case VIRTCHNL_LINK_SPEED_1GB:
+ speed = SPEED_1000;
+ break;
+ case VIRTCHNL_LINK_SPEED_10GB:
+ speed = SPEED_10000;
+ break;
+ case VIRTCHNL_LINK_SPEED_20GB:
+ speed = SPEED_20000;
+ break;
+ case VIRTCHNL_LINK_SPEED_25GB:
+ speed = SPEED_25000;
+ break;
+ case VIRTCHNL_LINK_SPEED_40GB:
+ speed = SPEED_40000;
+ break;
+ default:
+ dev_err(&pf->pdev->dev,
+ "Cannot detect link speed\n");
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ /* parse data from the queue channel info */
+ vf->num_tc = tci->num_tc;
+ for (i = 0; i < vf->num_tc; i++) {
+ if (tci->list[i].max_tx_rate) {
+ if (tci->list[i].max_tx_rate > speed) {
+ dev_err(&pf->pdev->dev,
+ "Invalid max tx rate %llu specified for VF %d.",
+ tci->list[i].max_tx_rate,
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ } else {
+ vf->ch[i].max_tx_rate =
+ tci->list[i].max_tx_rate;
+ }
+ }
+ vf->ch[i].num_qps = tci->list[i].count;
+ }
+
+ /* set this flag only after making sure all inputs are sane */
+ vf->adq_enabled = true;
+ /* num_req_queues is set when the user changes the number of queues
+ * via ethtool, and this causes an issue for the default VSI (which
+ * depends on this variable) when ADq is enabled, hence reset it.
+ */
+ vf->num_req_queues = 0;
+
+ /* reset the VF in order to allocate resources */
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+
+ return I40E_SUCCESS;
+
+ /* send the response to the VF */
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_del_qch_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+ struct i40e_pf *pf = vf->pf;
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ if (vf->adq_enabled) {
+ i40e_del_all_cloud_filters(vf);
+ i40e_del_qch(vf);
+ vf->adq_enabled = false;
+ vf->num_tc = 0;
+ dev_info(&pf->pdev->dev,
+ "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
+ vf->vf_id);
+ } else {
+ dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
+ vf->vf_id);
+ aq_ret = I40E_ERR_PARAM;
+ }
+
+ /* reset the VF in order to allocate resources */
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+
+ return I40E_SUCCESS;
+
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
+ aq_ret);
+}
+
+/**
* i40e_vc_process_vf_msg
* @pf: pointer to the PF structure
* @vf_id: source VF id
@@ -2816,7 +3680,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
case VIRTCHNL_OP_REQUEST_QUEUES:
ret = i40e_vc_request_queues_msg(vf, msg, msglen);
break;
-
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ ret = i40e_vc_add_qch_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ ret = i40e_vc_del_qch_msg(vf, msg);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+ ret = i40e_vc_add_cloud_filter(vf, msg);
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+ ret = i40e_vc_del_cloud_filter(vf, msg);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
@@ -2889,6 +3764,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
int ret = 0;
struct hlist_node *h;
int bkt;
+ u8 i;
/* validate the request */
if (vf_id >= pf->num_alloc_vfs) {
@@ -2900,6 +3776,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
vf = &(pf->vf[vf_id]);
vsi = pf->vsi[vf->lan_vsi_idx];
+
+ /* When the VF is resetting wait until it is done.
+ * It can take up to 200 milliseconds,
+ * but wait for up to 300 milliseconds to be safe.
+ */
+ for (i = 0; i < 15; i++) {
+ if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+ break;
+ msleep(20);
+ }
if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
vf_id);
@@ -3382,6 +4268,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
i40e_vc_disable_vf(vf);
dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
vf_id, setting ? "" : "un");
+
+ if (vf->adq_enabled) {
+ if (!vf->trusted) {
+ dev_info(&pf->pdev->dev,
+ "VF %u no longer Trusted, deleting all cloud filters\n",
+ vf_id);
+ i40e_del_all_cloud_filters(vf);
+ }
+ }
+
out:
return ret;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 5efc4f92bb37..6852599b2379 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -69,6 +69,19 @@ enum i40e_vf_capabilities {
I40E_VIRTCHNL_VF_CAP_IWARP,
};
+/* In ADq, max 4 VSIs can be allocated per VF including the primary VF VSI.
+ * These variables are used to store indices, ids and the number of queues
+ * for each VSI including that of the primary VF VSI. Each traffic class is
+ * termed a channel, and each channel can in turn have 4 queues, which means
+ * a max of 16 queues overall per VF.
+ */
+struct i40evf_channel {
+ u16 vsi_idx; /* index in PF struct for all channel VSIs */
+ u16 vsi_id; /* VSI ID used by firmware */
+ u16 num_qps; /* number of queue pairs requested by user */
+ u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
+};
+
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
@@ -111,6 +124,13 @@ struct i40e_vf {
u16 num_mac;
u16 num_vlan;
+ /* ADq related variables */
+ bool adq_enabled; /* flag to enable adq */
+ u8 num_tc;
+ struct i40evf_channel ch[I40E_MAX_VF_VSI];
+ struct hlist_head cloud_filter_list;
+ u16 num_cloud_filters;
+
/* RDMA Client */
struct virtchnl_iwarp_qvlist_info *qvlist_info;
};
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 357d6051281f..e088d23eb083 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
@@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
val);
}
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
+{
+ return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+ unsigned int divisor;
+
+ switch (q_vector->adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ case I40E_LINK_SPEED_20GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+ break;
+ default:
+ case I40E_LINK_SPEED_10GB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ case I40E_LINK_SPEED_100MB:
+ divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+ break;
+ }
+
+ return divisor;
+}
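Worked numbers, for illustration, showing how the same scaled avg_wire_size value converts to different delays by link speed, using the 29920 rung (~1500-byte frames) from the avg_wire_size ladder shown earlier in the PF driver's copy of this algorithm.

/*   40G: DIV_ROUND_UP(29920, 2048) * 2 ==  30 usec (~33K ints/sec)
 *   10G: DIV_ROUND_UP(29920,  512) * 2 == 118 usec
 *    1G: DIV_ROUND_UP(29920,   64) * 2 == 936 usec, later clamped to
 *        I40E_ITR_ADAPTIVE_MAX_USECS (126 usec)
 */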
+
/**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
* @rc: structure containing ring performance data
*
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt. The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern. Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+ struct i40e_ring_container *rc)
{
- enum i40e_latency_range new_latency_range = rc->latency_range;
- u32 new_itr = rc->itr;
- int bytes_per_usec;
- unsigned int usecs, estimated_usecs;
+ unsigned int avg_wire_size, packets, bytes, itr;
+ unsigned long next_update = jiffies;
- if (rc->total_packets == 0 || !rc->itr)
- return false;
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+ return;
+
+ /* For Rx we want to push the delay up and default to low latency.
+ * for Tx we want to pull the delay down and default to high latency.
+ */
+ itr = i40e_container_is_rx(q_vector, rc) ?
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+ I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slowly that there hasn't
+ * been any work, or that there is so much work that NAPI is
+ * dealing with interrupt moderation and we don't need to do anything.
+ */
+ if (time_after(next_update, rc->next_update))
+ goto clear_counts;
+
+ /* If itr_countdown is set it means we programmed an ITR within
+ * the last 4 interrupt cycles. This has a side effect of us
+ * potentially firing an early interrupt. In order to work around
+ * this we need to throw out any data received for a few
+ * interrupts following the update.
+ */
+ if (q_vector->itr_countdown) {
+ itr = rc->target_itr;
+ goto clear_counts;
+ }
+
+ packets = rc->total_packets;
+ bytes = rc->total_bytes;
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_usec = rc->total_bytes / usecs;
+ if (i40e_container_is_rx(q_vector, rc)) {
+ /* For Rx, if there are 1 to 4 packets and fewer than 9000 bytes,
+ * assume there is insufficient data to use the bulk rate limiting
+ * approach unless Tx is already in bulk rate limiting. We are
+ * likely latency driven.
+ */
+ if (packets && packets < 4 && bytes < 9000 &&
+ (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+ itr = I40E_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+ } else if (packets < 4) {
+ /* If we have Tx and Rx ITR maxed and Tx ITR is running in
+ * bulk mode and we are receiving 4 or fewer packets just
+ * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+ * that the Rx can relax.
+ */
+ if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+ (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+ I40E_ITR_ADAPTIVE_MAX_USECS)
+ goto clear_counts;
+ } else if (packets > 32) {
+ /* If we have processed over 32 packets in a single interrupt
+ * for Tx assume we need to switch over to "bulk" mode.
+ */
+ rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+ }
- /* The calculations in this algorithm depend on interrupts actually
- * firing at the ITR rate. This may not happen if the packet rate is
- * really low, or if we've been napi polling. Check to make sure
- * that's not the case before we continue.
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * Between 4 and 56 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
*/
- estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
- if (estimated_usecs > usecs) {
- new_latency_range = I40E_LOW_LATENCY;
- goto reset_latency;
+ if (packets < 56) {
+ itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+ }
+ goto clear_counts;
+ }
+
+ if (packets <= 256) {
+ itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+ itr &= I40E_ITR_MASK;
+
+ /* Between 56 and 112 is our "goldilocks" zone where we are
+ * working out "just right". Just report that our current
+ * ITR is good for us.
+ */
+ if (packets <= 112)
+ goto clear_counts;
+
+ /* If packet count is 128 or greater we are likely looking
+ * at a slight overrun of the delay we want. Try halving
+ * our delay to see if that will cut the number of packets
+ * in half per interrupt.
+ */
+ itr /= 2;
+ itr &= I40E_ITR_MASK;
+ if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+ itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+ goto clear_counts;
}
- /* simple throttlerate management
- * 0-10MB/s lowest (50000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (18000 ints/s)
+ /* The paths below assume we are dealing with a bulk ITR since
+ * number of packets is greater than 256. We are just going to have
+ * to compute a value and try to bring the count under control,
+ * though for smaller packet sizes there isn't much we can do as
+ * NAPI polling will likely be kicking in sooner rather than later.
+ */
+ itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to
+ * fine-tune it, just use the formula below to try and dial in an exact
+ * value given the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
*
- * The math works out because the divisor is in 10^(-6) which
- * turns the bytes/us input value into MB/s values, but
- * make sure to use usecs, as the register values written
- * are in 2 usec increments in the ITR registers, and make sure
- * to use the smoothed values that the countdown timer gives us.
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
*/
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- if (bytes_per_usec > 10)
- new_latency_range = I40E_LOW_LATENCY;
- break;
- case I40E_LOW_LATENCY:
- if (bytes_per_usec > 20)
- new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_usec <= 10)
- new_latency_range = I40E_LOWEST_LATENCY;
- break;
- case I40E_BULK_LATENCY:
- default:
- if (bytes_per_usec <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ if (avg_wire_size <= 60) {
+ /* Start at 250k ints/sec */
+ avg_wire_size = 4096;
+ } else if (avg_wire_size <= 380) {
+ /* 250K ints/sec to 60K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 1696;
+ } else if (avg_wire_size <= 1084) {
+ /* 60K ints/sec to 36K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 36K ints/sec to 30K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 30K ints/sec */
+ avg_wire_size = 32256;
}
-reset_latency:
- rc->latency_range = new_latency_range;
+ /* If we are in low latency mode, halve our delay, which doubles the
+ * rate to somewhere between 100K and 16K ints/sec
+ */
+ if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size /= 2;
- switch (new_latency_range) {
- case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_50K;
- break;
- case I40E_LOW_LATENCY:
- new_itr = I40E_ITR_20K;
- break;
- case I40E_BULK_LATENCY:
- new_itr = I40E_ITR_18K;
- break;
- default:
- break;
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+ I40E_ITR_ADAPTIVE_MIN_INC;
+
+ if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+ itr &= I40E_ITR_ADAPTIVE_LATENCY;
+ itr += I40E_ITR_ADAPTIVE_MAX_USECS;
}
+clear_counts:
+ /* write back value */
+ rc->target_itr = itr;
+
+ /* next update should occur within next jiffy */
+ rc->next_update = next_update + 1;
+
rc->total_bytes = 0;
rc->total_packets = 0;
- rc->last_itr_update = jiffies;
-
- if (new_itr != rc->itr) {
- rc->itr = new_itr;
- return true;
- }
- return false;
}
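
The adjust_by_size path above approximates the commented formula (170 * (size + 24)) / (size + 640) with a piecewise-linear table whose result is 256 times the ITR in microseconds; the final value is then scaled down by the link-speed divisor from i40e_itr_divisor(). Below is a standalone userspace sketch, not driver code, that compares the exact formula with the table for a few average wire sizes:

#include <stdio.h>

/* Exact formula from the comment, scaled by 256 to match the table below. */
static unsigned int itr_exact_x256(unsigned int size)
{
	return (256 * 170 * (size + 24)) / (size + 640);
}

/* Piecewise-linear approximation used by i40e_update_itr(). */
static unsigned int itr_piecewise_x256(unsigned int size)
{
	if (size <= 60)
		return 4096;
	if (size <= 380)
		return size * 40 + 1696;
	if (size <= 1084)
		return size * 15 + 11452;
	if (size <= 1980)
		return size * 5 + 22420;
	return 32256;
}

int main(void)
{
	static const unsigned int sizes[] = { 64, 128, 380, 1084, 1514 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("avg wire size %4u: exact %5u, table %5u\n", sizes[i],
		       itr_exact_x256(sizes[i]), itr_piecewise_x256(sizes[i]));
	return 0;
}
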
/**
@@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
@@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
{
u32 val;
+ /* We don't bother with setting the CLEARPBA bit as the data sheet
+ * points out doing so is "meaningless since it was already
+ * auto-cleared". The auto-clearing happens when the interrupt is
+ * asserted.
+ *
+ * Hardware errata 28 also indicates that writing to a
+ * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+ * an event in the PBA anyway, so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled.
+ *
+ * The itr value is reported in microseconds, and the register
+ * value is recorded in 2 microsecond units. For this reason we
+ * only need to shift by the interval shift - 1 instead of the
+ * full value.
+ */
+ itr &= I40E_ITR_MASK;
+
val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
- I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
(type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
- (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+ (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
return val;
}
/* a small macro to shorten up some long lines */
#define INTREG I40E_VFINT_DYN_CTLN1
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
- struct i40evf_adapter *adapter = vsi->back;
- return adapter->rx_rings[idx].rx_itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
- struct i40evf_adapter *adapter = vsi->back;
-
- return adapter->tx_rings[idx].tx_itr_setting;
-}
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- bool rx = false, tx = false;
- u32 rxval, txval;
- int idx = q_vector->v_idx;
- int rx_itr_setting, tx_itr_setting;
-
- /* avoid dynamic calculation if in countdown mode OR if
- * all dynamic is disabled
- */
- rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
- rx_itr_setting = get_rx_itr(vsi, idx);
- tx_itr_setting = get_tx_itr(vsi, idx);
+ u32 intval;
- if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(rx_itr_setting) &&
- !ITR_IS_DYNAMIC(tx_itr_setting))) {
- goto enable_int;
- }
-
- if (ITR_IS_DYNAMIC(rx_itr_setting)) {
- rx = i40e_set_new_dynamic_itr(&q_vector->rx);
- rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
- }
+ /* These will do nothing if dynamic updates are not enabled */
+ i40e_update_itr(q_vector, &q_vector->tx);
+ i40e_update_itr(q_vector, &q_vector->rx);
- if (ITR_IS_DYNAMIC(tx_itr_setting)) {
- tx = i40e_set_new_dynamic_itr(&q_vector->tx);
- txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
- }
-
- if (rx || tx) {
- /* get the higher of the two ITR adjustments and
- * use the same value for both ITR registers
- * when in adaptive mode (Rx and/or Tx)
- */
- u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-
- q_vector->tx.itr = q_vector->rx.itr = itr;
- txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
- tx = true;
- rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
- rx = true;
- }
-
- /* only need to enable the interrupt once, but need
- * to possibly update both ITR values
+ /* This block of logic allows us to get away with only updating
+ * one ITR value with each interrupt. The idea is to perform a
+ * pseudo-lazy update with the following criteria.
+ *
+ * 1. Rx is given higher priority than Tx if both are in the same state
+ * 2. If we must reduce an ITR, that is given the highest priority
+ * 3. We then give priority to increasing the ITR based on the amount
*/
- if (rx) {
- /* set the INTENA_MSK_MASK so that this first write
- * won't actually enable the interrupt, instead just
- * updating the ITR (it's bit 31 PF and VF)
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+ ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+ (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
*/
- rxval |= BIT(31);
- /* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(q_vector->reg_idx), rxval);
+ intval = i40e_buildreg_itr(I40E_TX_ITR,
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+ intval = i40e_buildreg_itr(I40E_RX_ITR,
+ q_vector->rx.target_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+ intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
}
-enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), txval);
-
- if (q_vector->itr_countdown)
- q_vector->itr_countdown--;
- else
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ wr32(hw, INTREG(q_vector->reg_idx), intval);
}
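
The if/else chain in i40e_update_enable_itr() implements the pseudo-lazy scheme described above: only one ITR register write per interrupt, with reductions beating increases and Rx beating Tx unless the Tx increase is the larger one. Here is a standalone sketch of just that selection logic, omitting the countdown handling and the register write:

#include <stdio.h>

struct itr_state {
	unsigned short target_itr;
	unsigned short current_itr;
};

/* Mirror of the selection chain above: at most one ITR is written back per
 * interrupt; reductions win over increases, and Rx wins over Tx unless the
 * Tx increase is the larger one.
 */
static const char *pick_update(struct itr_state *rx, struct itr_state *tx)
{
	if (rx->target_itr < rx->current_itr) {
		rx->current_itr = rx->target_itr;
		return "Rx (reduce)";
	} else if (tx->target_itr < tx->current_itr ||
		   (rx->target_itr - rx->current_itr) <
		   (tx->target_itr - tx->current_itr)) {
		tx->current_itr = tx->target_itr;
		return "Tx (reduce, or larger increase)";
	} else if (rx->current_itr != rx->target_itr) {
		rx->current_itr = rx->target_itr;
		return "Rx (increase)";
	}
	return "none";
}

int main(void)
{
	struct itr_state rx = { .target_itr = 8, .current_itr = 50 };
	struct itr_state tx = { .target_itr = 84, .current_itr = 50 };

	printf("first interrupt:  %s\n", pick_update(&rx, &tx));
	printf("second interrupt: %s\n", pick_update(&rx, &tx));
	return 0;
}
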
/**
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 7798a6645c3f..9129447d079b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -28,31 +28,35 @@
#define _I40E_TXRX_H_
/* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */
-#define I40E_ITR_100K 0x0005
-#define I40E_ITR_50K 0x000A
-#define I40E_ITR_20K 0x0019
-#define I40E_ITR_18K 0x001B
-#define I40E_ITR_8K 0x003E
-#define I40E_ITR_4K 0x007A
-#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
- I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
-#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
#define I40E_DEFAULT_IRQ_WORK 256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160 usec, which is called out as 0xFF0 with a 2 usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value, which is divided by 2, let's use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
+#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */
+#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */
+#define I40E_ITR_100K 10 /* all values below must be even */
+#define I40E_ITR_50K 20
+#define I40E_ITR_20K 50
+#define I40E_ITR_18K 60
+#define I40E_ITR_8K 122
+#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
* the value of the rate limit is non-zero
*/
#define INTRL_ENA BIT(6)
+#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
#define I40E_INTRL_8K 125 /* 8000 ints/sec */
@@ -362,8 +366,7 @@ struct i40e_ring {
* these values always store the USER setting, and must be converted
* before programming to a register.
*/
- u16 rx_itr_setting;
- u16 tx_itr_setting;
+ u16 itr_setting;
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
@@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
-enum i40e_latency_range {
- I40E_LOWEST_LATENCY = 0,
- I40E_LOW_LATENCY = 1,
- I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000
+#define I40E_ITR_ADAPTIVE_BULK 0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
struct i40e_ring_container {
- /* array of pointers to rings */
- struct i40e_ring *ring;
+ struct i40e_ring *ring; /* pointer to linked list of ring(s) */
+ unsigned long next_update; /* jiffies value of next update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
- unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
- enum i40e_latency_range latency_range;
- u16 itr;
+ u16 target_itr; /* target ITR setting for ring(s) */
+ u16 current_itr; /* current ITR setting for ring(s) */
};
/* iterator for handling rings in ring container */
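
With the constants above, the stored itr_setting is the real interval in microseconds (an even value between 2 and 8160) with bit 15 reserved for the adaptive flag, so translation to register units only happens at write time. A small standalone sketch of the encoding, using the macros exactly as defined above:

#include <stdio.h>

#define I40E_ITR_DYNAMIC	0x8000	/* top bit flags adaptive mode */
#define I40E_ITR_MASK		0x1FFE	/* usable interval bits, always even */
#define ITR_TO_REG(setting)	((setting) & ~I40E_ITR_DYNAMIC)
#define ITR_IS_DYNAMIC(setting)	(!!((setting) & I40E_ITR_DYNAMIC))

int main(void)
{
	/* I40E_ITR_20K (50 usecs) with the adaptive flag set */
	unsigned short itr_setting = 50 | I40E_ITR_DYNAMIC;

	printf("usecs=%d adaptive=%d\n",
	       ITR_TO_REG(itr_setting) & I40E_ITR_MASK,
	       ITR_IS_DYNAMIC(itr_setting));
	return 0;
}
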
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 9690c1ea019e..279dced87e47 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -52,7 +52,10 @@
#include <linux/socket.h>
#include <linux/jiffies.h>
#include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
#include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
#include "i40e_type.h"
#include <linux/avf/virtchnl.h>
@@ -106,6 +109,7 @@ struct i40e_vsi {
#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */
/* MAX_MSIX_Q_VECTORS of these are allocated,
* but we only use one per queue-specific vector.
@@ -117,9 +121,8 @@ struct i40e_q_vector {
struct i40e_ring_container rx;
struct i40e_ring_container tx;
u32 ring_mask;
+ u8 itr_countdown; /* when 0 should adjust adaptive ITR */
u8 num_ringpairs; /* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
- u8 itr_countdown; /* when 0 or 1 update ITR */
u16 v_idx; /* index in the vsi->q_vector array. */
u16 reg_idx; /* register index of the interrupt */
char name[IFNAMSIZ + 15];
@@ -169,6 +172,28 @@ struct i40evf_vlan_filter {
bool add; /* filter needs to be added */
};
+#define I40EVF_MAX_TRAFFIC_CLASS 4
+/* State of traffic class creation */
+enum i40evf_tc_state_t {
+ __I40EVF_TC_INVALID, /* no traffic class, default state */
+ __I40EVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct i40evf_channel_config {
+ struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
+ enum i40evf_tc_state_t state;
+ u8 total_qps;
+};
+
+/* State of cloud filter */
+enum i40evf_cloud_filter_state_t {
+ __I40EVF_CF_INVALID, /* cloud filter not added */
+ __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+ __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+ __I40EVF_CF_ACTIVE, /* cloud filter is active */
+};
+
/* Driver state. The order of these is important! */
enum i40evf_state_t {
__I40EVF_STARTUP, /* driver loaded, probe complete */
@@ -190,6 +215,36 @@ enum i40evf_critical_section_t {
__I40EVF_IN_REMOVE_TASK, /* device being removed */
};
+#define I40EVF_CLOUD_FIELD_OMAC 0x01
+#define I40EVF_CLOUD_FIELD_IMAC 0x02
+#define I40EVF_CLOUD_FIELD_IVLAN 0x04
+#define I40EVF_CLOUD_FIELD_TEN_ID 0x08
+#define I40EVF_CLOUD_FIELD_IIP 0x10
+
+#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC
+#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC
+#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_IVLAN)
+#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\
+ I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\
+ I40EVF_CLOUD_FIELD_IVLAN |\
+ I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct i40evf_cloud_filter {
+ enum i40evf_cloud_filter_state_t state;
+ struct list_head list;
+ struct virtchnl_filter f;
+ unsigned long cookie;
+ bool del; /* filter needs to be deleted */
+ bool add; /* filter needs to be added */
+};
+
/* board specific private data structure */
struct i40evf_adapter {
struct timer_list watchdog_timer;
@@ -225,13 +280,10 @@ struct i40evf_adapter {
u32 flags;
#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED BIT(1)
-#define I40EVF_FLAG_MQ_CAPABLE BIT(2)
#define I40EVF_FLAG_PF_COMMS_FAILED BIT(3)
#define I40EVF_FLAG_RESET_PENDING BIT(4)
#define I40EVF_FLAG_RESET_NEEDED BIT(5)
#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(6)
-#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(7)
#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(8)
#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9)
#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(10)
@@ -241,6 +293,7 @@ struct i40evf_adapter {
#define I40EVF_FLAG_ALLMULTI_ON BIT(14)
#define I40EVF_FLAG_LEGACY_RX BIT(15)
#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16)
+#define I40EVF_FLAG_QUEUES_DISABLED BIT(17)
/* duplicates for common code */
#define I40E_FLAG_DCB_ENABLED 0
#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED
@@ -269,6 +322,10 @@ struct i40evf_adapter {
#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18)
#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19)
#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20)
+#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21)
+#define I40EVF_FLAG_AQ_DISABLE_CHANNELS BIT(22)
+#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23)
+#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24)
/* OS defined structs */
struct net_device *netdev;
@@ -314,6 +371,13 @@ struct i40evf_adapter {
u16 rss_lut_size;
u8 *rss_key;
u8 *rss_lut;
+ /* ADQ related members */
+ struct i40evf_channel_config ch_config;
+ u8 num_tc;
+ struct list_head cloud_filter_list;
+ /* lock to protect access to the cloud filter list */
+ spinlock_t cloud_filter_list_lock;
+ u16 num_cloud_filters;
};
@@ -380,4 +444,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
void i40evf_notify_client_open(struct i40e_vsi *vsi);
void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
+void i40evf_enable_channels(struct i40evf_adapter *adapter);
+void i40evf_disable_channels(struct i40evf_adapter *adapter);
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
#endif /* _I40EVF_H_ */
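
I40EVF_MBPS_DIVISOR reflects that the mqprio max_rate values handed to the driver are in bytes per second, while the link speed they are validated against is in Mbps; 1 Mbit/s equals 125000 bytes/s. A standalone sketch of the conversion performed by the channel validation code:

#include <stdio.h>

#define I40EVF_MBPS_DIVISOR	125000ULL	/* bytes/s per Mbit/s */

int main(void)
{
	/* a 10 Gbit/s cap expressed in bytes per second, as mqprio passes it */
	unsigned long long max_rate_bytes = 1250000000ULL;

	printf("%llu Mbps\n", max_rate_bytes / I40EVF_MBPS_DIVISOR);
	return 0;
}
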
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index e2d8aa19d205..e6793255de0b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev,
rx_ring = &adapter->rx_rings[queue];
tx_ring = &adapter->tx_rings[queue];
- if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+ if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
ec->use_adaptive_rx_coalesce = 1;
- if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+ if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
ec->use_adaptive_tx_coalesce = 1;
- ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
- ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
return 0;
}
@@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
/**
* i40evf_set_itr_per_queue - set ITR values for specific queue
- * @vsi: the VSI to set values for
+ * @adapter: the VF adapter struct to set values for
* @ec: coalesce settings from ethtool
* @queue: the queue to modify
*
@@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
{
struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
- struct i40e_vsi *vsi = &adapter->vsi;
- struct i40e_hw *hw = &adapter->hw;
struct i40e_q_vector *q_vector;
- u16 vector;
- rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
- tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
- rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
if (!ec->use_adaptive_rx_coalesce)
- rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
+ rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
- tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
if (!ec->use_adaptive_tx_coalesce)
- tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
+ tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
q_vector = rx_ring->q_vector;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector = tx_ring->q_vector;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- vector = vsi->base_vector + q_vector->v_idx;
- wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
- i40e_flush(hw);
+ /* The interrupt handler itself will take care of programming
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector, no need to write the values now.
+ */
}
/**
@@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
- } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+ } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+ (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
return -EINVAL;
}
@@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
if (ec->tx_coalesce_usecs == 0) {
if (ec->use_adaptive_tx_coalesce)
netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
- } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+ } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+ (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
return -EINVAL;
}
@@ -699,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev,
return -EINVAL;
}
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+ dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+ return -EINVAL;
+ }
+
/* All of these should have already been checked by ethtool before this
* even gets to us, but just to be sure.
*/
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 16989ad2ca90..7e7cd80abaf4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
rx_ring->vsi = &adapter->vsi;
q_vector->rx.ring = rx_ring;
q_vector->rx.count++;
- q_vector->rx.latency_range = I40E_LOW_LATENCY;
- q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+ q_vector->rx.next_update = jiffies + 1;
+ q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector->ring_mask |= BIT(r_idx);
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
- wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
+ wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+ q_vector->rx.current_itr);
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
}
/**
@@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
tx_ring->vsi = &adapter->vsi;
q_vector->tx.ring = tx_ring;
q_vector->tx.count++;
- q_vector->tx.latency_range = I40E_LOW_LATENCY;
- q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
- q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ q_vector->tx.next_update = jiffies + 1;
+ q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
q_vector->num_ringpairs++;
- wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
+ wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+ q_vector->tx.target_itr);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
}
/**
@@ -783,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
**/
static struct
i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
- u8 *macaddr)
+ const u8 *macaddr)
{
struct i40evf_mac_filter *f;
@@ -806,20 +808,18 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
**/
static struct
i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
- u8 *macaddr)
+ const u8 *macaddr)
{
struct i40evf_mac_filter *f;
if (!macaddr)
return NULL;
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
f = i40evf_find_filter(adapter, macaddr);
if (!f) {
f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
- goto clearout;
+ return f;
ether_addr_copy(f->macaddr, macaddr);
@@ -830,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
f->remove = false;
}
-clearout:
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
return f;
}
@@ -866,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
}
+ f = i40evf_add_filter(adapter, addr->sa_data);
+
spin_unlock_bh(&adapter->mac_vlan_list_lock);
- f = i40evf_add_filter(adapter, addr->sa_data);
if (f) {
ether_addr_copy(hw->mac.addr, addr->sa_data);
ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -878,50 +877,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
}
/**
- * i40evf_set_rx_mode - NDO callback to set the netdev filters
- * @netdev: network interface device structure
- **/
-static void i40evf_set_rx_mode(struct net_device *netdev)
+ * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- struct i40evf_mac_filter *f, *ftmp;
- struct netdev_hw_addr *uca;
- struct netdev_hw_addr *mca;
- struct netdev_hw_addr *ha;
-
- /* add addr if not already in the filter list */
- netdev_for_each_uc_addr(uca, netdev) {
- i40evf_add_filter(adapter, uca->addr);
- }
- netdev_for_each_mc_addr(mca, netdev) {
- i40evf_add_filter(adapter, mca->addr);
- }
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
- list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
- netdev_for_each_mc_addr(mca, netdev)
- if (ether_addr_equal(mca->addr, f->macaddr))
- goto bottom_of_search_loop;
-
- netdev_for_each_uc_addr(uca, netdev)
- if (ether_addr_equal(uca->addr, f->macaddr))
- goto bottom_of_search_loop;
+ if (i40evf_add_filter(adapter, addr))
+ return 0;
+ else
+ return -ENOMEM;
+}
- for_each_dev_addr(netdev, ha)
- if (ether_addr_equal(ha->addr, f->macaddr))
- goto bottom_of_search_loop;
+/**
+ * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct i40evf_mac_filter *f;
- if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr))
- goto bottom_of_search_loop;
+ /* Under some circumstances, we might receive a request to delete
+ * our own device address from our uc list. Because we store the
+ * device address in the VSI's MAC/VLAN filter list, we need to ignore
+ * such requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
- /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
+ f = i40evf_find_filter(adapter, addr);
+ if (f) {
f->remove = true;
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-
-bottom_of_search_loop:
- continue;
}
+ return 0;
+}
+
+/**
+ * i40evf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void i40evf_set_rx_mode(struct net_device *netdev)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+ __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
if (netdev->flags & IFF_PROMISC &&
!(adapter->flags & I40EVF_FLAG_PROMISC_ON))
@@ -936,8 +949,6 @@ bottom_of_search_loop:
else if (!(netdev->flags & IFF_ALLMULTI) &&
adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
-
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
/**
@@ -1025,7 +1036,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter)
void i40evf_down(struct i40evf_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
+ struct i40evf_vlan_filter *vlf;
struct i40evf_mac_filter *f;
+ struct i40evf_cloud_filter *cf;
if (adapter->state <= __I40EVF_DOWN_PENDING)
return;
@@ -1038,17 +1051,29 @@ void i40evf_down(struct i40evf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
+ /* clear the sync flag on all filters */
+ __dev_uc_unsync(adapter->netdev, NULL);
+ __dev_mc_unsync(adapter->netdev, NULL);
+
/* remove all MAC filters */
list_for_each_entry(f, &adapter->mac_filter_list, list) {
f->remove = true;
}
+
/* remove all VLAN filters */
- list_for_each_entry(f, &adapter->vlan_filter_list, list) {
- f->remove = true;
+ list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+ vlf->remove = true;
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ /* remove all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ cf->del = true;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
adapter->state != __I40EVF_RESETTING) {
/* cancel any current operation */
@@ -1059,6 +1084,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
*/
adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
}
@@ -1144,6 +1170,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
*/
if (adapter->num_req_queues)
num_active_queues = adapter->num_req_queues;
+ else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc)
+ num_active_queues = adapter->ch_config.total_qps;
else
num_active_queues = min_t(int,
adapter->vsi_res->num_queue_pairs,
@@ -1169,7 +1198,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
tx_ring->netdev = adapter->netdev;
tx_ring->dev = &adapter->pdev->dev;
tx_ring->count = adapter->tx_desc_count;
- tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
+ tx_ring->itr_setting = I40E_ITR_TX_DEF;
if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
@@ -1178,7 +1207,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
rx_ring->netdev = adapter->netdev;
rx_ring->dev = &adapter->pdev->dev;
rx_ring->count = adapter->rx_desc_count;
- rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
+ rx_ring->itr_setting = I40E_ITR_RX_DEF;
}
adapter->num_active_queues = num_active_queues;
@@ -1471,6 +1500,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
goto err_alloc_q_vectors;
}
+ /* If we've made it this far with the ADq flag on, then we haven't
+ * bailed out anywhere in the middle, and ADq isn't merely enabled:
+ * the actual resources have been allocated in the reset path.
+ * Now we can truly claim that ADq is enabled.
+ */
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc)
+ dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+ adapter->num_tc);
+
dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
(adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
adapter->num_active_queues);
@@ -1712,6 +1751,27 @@ static void i40evf_watchdog_task(struct work_struct *work)
i40evf_set_promiscuous(adapter, 0);
goto watchdog_done;
}
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
+ i40evf_enable_channels(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
+ i40evf_disable_channels(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+ i40evf_add_cloud_filter(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+ i40evf_del_cloud_filter(adapter);
+ goto watchdog_done;
+ }
+
schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
if (adapter->state == __I40EVF_RUNNING)
@@ -1735,6 +1795,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
{
struct i40evf_mac_filter *f, *ftmp;
struct i40evf_vlan_filter *fv, *fvtmp;
+ struct i40evf_cloud_filter *cf, *cftmp;
adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
@@ -1756,7 +1817,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
- /* Delete all of the filters, both MAC and VLAN. */
+ /* Delete all of the filters */
list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
list_del(&f->list);
kfree(f);
@@ -1769,6 +1830,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
i40evf_free_misc_irq(adapter);
i40evf_reset_interrupt_capability(adapter);
i40evf_free_queues(adapter);
@@ -1798,9 +1867,11 @@ static void i40evf_reset_task(struct work_struct *work)
struct i40evf_adapter *adapter = container_of(work,
struct i40evf_adapter,
reset_task);
+ struct virtchnl_vf_resource *vfres = adapter->vf_res;
struct net_device *netdev = adapter->netdev;
struct i40e_hw *hw = &adapter->hw;
struct i40evf_vlan_filter *vlf;
+ struct i40evf_cloud_filter *cf;
struct i40evf_mac_filter *f;
u32 reg_val;
int i = 0, err;
@@ -1893,6 +1964,7 @@ continue_reset:
i40evf_free_all_rx_resources(adapter);
i40evf_free_all_tx_resources(adapter);
+ adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
/* kill and reinit the admin queue */
i40evf_shutdown_adminq(hw);
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
@@ -1924,8 +1996,19 @@ continue_reset:
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ /* check if TCs are running and re-add all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ cf->add = true;
+ }
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+ adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
i40evf_misc_irq_enable(adapter);
mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -2191,6 +2274,712 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
+ u64 max_tx_rate)
+{
+ int speed = 0, ret = 0;
+
+ switch (adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ speed = 40000;
+ break;
+ case I40E_LINK_SPEED_25GB:
+ speed = 25000;
+ break;
+ case I40E_LINK_SPEED_20GB:
+ speed = 20000;
+ break;
+ case I40E_LINK_SPEED_10GB:
+ speed = 10000;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ speed = 1000;
+ break;
+ case I40E_LINK_SPEED_100MB:
+ speed = 100;
+ break;
+ default:
+ break;
+ }
+
+ if (max_tx_rate > speed) {
+ dev_err(&adapter->pdev->dev,
+ "Invalid tx rate specified\n");
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/**
+ * i40evf_validate_ch_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates if the config provided by the user to
+ * configure queue channels is valid or not. Returns 0 on a valid
+ * config.
+ **/
+static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+ u64 total_max_rate = 0;
+ int i, num_qps = 0;
+ u64 tx_rate = 0;
+ int ret = 0;
+
+ if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
+ mqprio_qopt->qopt.num_tc < 1)
+ return -EINVAL;
+
+ for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
+ if (!mqprio_qopt->qopt.count[i] ||
+ mqprio_qopt->qopt.offset[i] != num_qps)
+ return -EINVAL;
+ if (mqprio_qopt->min_rate[i]) {
+ dev_err(&adapter->pdev->dev,
+ "Invalid min tx rate (greater than 0) specified\n");
+ return -EINVAL;
+ }
+ /* convert to Mbps */
+ tx_rate = div_u64(mqprio_qopt->max_rate[i],
+ I40EVF_MBPS_DIVISOR);
+ total_max_rate += tx_rate;
+ num_qps += mqprio_qopt->qopt.count[i];
+ }
+ if (num_qps > MAX_QUEUES)
+ return -EINVAL;
+
+ ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
+ return ret;
+}
+
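
i40evf_validate_ch_config() above accepts between 1 and I40EVF_MAX_TRAFFIC_CLASS TCs, requires each TC to have a nonzero queue count at a contiguous offset, rejects per-TC minimum rates, and bounds both the total queue pairs and the summed (Mbps-converted) maximum rate. A standalone sketch of the queue-layout portion of that check; the MAX_QUEUES value of 16 is an assumption for this sketch only:

#include <stdio.h>

#define MAX_TC		4	/* I40EVF_MAX_TRAFFIC_CLASS */
#define MAX_QUEUES	16	/* assumed VF queue limit for this sketch */

struct tc_cfg {
	unsigned int num_tc;
	unsigned int count[MAX_TC];
	unsigned int offset[MAX_TC];
};

/* Queue-layout checks only; rate validation is left out of the sketch. */
static int validate_ch_config(const struct tc_cfg *cfg)
{
	unsigned int i, num_qps = 0;

	if (cfg->num_tc < 1 || cfg->num_tc > MAX_TC)
		return -1;
	for (i = 0; i < cfg->num_tc; i++) {
		if (!cfg->count[i] || cfg->offset[i] != num_qps)
			return -1;	/* empty TC or non-contiguous offsets */
		num_qps += cfg->count[i];
	}
	return num_qps > MAX_QUEUES ? -1 : 0;
}

int main(void)
{
	struct tc_cfg ok  = { 2, { 4, 4 }, { 0, 4 } };
	struct tc_cfg bad = { 2, { 4, 4 }, { 0, 8 } };	/* gap between TCs */

	printf("ok:  %d\nbad: %d\n",
	       validate_ch_config(&ok), validate_ch_config(&bad));
	return 0;
}
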
+/**
+ * i40evf_del_all_cloud_filters - delete all cloud filters on the traffic classes
+ * @adapter: board private structure
+ **/
+static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+ list) {
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_date: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
+{
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct virtchnl_vf_resource *vfres = adapter->vf_res;
+ u8 num_tc = 0, total_qps = 0;
+ int ret = 0, netdev_tc = 0;
+ u64 max_tx_rate;
+ u16 mode;
+ int i;
+
+ num_tc = mqprio_qopt->qopt.num_tc;
+ mode = mqprio_qopt->mode;
+
+ /* delete queue_channel */
+ if (!mqprio_qopt->qopt.hw) {
+ if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
+ /* reset the tc configuration */
+ netdev_reset_tc(netdev);
+ adapter->num_tc = 0;
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ i40evf_del_all_cloud_filters(adapter);
+ adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+ goto exit;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ /* add queue channel */
+ if (mode == TC_MQPRIO_MODE_CHANNEL) {
+ if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+ dev_err(&adapter->pdev->dev, "ADq not supported\n");
+ return -EOPNOTSUPP;
+ }
+ if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
+ dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+ return -EINVAL;
+ }
+
+ ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
+ if (ret)
+ return ret;
+ /* Return if same TC config is requested */
+ if (adapter->num_tc == num_tc)
+ return 0;
+ adapter->num_tc = num_tc;
+
+ for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+ if (i < num_tc) {
+ adapter->ch_config.ch_info[i].count =
+ mqprio_qopt->qopt.count[i];
+ adapter->ch_config.ch_info[i].offset =
+ mqprio_qopt->qopt.offset[i];
+ total_qps += mqprio_qopt->qopt.count[i];
+ max_tx_rate = mqprio_qopt->max_rate[i];
+ /* convert to Mbps */
+ max_tx_rate = div_u64(max_tx_rate,
+ I40EVF_MBPS_DIVISOR);
+ adapter->ch_config.ch_info[i].max_tx_rate =
+ max_tx_rate;
+ } else {
+ adapter->ch_config.ch_info[i].count = 1;
+ adapter->ch_config.ch_info[i].offset = 0;
+ }
+ }
+ adapter->ch_config.total_qps = total_qps;
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+ netdev_reset_tc(netdev);
+ /* Report the tc mapping up the stack */
+ netdev_set_num_tc(adapter->netdev, num_tc);
+ for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+ u16 qcount = mqprio_qopt->qopt.count[i];
+ u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+ if (i < num_tc)
+ netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+ qoffset);
+ }
+ }
+exit:
+ return ret;
+}
+
+/**
+ * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @f: pointer to struct tc_cls_flower_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *f,
+ struct i40evf_cloud_filter *filter)
+{
+ u16 n_proto_mask = 0;
+ u16 n_proto_key = 0;
+ u8 field_flags = 0;
+ u16 addr_type = 0;
+ u16 n_proto = 0;
+ int i = 0;
+ struct virtchnl_filter *vf = &filter->f;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+ dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+
+ if (mask->keyid != 0)
+ field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ n_proto_key = ntohs(key->n_proto);
+ n_proto_mask = ntohs(mask->n_proto);
+
+ if (n_proto_key == ETH_P_ALL) {
+ n_proto_key = 0;
+ n_proto_mask = 0;
+ }
+ n_proto = n_proto_key & n_proto_mask;
+ if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+ return -EINVAL;
+ if (n_proto == ETH_P_IPV6) {
+ /* specify flow type as TCP IPv6 */
+ vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+ }
+
+ if (key->ip_proto != IPPROTO_TCP) {
+ dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+ return -EINVAL;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+ /* use is_broadcast and is_zero to check for all 0xff or 0 */
+ if (!is_zero_ether_addr(mask->dst)) {
+ if (is_broadcast_ether_addr(mask->dst)) {
+ field_flags |= I40EVF_CLOUD_FIELD_OMAC;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+ mask->dst);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(mask->src)) {
+ if (is_broadcast_ether_addr(mask->src)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IMAC;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+ mask->src);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(key->dst))
+ if (is_valid_ether_addr(key->dst) ||
+ is_multicast_ether_addr(key->dst)) {
+ /* set the mask if a valid dst_mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.dst_mac,
+ key->dst);
+ }
+
+ if (!is_zero_ether_addr(key->src))
+ if (is_valid_ether_addr(key->src) ||
+ is_multicast_ether_addr(key->src)) {
+ /* set the mask if a valid src_mac address */
+ for (i = 0; i < ETH_ALEN; i++)
+ vf->mask.tcp_spec.src_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.src_mac,
+ key->src);
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+
+ if (mask->vlan_id) {
+ if (mask->vlan_id == VLAN_VID_MASK) {
+ field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+ mask->vlan_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
+ vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+
+ addr_type = key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+ be32_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+ be32_to_cpu(mask->src));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
+ dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (key->dst) {
+ vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.dst_ip[0] = key->dst;
+ }
+ if (key->src) {
+ vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.src_ip[0] = key->src;
+ }
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ /* validate mask, make sure it is not IPV6_ADDR_ANY */
+ if (ipv6_addr_any(&mask->dst)) {
+ dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+ IPV6_ADDR_ANY);
+ return I40E_ERR_CONFIG;
+ }
+
+ /* src and dest IPv6 address should not be LOOPBACK
+ * (0:0:0:0:0:0:0:1) which can be represented as ::1
+ */
+ if (ipv6_addr_loopback(&key->dst) ||
+ ipv6_addr_loopback(&key->src)) {
+ dev_err(&adapter->pdev->dev,
+ "ipv6 addr should not be loopback\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+ sizeof(vf->data.tcp_spec.dst_ip));
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+ sizeof(vf->data.tcp_spec.src_ip));
+ }
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be16(0xffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+ be16_to_cpu(mask->src));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be16(0xffff)) {
+ field_flags |= I40EVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+ be16_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+ if (key->dst) {
+ vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.dst_port = key->dst;
+ }
+
+ if (key->src) {
+ vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.src_port = key->src;
+ }
+ }
+ vf->field_flags = field_flags;
+
+ return 0;
+}
+
+/**
+ * i40evf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
+ struct i40evf_cloud_filter *filter)
+{
+ if (tc == 0)
+ return 0;
+ if (tc < adapter->num_tc) {
+ if (!filter->f.data.tcp_spec.dst_port) {
+ dev_err(&adapter->pdev->dev,
+ "Specify destination port to redirect to traffic class other than TC0\n");
+ return -EINVAL;
+ }
+ }
+ /* redirect to a traffic class on the same device */
+ filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+ filter->f.action_meta = tc;
+ return 0;
+}
+
+/**
+ * i40evf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+ struct i40evf_cloud_filter *filter = NULL;
+ int err = -EINVAL, count = 50;
+
+ if (tc < 0) {
+ dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+ return -EINVAL;
+ }
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return -ENOMEM;
+
+ while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+ &adapter->crit_section)) {
+ if (--count == 0)
+ goto err;
+ udelay(1);
+ }
+
+ filter->cookie = cls_flower->cookie;
+
+ /* set the mask to all zeroes to begin with */
+ memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+ /* start out with flow type and eth type IPv4 to begin with */
+ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+ err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
+ if (err < 0)
+ goto err;
+
+ err = i40evf_handle_tclass(adapter, tc, filter);
+ if (err < 0)
+ goto err;
+
+ /* add filter to the list */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_add_tail(&filter->list, &adapter->cloud_filter_list);
+ adapter->num_cloud_filters++;
+ filter->add = true;
+ adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+ if (err)
+ kfree(filter);
+
+ clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ return err;
+}
+
+/**
+ * i40evf_find_cf - Find the cloud filter in the list
+ * @adapter: board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
+ unsigned long *cookie)
+{
+ struct i40evf_cloud_filter *filter = NULL;
+
+ if (!cookie)
+ return NULL;
+
+ list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+ if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+ return filter;
+ }
+ return NULL;
+}
+
+/**
+ * i40evf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ struct i40evf_cloud_filter *filter = NULL;
+ int err = 0;
+
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ filter = i40evf_find_cf(adapter, &cls_flower->cookie);
+ if (filter) {
+ filter->del = true;
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+ } else {
+ err = -EINVAL;
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+ return err;
+}
+
+/**
+ * i40evf_setup_tc_cls_flower - flower classifier offloads
+ * @adapter: board private structure
+ * @cls_flower: pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ if (cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return i40evf_configure_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return i40evf_delete_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * i40evf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv: adapter private structure registered with the block
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return i40evf_setup_tc_cls_flower(cb_priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * i40evf_setup_tc_block - register callbacks for tc
+ * @dev: network interface device structure
+ * @f: tc offload data
+ *
+ * This function registers block callbacks for tc
+ * offloads
+ **/
+static int i40evf_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct i40evf_adapter *adapter = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
+ adapter, adapter);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
+ adapter);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_data: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_QDISC_MQPRIO:
+ return __i40evf_setup_tc(netdev, type_data);
+ case TC_SETUP_BLOCK:
+ return i40evf_setup_tc_block(netdev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
* i40evf_open - Called when a network interface is made active
* @netdev: network interface device structure
*
@@ -2236,7 +3025,12 @@ static int i40evf_open(struct net_device *netdev)
if (err)
goto err_req_irq;
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_configure(adapter);
i40evf_up_complete(adapter);
@@ -2457,6 +3251,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = i40evf_netpoll,
#endif
+ .ndo_setup_tc = i40evf_setup_tc,
};
/**
@@ -2571,6 +3366,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX);
+ /* Enable cloud filter if ADQ is supported */
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+ hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= hw_features;
@@ -2938,9 +3736,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
mutex_init(&hw->aq.arq_mutex);
spin_lock_init(&adapter->mac_vlan_list_lock);
+ spin_lock_init(&adapter->cloud_filter_list_lock);
INIT_LIST_HEAD(&adapter->mac_filter_list);
INIT_LIST_HEAD(&adapter->vlan_filter_list);
+ INIT_LIST_HEAD(&adapter->cloud_filter_list);
INIT_WORK(&adapter->reset_task, i40evf_reset_task);
INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
@@ -3065,7 +3865,9 @@ static void i40evf_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct i40evf_adapter *adapter = netdev_priv(netdev);
+ struct i40evf_vlan_filter *vlf, *vlftmp;
struct i40evf_mac_filter *f, *ftmp;
+ struct i40evf_cloud_filter *cf, *cftmp;
struct i40e_hw *hw = &adapter->hw;
int err;
/* Indicate we are in remove and not to run reset_task */
@@ -3087,6 +3889,7 @@ static void i40evf_remove(struct pci_dev *pdev)
/* Shut down all the garbage mashers on the detention level */
adapter->state = __I40EVF_REMOVE;
adapter->aq_required = 0;
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
i40evf_request_reset(adapter);
msleep(50);
/* If the FW isn't responding, kick it once, but only once. */
@@ -3127,13 +3930,21 @@ static void i40evf_remove(struct pci_dev *pdev)
list_del(&f->list);
kfree(f);
}
- list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
- list_del(&f->list);
- kfree(f);
+ list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+ list) {
+ list_del(&vlf->list);
+ kfree(vlf);
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ list_del(&cf->list);
+ kfree(cf);
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
free_netdev(netdev);
pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 50ce0d6c09ef..3c76c817ca1a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
VIRTCHNL_VF_OFFLOAD_ENCAP |
VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
- VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+ VIRTCHNL_VF_OFFLOAD_ADQ;
adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -344,6 +345,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
void i40evf_map_queues(struct i40evf_adapter *adapter)
{
struct virtchnl_irq_map_info *vimi;
+ struct virtchnl_vector_map *vecmap;
int v_idx, q_vectors, len;
struct i40e_q_vector *q_vector;
@@ -367,17 +369,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
vimi->num_vectors = adapter->num_msix_vectors;
/* Queue vectors first */
for (v_idx = 0; v_idx < q_vectors; v_idx++) {
- q_vector = adapter->q_vectors + v_idx;
- vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
- vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
- vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
- vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask;
+ q_vector = &adapter->q_vectors[v_idx];
+ vecmap = &vimi->vecmap[v_idx];
+
+ vecmap->vsi_id = adapter->vsi_res->vsi_id;
+ vecmap->vector_id = v_idx + NONQ_VECS;
+ vecmap->txq_map = q_vector->ring_mask;
+ vecmap->rxq_map = q_vector->ring_mask;
+ vecmap->rxitr_idx = I40E_RX_ITR;
+ vecmap->txitr_idx = I40E_TX_ITR;
}
/* Misc vector last - this is only for AdminQ messages */
- vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
- vimi->vecmap[v_idx].vector_id = 0;
- vimi->vecmap[v_idx].txq_map = 0;
- vimi->vecmap[v_idx].rxq_map = 0;
+ vecmap = &vimi->vecmap[v_idx];
+ vecmap->vsi_id = adapter->vsi_res->vsi_id;
+ vecmap->vector_id = 0;
+ vecmap->txq_map = 0;
+ vecmap->rxq_map = 0;
adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
@@ -459,7 +466,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
more = true;
}
- veal = kzalloc(len, GFP_KERNEL);
+ veal = kzalloc(len, GFP_ATOMIC);
if (!veal) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -532,7 +539,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
(count * sizeof(struct virtchnl_ether_addr));
more = true;
}
- veal = kzalloc(len, GFP_KERNEL);
+ veal = kzalloc(len, GFP_ATOMIC);
if (!veal) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -606,7 +613,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
(count * sizeof(u16));
more = true;
}
- vvfl = kzalloc(len, GFP_KERNEL);
+ vvfl = kzalloc(len, GFP_ATOMIC);
if (!vvfl) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -678,7 +685,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
(count * sizeof(u16));
more = true;
}
- vvfl = kzalloc(len, GFP_KERNEL);
+ vvfl = kzalloc(len, GFP_ATOMIC);
if (!vvfl) {
spin_unlock_bh(&adapter->mac_vlan_list_lock);
return;
@@ -967,6 +974,205 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_enable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via tc tool.
+ **/
+void i40evf_enable_channels(struct i40evf_adapter *adapter)
+{
+ struct virtchnl_tc_info *vti = NULL;
+ u16 len;
+ int i;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
+ sizeof(struct virtchnl_tc_info);
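+ /* the message carries one virtchnl_channel_info entry per traffic class */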
+
+ vti = kzalloc(len, GFP_KERNEL);
+ if (!vti)
+ return;
+ vti->num_tc = adapter->num_tc;
+ for (i = 0; i < vti->num_tc; i++) {
+ vti->list[i].count = adapter->ch_config.ch_info[i].count;
+ vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+ vti->list[i].pad = 0;
+ vti->list[i].max_tx_rate =
+ adapter->ch_config.ch_info[i].max_tx_rate;
+ }
+
+ adapter->ch_config.state = __I40EVF_TC_RUNNING;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
+ (u8 *)vti, len);
+ kfree(vti);
+}
+
+/**
+ * i40evf_disable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable channels that are configured
+ **/
+void i40evf_disable_channels(struct i40evf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ adapter->ch_config.state = __I40EVF_TC_INVALID;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
+ NULL, 0);
+}
+
+/**
+ * i40evf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
+ struct virtchnl_filter *f)
+{
+ switch (f->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip[0],
+ &f->data.tcp_spec.src_ip[0],
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip,
+ &f->data.tcp_spec.src_ip,
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ }
+}
+
+/**
+ * i40evf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via tc tool.
+ **/
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf;
+ struct virtchnl_filter *f;
+ int len = 0, count = 0;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->add) {
+ count++;
+ break;
+ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
+
+ len = sizeof(struct virtchnl_filter);
+ f = kzalloc(len, GFP_KERNEL);
+ if (!f)
+ return;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->add) {
+ memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+ cf->add = false;
+ cf->state = __I40EVF_CF_ADD_PENDING;
+ i40evf_send_pf_msg(adapter,
+ VIRTCHNL_OP_ADD_CLOUD_FILTER,
+ (u8 *)f, len);
+ }
+ }
+ kfree(f);
+}
+
+/**
+ * i40evf_del_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF delete cloud filters as specified
+ * by the user via tc tool.
+ **/
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
+{
+ struct i40evf_cloud_filter *cf, *cftmp;
+ struct virtchnl_filter *f;
+ int len = 0, count = 0;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->del) {
+ count++;
+ break;
+ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
+
+ len = sizeof(struct virtchnl_filter);
+ f = kzalloc(len, GFP_KERNEL);
+ if (!f)
+ return;
+
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+ if (cf->del) {
+ memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+ cf->del = false;
+ cf->state = __I40EVF_CF_DEL_PENDING;
+ i40evf_send_pf_msg(adapter,
+ VIRTCHNL_OP_DEL_CLOUD_FILTER,
+ (u8 *)f, len);
+ }
+ }
+ kfree(f);
+}
+
+/**
* i40evf_request_reset
* @adapter: adapter structure
*
@@ -1011,14 +1217,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
if (adapter->link_up == link_up)
break;
- /* If we get link up message and start queues before
- * our queues are configured it will trigger a TX hang.
- * In that case, just ignore the link status message,
- * we'll get another one after we enable queues and
- * actually prepared to send traffic.
- */
- if (link_up && adapter->state != __I40EVF_RUNNING)
- break;
+ if (link_up) {
+ /* If we get link up message and start queues
+ * before our queues are configured it will
+ * trigger a TX hang. In that case, just ignore
+ * the link status message; we'll get another one
+ * after we enable queues and are actually prepared
+ * to send traffic.
+ */
+ if (adapter->state != __I40EVF_RUNNING)
+ break;
+
+ /* For an ADq-enabled VF, we reconfigure VSIs and
+ * re-allocate queues. Hence wait till all
+ * queues are enabled.
+ */
+ if (adapter->flags &
+ I40EVF_FLAG_QUEUES_DISABLED)
+ break;
+ }
adapter->link_up = link_up;
if (link_up) {
@@ -1031,7 +1248,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
i40evf_print_link_message(adapter);
break;
case VIRTCHNL_EVENT_RESET_IMPENDING:
- dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+ dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
adapter->flags |= I40EVF_FLAG_RESET_PENDING;
dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
@@ -1063,6 +1280,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
i40evf_stat_str(&adapter->hw, v_retval));
break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+ i40evf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __I40EVF_TC_INVALID;
+ netdev_reset_tc(netdev);
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+ i40evf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __I40EVF_TC_RUNNING;
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp,
+ &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_ADD_PENDING) {
+ cf->state = __I40EVF_CF_INVALID;
+ dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+ i40evf_stat_str(&adapter->hw,
+ v_retval));
+ i40evf_print_cloud_filter(adapter,
+ &cf->f);
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ }
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_DEL_PENDING) {
+ cf->state = __I40EVF_CF_ACTIVE;
+ dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
+ i40evf_stat_str(&adapter->hw,
+ v_retval));
+ i40evf_print_cloud_filter(adapter,
+ &cf->f);
+ }
+ }
+ }
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval,
@@ -1102,6 +1370,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
case VIRTCHNL_OP_ENABLE_QUEUES:
/* enable transmits */
i40evf_irq_enable(adapter, true);
+ adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
break;
case VIRTCHNL_OP_DISABLE_QUEUES:
i40evf_free_all_tx_resources(adapter);
@@ -1156,6 +1425,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
}
}
break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf;
+
+ list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+ if (cf->state == __I40EVF_CF_ADD_PENDING)
+ cf->state = __I40EVF_CF_ACTIVE;
+ }
+ }
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+ struct i40evf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __I40EVF_CF_DEL_PENDING) {
+ cf->state = __I40EVF_CF_INVALID;
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ }
+ break;
default:
if (adapter->current_op && (v_opcode != adapter->current_op))
dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 1c6b8d9176a8..55d6f17d5799 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -109,6 +109,7 @@ struct vf_data_storage {
u16 pf_qos;
u16 tx_rate;
bool spoofchk_enabled;
+ bool trusted;
};
/* Number of unicast MAC filters reserved for the PF in the RAR registers */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b88fae785369..715bb32e6901 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -190,6 +190,8 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev,
static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
bool setting);
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf,
+ bool setting);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
@@ -774,8 +776,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
if (!(~value) && (!reg || !(~readl(hw_addr)))) {
struct net_device *netdev = igb->netdev;
hw->hw_addr = NULL;
- netif_device_detach(netdev);
- netdev_err(netdev, "PCIe link lost, device now detached\n");
+ netdev_err(netdev, "PCIe link lost\n");
}
return value;
@@ -2527,6 +2528,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
.ndo_set_vf_rate = igb_ndo_set_vf_bw,
.ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk,
+ .ndo_set_vf_trust = igb_ndo_set_vf_trust,
.ndo_get_vf_config = igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = igb_netpoll,
@@ -5747,7 +5749,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
- if (adapter->tstamp_config.tx_type & HWTSTAMP_TX_ON &&
+ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
!test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
&adapter->state)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
@@ -6383,6 +6385,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf)
/* By default spoof check is enabled for all VFs */
adapter->vf_data[vf].spoofchk_enabled = true;
+ /* By default VFs are not trusted */
+ adapter->vf_data[vf].trusted = false;
+
return 0;
}
@@ -6940,13 +6945,13 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
}
break;
case E1000_VF_MAC_FILTER_ADD:
- if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+ if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+ !vf_data->trusted) {
dev_warn(&pdev->dev,
"VF %d requested MAC filter but is administratively denied\n",
vf);
return -EINVAL;
}
-
if (!is_valid_ether_addr(addr)) {
dev_warn(&pdev->dev,
"VF %d attempted to set invalid MAC filter\n",
@@ -6998,7 +7003,8 @@ static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
int ret = 0;
if (!info) {
- if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+ if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+ !vf_data->trusted) {
dev_warn(&pdev->dev,
"VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
vf);
@@ -8934,6 +8940,22 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
return 0;
}
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
+ if (vf >= adapter->vfs_allocated_count)
+ return -EINVAL;
+ if (adapter->vf_data[vf].trusted == setting)
+ return 0;
+
+ adapter->vf_data[vf].trusted = setting;
+
+ dev_info(&adapter->pdev->dev, "VF %u is %strusted\n",
+ vf, setting ? "" : "not ");
+ return 0;
+}
+
static int igb_ndo_get_vf_config(struct net_device *netdev,
int vf, struct ifla_vf_info *ivi)
{
@@ -8947,6 +8969,7 @@ static int igb_ndo_get_vf_config(struct net_device *netdev,
ivi->vlan = adapter->vf_data[vf].pf_vlan;
ivi->qos = adapter->vf_data[vf].pf_qos;
ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+ ivi->trusted = adapter->vf_data[vf].trusted;
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 221f15803480..c0e6ab42e0e1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -97,6 +97,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_heartbeat_errors", IXGBE_NETDEV_STAT(tx_heartbeat_errors)},
{"tx_timeout_count", IXGBE_STAT(tx_timeout_count)},
{"tx_restart_queue", IXGBE_STAT(restart_queue)},
+ {"rx_length_errors", IXGBE_STAT(stats.rlec)},
{"rx_long_length_errors", IXGBE_STAT(stats.roc)},
{"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
{"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
@@ -3059,6 +3060,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
for (i = 0; i < reta_entries; i++)
adapter->rss_indir_tbl[i] = indir[i];
+
+ ixgbe_store_reta(adapter);
}
/* Fill out the rss hash key */
@@ -3067,8 +3070,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
ixgbe_store_key(adapter);
}
- ixgbe_store_reta(adapter);
-
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 93eacddb6704..f2254528dcfc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -423,15 +423,21 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
const char aes_gcm_name[] = "rfc4106(gcm(aes))";
int key_len;
- if (xs->aead) {
- key_data = &xs->aead->alg_key[0];
- key_len = xs->aead->alg_key_len;
- alg_name = xs->aead->alg_name;
- } else {
+ if (!xs->aead) {
netdev_err(dev, "Unsupported IPsec algorithm\n");
return -EINVAL;
}
+ if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+ netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+ IXGBE_IPSEC_AUTH_BITS);
+ return -EINVAL;
+ }
+
+ key_data = &xs->aead->alg_key[0];
+ key_len = xs->aead->alg_key_len;
+ alg_name = xs->aead->alg_name;
+
if (strcmp(alg_name, aes_gcm_name)) {
netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
aes_gcm_name);
@@ -718,23 +724,10 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
return true;
}
-/**
- * ixgbe_ipsec_free - called by xfrm garbage collections
- * @xs: pointer to transformer state struct
- *
- * We don't have any garbage to collect, so we shouldn't bother
- * implementing this function, but the XFRM code doesn't check for
- * existence before calling the API callback.
- **/
-static void ixgbe_ipsec_free(struct xfrm_state *xs)
-{
-}
-
static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
.xdo_dev_state_add = ixgbe_ipsec_add_sa,
.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
- .xdo_dev_state_free = ixgbe_ipsec_free,
};
/**
@@ -783,11 +776,33 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
itd->flags = 0;
if (xs->id.proto == IPPROTO_ESP) {
+ struct sk_buff *skb = first->skb;
+ int ret, authlen, trailerlen;
+ u8 padlen;
+
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
IXGBE_ADVTXD_TUCMD_L4T_TCP;
if (first->protocol == htons(ETH_P_IP))
itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
- itd->trailer_len = xs->props.trailer_len;
+
+ /* The actual trailer length is authlen (16 bytes) plus
+ * 2 bytes for the proto and the padlen values, plus
+ * padlen bytes of padding. This ends up not the same
+ * as the static value found in xs->props.trailer_len (21).
+ *
+ * The "correct" way to get the auth length would be to use
+ * authlen = crypto_aead_authsize(xs->data);
+ * but since we know we only have one size to worry about
+ * we can let the compiler use the constant and save us a
+ * few CPU cycles.
+ */
+ authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+
+ ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1);
+ if (unlikely(ret))
+ return 0;
+ trailerlen = authlen + 2 + padlen;
+ itd->trailer_len = trailerlen;
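+ /* e.g. with 6 bytes of ESP padding: trailerlen = 16 + 2 + 6 = 24 bytes */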
}
if (tsa->encrypt)
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index da3ce7849e85..87d2800b94ab 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -32,6 +32,7 @@
#define IXGBE_IPSEC_MAX_RX_IP_COUNT 128
#define IXGBE_IPSEC_BASE_RX_INDEX 0
#define IXGBE_IPSEC_BASE_TX_INDEX IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS 128
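+/* authentication (ICV) length the offload supports: 128 bits, i.e. 16 bytes */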
#define IXGBE_RXTXIDX_IPS_EN 0x00000001
#define IXGBE_RXIDX_TBL_SHIFT 1
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 4242f0213e46..ed4cbe94c355 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
return false;
/* start at VMDq register offset for SR-IOV enabled setups */
- pool = 0;
reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
/* If we are greater than indices move to next pool */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 9fc063af233c..85369423452d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7711,7 +7711,8 @@ static void ixgbe_service_task(struct work_struct *work)
if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
ixgbe_ptp_overflow_check(adapter);
- ixgbe_ptp_rx_hang(adapter);
+ if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+ ixgbe_ptp_rx_hang(adapter);
ixgbe_ptp_tx_hang(adapter);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 27a70a52f3c9..008aa073a679 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -831,7 +831,11 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg);
/* force drop enable for all VF Rx queues */
- ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE);
+ reg = IXGBE_QDE_ENABLE;
+ if (adapter->vfinfo[vf].pf_vlan)
+ reg |= IXGBE_QDE_HIDE_VLAN;
+
+ ixgbe_write_qde(adapter, vf, reg);
/* enable receive for vf */
reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 4400e49090b4..e7623fed42da 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
#define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0)
+ "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
static int ixgbevf_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
@@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
sizeof(drvinfo->version));
strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
sizeof(drvinfo->bus_info));
+
+ drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
}
static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
return IXGBEVF_TEST_LEN;
case ETH_SS_STATS:
return IXGBEVF_STATS_LEN;
+ case ETH_SS_PRIV_FLAGS:
+ return IXGBEVF_PRIV_FLAGS_STR_LEN;
default:
return -EINVAL;
}
@@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
p += ETH_GSTRING_LEN;
}
break;
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(data, ixgbevf_priv_flags_strings,
+ IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+ break;
}
}
@@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
return err;
}
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ u32 priv_flags = 0;
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+ return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ unsigned int flags = adapter->flags;
+
+ flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+ if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+ flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+ if (flags != adapter->flags) {
+ adapter->flags = flags;
+
+ /* reset interface to repopulate queues */
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+ }
+
+ return 0;
+}
+
static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_drvinfo = ixgbevf_get_drvinfo,
.get_regs_len = ixgbevf_get_regs_len,
@@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_rxfh_key_size = ixgbevf_get_rxfh_key_size,
.get_rxfh = ixgbevf_get_rxfh,
.get_link_ksettings = ixgbevf_get_link_ksettings,
+ .get_priv_flags = ixgbevf_get_priv_flags,
+ .set_priv_flags = ixgbevf_set_priv_flags,
};
void ixgbevf_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f6952425c87d..f65ca156af2d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -89,19 +89,15 @@ struct ixgbevf_rx_queue_stats {
};
enum ixgbevf_ring_state_t {
+ __IXGBEVF_RX_3K_BUFFER,
+ __IXGBEVF_RX_BUILD_SKB_ENABLED,
__IXGBEVF_TX_DETECT_HANG,
__IXGBEVF_HANG_CHECK_ARMED,
};
-#define check_for_tx_hang(ring) \
- test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
- set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
- clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-
struct ixgbevf_ring {
struct ixgbevf_ring *next;
+ struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */
struct net_device *netdev;
struct device *dev;
void *desc; /* descriptor ring memory */
@@ -133,7 +129,7 @@ struct ixgbevf_ring {
*/
u16 reg_idx;
int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */
@@ -156,12 +152,20 @@ struct ixgbevf_ring {
/* Supported Rx Buffer Sizes */
#define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */
#define IXGBEVF_RXBUFFER_2048 2048
+#define IXGBEVF_RXBUFFER_3072 3072
#define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ IXGBEVF_RXBUFFER_2048
#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+ (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048
+#endif
+
#define IXGBE_TX_FLAGS_CSUM BIT(0)
#define IXGBE_TX_FLAGS_VLAN BIT(1)
#define IXGBE_TX_FLAGS_TSO BIT(2)
@@ -170,6 +174,50 @@ struct ixgbevf_ring {
#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000
#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
+#define ring_uses_large_buffer(ring) \
+ test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+ set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+ clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+ test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+ set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+ clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return IXGBEVF_RXBUFFER_3072;
+
+ if (ring_uses_build_skb(ring))
+ return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+ return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return 1;
+#endif
+ return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
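+/* e.g. with 4 KiB pages, a large-buffer ring uses order-1 (8 KiB) pages,
+ * each split into two halves holding one 3 KiB receive buffer
+ */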
+
+#define check_for_tx_hang(ring) \
+ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+ set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+ clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
struct ixgbevf_ring_container {
struct ixgbevf_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
@@ -194,7 +242,11 @@ struct ixgbevf_q_vector {
u16 itr; /* Interrupt throttle rate written to EITR */
struct napi_struct napi;
struct ixgbevf_ring_container rx, tx;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
char name[IFNAMSIZ + 9];
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int state;
#define IXGBEVF_QV_STATE_IDLE 0
@@ -331,6 +383,8 @@ struct ixgbevf_adapter {
u32 *rss_key;
u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+ u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX BIT(1)
};
enum ixbgevf_state_t {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 9b3d43d28106..4da449e0a4ba 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *old_buff);
static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
{
@@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+ const unsigned int size)
+{
+ struct ixgbevf_rx_buffer *rx_buffer;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer)
+{
+ if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+ /* hand second half of page back to the ring */
+ ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ ixgbevf_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+}
+
/**
* ixgbevf_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
@@ -554,32 +600,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
return true;
}
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *bi)
{
struct page *page = bi->page;
- dma_addr_t dma = bi->dma;
+ dma_addr_t dma;
/* since we are recycling buffers we should seldom need to alloc */
if (likely(page))
return true;
/* alloc new page for storage */
- page = dev_alloc_page();
+ page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
+ __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
@@ -587,7 +639,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ixgbevf_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
rx_ring->rx_stats.alloc_rx_page++;
@@ -621,7 +673,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* Refresh the desc even if pkt_addr didn't change
@@ -734,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
- struct page *page,
- const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
{
- unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
/* avoid re-using remote pages */
if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -746,17 +797,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely(page_ref_count(page) != pagecnt_bias))
+ if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
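+/* the highest offset at which a 2 KiB buffer plus skb_shared_info still fits */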
- if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
return false;
#endif
@@ -765,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(pagecnt_bias == 1)) {
+ if (unlikely(!pagecnt_bias)) {
page_ref_add(page, USHRT_MAX);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -777,127 +824,81 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
* @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
*
* This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
**/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
- u16 size,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int size)
{
- struct page *page = rx_buffer->page;
- unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
- unsigned int truesize = IXGBEVF_RX_BUFSZ;
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+#endif
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
#endif
- unsigned int pull_len;
-
- if (unlikely(skb_is_nonlinear(skb)))
- goto add_tail_frag;
-
- if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
- memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
- /* page is not reserved, we can reuse buffer as is */
- if (likely(!ixgbevf_page_is_reserved(page)))
- return true;
-
- /* this page cannot be reused so discard it */
- return false;
- }
-
- /* we need the header to contain the greater of either ETH_HLEN or
- * 60 bytes if the skb->len is less than 60 for skb_pad.
- */
- pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
-
- /* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
- /* update all of the pointers */
- va += pull_len;
- size -= pull_len;
-
-add_tail_frag:
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- (unsigned long)va & ~PAGE_MASK, size, truesize);
-
- return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
}
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ unsigned int size)
{
- struct ixgbevf_rx_buffer *rx_buffer;
- struct page *page;
- u16 size = le16_to_cpu(rx_desc->wb.upper.length);
-
- rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
- page = rx_buffer->page;
- prefetchw(page);
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- size,
- DMA_FROM_DEVICE);
-
- if (likely(!skb)) {
- void *page_addr = page_address(page) +
- rx_buffer->page_offset;
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+ unsigned int headlen;
+ struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(page_addr);
+ /* prefetch first cache line of first page */
+ prefetch(va);
#if L1_CACHE_BYTES < 128
- prefetch(page_addr + L1_CACHE_BYTES);
+ prefetch(va + L1_CACHE_BYTES);
#endif
- /* allocate a skb to store the frags */
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- IXGBEVF_RX_HDR_SIZE);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_rx_buff_failed++;
- return NULL;
- }
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+ if (unlikely(!skb))
+ return NULL;
- /* we will be copying header into skb->data in
- * pskb_may_pull so it is in our interest to prefetch
- * it now to avoid a possible cache miss
- */
- prefetchw(skb->data);
- }
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IXGBEVF_RX_HDR_SIZE)
+ headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
- /* pull page into skb */
- if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
- /* hand second half of page back to the ring */
- ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ /* align pull length to size of long to optimize memcpy performance */
+ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+ /* update all of the pointers */
+ size -= headlen;
+ if (size) {
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ (va + headlen) - page_address(rx_buffer->page),
+ size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
} else {
- /* We are not reusing the buffer so unmap it and free
- * any references we are holding to it
- */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE,
- IXGBEVF_RX_DMA_ATTR);
- __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+ rx_buffer->pagecnt_bias++;
}
- /* clear contents of buffer_info */
- rx_buffer->dma = 0;
- rx_buffer->page = NULL;
-
return skb;
}
@@ -909,6 +910,44 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
}
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ unsigned int size)
+{
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+#endif
+ struct sk_buff *skb;
+
+ /* prefetch first cache line of first page */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ /* build an skb around the page buffer */
+ skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, IXGBEVF_SKB_PAD);
+ __skb_put(skb, size);
+
+ /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+
+ return skb;
+}
static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct ixgbevf_ring *rx_ring,
int budget)
@@ -919,6 +958,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
+ struct ixgbevf_rx_buffer *rx_buffer;
+ unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -927,8 +968,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
}
rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
- if (!rx_desc->wb.upper.length)
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
break;
/* This memory barrier is needed to keep us from reading
@@ -937,15 +978,26 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
*/
rmb();
+ rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
/* retrieve a buffer from the ring */
- skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+ if (skb)
+ ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ else if (ring_uses_build_skb(rx_ring))
+ skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+ rx_desc, size);
+ else
+ skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+ rx_desc, size);
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
+ rx_buffer->pagecnt_bias++;
break;
}
+ ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -1260,85 +1312,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
return IRQ_HANDLED;
}
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
- int r_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->rx_ring[r_idx]->next = q_vector->rx.ring;
- q_vector->rx.ring = a->rx_ring[r_idx];
- q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
- int t_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->tx_ring[t_idx]->next = q_vector->tx.ring;
- q_vector->tx.ring = a->tx_ring[t_idx];
- q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code. Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible. You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
- int q_vectors;
- int v_start = 0;
- int rxr_idx = 0, txr_idx = 0;
- int rxr_remaining = adapter->num_rx_queues;
- int txr_remaining = adapter->num_tx_queues;
- int i, j;
- int rqpv, tqpv;
-
- q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- /* The ideal configuration...
- * We have enough vectors to map one per queue.
- */
- if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
- for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
- map_vector_to_rxq(adapter, v_start, rxr_idx);
-
- for (; txr_idx < txr_remaining; v_start++, txr_idx++)
- map_vector_to_txq(adapter, v_start, txr_idx);
- return 0;
- }
-
- /* If we don't have enough vectors for a 1-to-1
- * mapping, we'll have to group them so there are
- * multiple queues per vector.
- */
- /* Re-adjusting *qpv takes care of the remainder. */
- for (i = v_start; i < q_vectors; i++) {
- rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
- for (j = 0; j < rqpv; j++) {
- map_vector_to_rxq(adapter, i, rxr_idx);
- rxr_idx++;
- rxr_remaining--;
- }
- }
- for (i = v_start; i < q_vectors; i++) {
- tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
- for (j = 0; j < tqpv; j++) {
- map_vector_to_txq(adapter, i, txr_idx);
- txr_idx++;
- txr_remaining--;
- }
- }
-
- return 0;
-}
-
/**
* ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
* @adapter: board private structure
@@ -1411,20 +1384,6 @@ free_queue_irqs:
return err;
}
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
- int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (i = 0; i < q_vectors; i++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
- q_vector->rx.ring = NULL;
- q_vector->tx.ring = NULL;
- q_vector->rx.count = 0;
- q_vector->tx.count = 0;
- }
-}
-
/**
* ixgbevf_request_irq - initialize interrupts
* @adapter: board private structure
@@ -1464,8 +1423,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
free_irq(adapter->msix_entries[i].vector,
adapter->q_vector[i]);
}
-
- ixgbevf_reset_q_vectors(adapter);
}
/**
@@ -1587,7 +1544,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *ring, int index)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 srrctl;
@@ -1595,7 +1553,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
srrctl = IXGBE_SRRCTL_DROP_EN;
srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
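The SRRCTL change above encodes the per-ring packet buffer size (2 KB, or 3 KB when ring_uses_large_buffer() is set) into the BSIZEPKT field. A minimal standalone sketch of that encoding, not part of the patch; the shift value of 10 (1 KB units) is an assumption based on the ixgbe register layout:

/* Sketch: SRRCTL BSIZEPKT encoding for the 2 KB and 3 KB Rx buffers. */
#include <stdio.h>

#define SRRCTL_BSIZEPKT_SHIFT	10	/* assumed: field counts 1 KB units */

int main(void)
{
	unsigned int sizes[] = { 2048, 3072 };
	unsigned int i;

	for (i = 0; i < 2; i++)
		printf("%u-byte buffer -> BSIZEPKT field 0x%x\n",
		       sizes[i], sizes[i] >> SRRCTL_BSIZEPKT_SHIFT);
	return 0;	/* prints 0x2 and 0x3 */
}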
@@ -1767,10 +1728,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
ring->next_to_use = 0;
ring->next_to_alloc = 0;
- ixgbevf_configure_srrctl(adapter, reg_idx);
+ ixgbevf_configure_srrctl(adapter, ring, reg_idx);
- /* allow any size packet since we can handle overflow */
- rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+ /* RXDCTL.RLPML does not work on 82599 */
+ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+ rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+ IXGBE_RXDCTL_RLPML_EN);
+
+#if (PAGE_SIZE < 8192)
+ /* Limit the maximum frame size so we don't overrun the skb */
+ if (ring_uses_build_skb(ring) &&
+ !ring_uses_large_buffer(ring))
+ rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+ IXGBE_RXDCTL_RLPML_EN;
+#endif
+ }
rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1779,6 +1751,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
}
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring)
+{
+ struct net_device *netdev = adapter->netdev;
+ unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+ /* set build_skb and buffer size flags */
+ clear_ring_build_skb_enabled(rx_ring);
+ clear_ring_uses_large_buffer(rx_ring);
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ return;
+
+ set_ring_build_skb_enabled(rx_ring);
+
+ if (PAGE_SIZE < 8192) {
+ if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+ return;
+
+ set_ring_uses_large_buffer(rx_ring);
+ }
+}
+
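ixgbevf_set_rx_buffer_len() above picks between three Rx buffer layouts: legacy Rx, build_skb with the default buffer, and the larger buffer on 4 KB-page systems when the frame would not fit the build_skb headroom. A standalone sketch of that decision, not part of the patch; the 4 KB page assumption and the threshold parameter are illustrative:

#include <stdio.h>

enum rx_mode { RX_LEGACY, RX_BUILD_SKB, RX_BUILD_SKB_LARGE };

/* Mirrors the flag logic for PAGE_SIZE < 8192 only; on larger pages the
 * large-buffer flag is never set.
 */
static enum rx_mode pick_rx_mode(int legacy_rx, unsigned int max_frame,
				 unsigned int max_frame_build_skb)
{
	if (legacy_rx)
		return RX_LEGACY;
	if (max_frame > max_frame_build_skb)
		return RX_BUILD_SKB_LARGE;
	return RX_BUILD_SKB;
}

int main(void)
{
	printf("%d %d %d\n",
	       pick_rx_mode(1, 1518, 1536),	/* legacy flag wins */
	       pick_rx_mode(0, 1518, 1536),	/* fits the build_skb buffer */
	       pick_rx_mode(0, 9000, 1536));	/* needs the large buffer */
	return 0;
}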
/**
* ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
* @adapter: board private structure
@@ -1806,8 +1801,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
/* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring
*/
- for (i = 0; i < adapter->num_rx_queues; i++)
- ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+ ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+ ixgbevf_configure_rx_ring(adapter, rx_ring);
+ }
}
static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2136,13 +2135,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev,
rx_buffer->dma,
- PAGE_SIZE,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBEVF_RX_DMA_ATTR);
@@ -2405,105 +2404,171 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
}
/**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+ int vector, v_budget;
+
+ /* It's easy to be greedy for MSI-X vectors, but it really
+ * doesn't do us much good if we have a lot more vectors
+	 * than CPUs. So let's be conservative and only ask for
+	 * (roughly) the same number of vectors as there are CPUs.
+ * The default is to use pairs of vectors.
+ */
+ v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+ v_budget = min_t(int, v_budget, num_online_cpus());
+ v_budget += NON_Q_VECTORS;
+
+ adapter->msix_entries = kcalloc(v_budget,
+ sizeof(struct msix_entry), GFP_KERNEL);
+ if (!adapter->msix_entries)
+ return -ENOMEM;
+
+ for (vector = 0; vector < v_budget; vector++)
+ adapter->msix_entries[vector].entry = vector;
+
+ /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+ * does not support any other modes, so we will simply fail here. Note
+	 * that we clean up the msix_entries pointer elsewhere.
+ */
+ return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+ struct ixgbevf_ring_container *head)
+{
+ ring->next = head->ring;
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
*
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time. The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
**/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+ int txr_count, int txr_idx,
+ int rxr_count, int rxr_idx)
{
+ struct ixgbevf_q_vector *q_vector;
struct ixgbevf_ring *ring;
- int rx = 0, tx = 0;
+ int ring_count, size;
- for (; tx < adapter->num_tx_queues; tx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ ring_count = txr_count + rxr_count;
+ size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+ /* allocate q_vector and rings */
+ q_vector = kzalloc(size, GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+ q_vector->v_idx = v_idx;
+
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+ while (txr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ ixgbevf_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
ring->count = adapter->tx_ring_count;
- ring->queue_index = tx;
- ring->reg_idx = tx;
+ ring->queue_index = txr_idx;
+ ring->reg_idx = txr_idx;
- adapter->tx_ring[tx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
- for (; rx < adapter->num_rx_queues; rx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ /* update count and index */
+ txr_count--;
+ txr_idx++;
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ while (rxr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ ixgbevf_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
ring->count = adapter->rx_ring_count;
- ring->queue_index = rx;
- ring->reg_idx = rx;
+ ring->queue_index = rxr_idx;
+ ring->reg_idx = rxr_idx;
- adapter->rx_ring[rx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
- return 0;
+ /* update count and index */
+ rxr_count--;
+ rxr_idx++;
-err_allocation:
- while (tx) {
- kfree(adapter->tx_ring[--tx]);
- adapter->tx_ring[tx] = NULL;
+ /* push pointer to next ring */
+ ring++;
}
- while (rx) {
- kfree(adapter->rx_ring[--rx]);
- adapter->rx_ring[rx] = NULL;
- }
- return -ENOMEM;
+ return 0;
}
/**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
*
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
**/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
{
- struct net_device *netdev = adapter->netdev;
- int err;
- int vector, v_budget;
-
- /* It's easy to be greedy for MSI-X vectors, but it really
- * doesn't do us much good if we have a lot more vectors
- * than CPU's. So let's be conservative and only ask for
- * (roughly) the same number of vectors as there are CPU's.
- * The default is to use pairs of vectors.
- */
- v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
- v_budget = min_t(int, v_budget, num_online_cpus());
- v_budget += NON_Q_VECTORS;
+ struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+ struct ixgbevf_ring *ring;
- /* A failure in MSI-X entry allocation isn't fatal, but it does
- * mean we disable MSI-X capabilities of the adapter.
- */
- adapter->msix_entries = kcalloc(v_budget,
- sizeof(struct msix_entry), GFP_KERNEL);
- if (!adapter->msix_entries)
- return -ENOMEM;
+ ixgbevf_for_each_ring(ring, q_vector->tx)
+ adapter->tx_ring[ring->queue_index] = NULL;
- for (vector = 0; vector < v_budget; vector++)
- adapter->msix_entries[vector].entry = vector;
+ ixgbevf_for_each_ring(ring, q_vector->rx)
+ adapter->rx_ring[ring->queue_index] = NULL;
- err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
- if (err)
- return err;
+ adapter->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
- err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
- if (err)
- return err;
-
- return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ /* ixgbevf_get_stats() might access the rings on this vector,
+ * we must wait a grace period before freeing it.
+ */
+ kfree_rcu(q_vector, rcu);
}
/**
@@ -2515,35 +2580,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
**/
static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors;
- struct ixgbevf_q_vector *q_vector;
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ 0, 0, rqpv, rxr_idx);
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ }
+ }
+
+ for (; q_vectors; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
- num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ tqpv, txr_idx,
+ rqpv, rxr_idx);
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
- if (!q_vector)
+ if (err)
goto err_out;
- q_vector->adapter = adapter;
- q_vector->v_idx = q_idx;
- netif_napi_add(adapter->netdev, &q_vector->napi,
- ixgbevf_poll, 64);
- adapter->q_vector[q_idx] = q_vector;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ txr_remaining -= tqpv;
+ txr_idx += tqpv;
}
return 0;
err_out:
- while (q_idx) {
- q_idx--;
- q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
- adapter->q_vector[q_idx] = NULL;
+ while (v_idx) {
+ v_idx--;
+ ixgbevf_free_q_vector(adapter, v_idx);
}
+
return -ENOMEM;
}
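When the MSI-X budget is smaller than the total ring count, the loop above shares rings between vectors, sizing each share with DIV_ROUND_UP so the earlier vectors absorb any remainder. A standalone sketch of that arithmetic, not part of the patch; the ring and vector counts are made up:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int q_vectors = 3, rxr_remaining = 4, txr_remaining = 4;
	int v_idx;

	for (v_idx = 0; q_vectors; v_idx++, q_vectors--) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);

		printf("vector %d: %d Rx + %d Tx rings\n", v_idx, rqpv, tqpv);
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
	}
	return 0;	/* prints 2+2, 1+1, 1+1 */
}

With enough vectors, the first loop in ixgbevf_alloc_q_vectors() instead gives every Rx ring its own vector and only the trailing vectors carry Tx rings.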
@@ -2557,17 +2640,11 @@ err_out:
**/
static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
- adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
+ while (q_vectors) {
+ q_vectors--;
+ ixgbevf_free_q_vector(adapter, q_vectors);
}
}
@@ -2611,12 +2688,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
goto err_alloc_q_vectors;
}
- err = ixgbevf_alloc_queues(adapter);
- if (err) {
- pr_err("Unable to allocate memory for queues\n");
- goto err_alloc_queues;
- }
-
hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
(adapter->num_rx_queues > 1) ? "Enabled" :
"Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
@@ -2624,8 +2695,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
set_bit(__IXGBEVF_DOWN, &adapter->state);
return 0;
-err_alloc_queues:
- ixgbevf_free_q_vectors(adapter);
err_alloc_q_vectors:
ixgbevf_reset_interrupt_capability(adapter);
err_set_interrupt:
@@ -2641,17 +2710,6 @@ err_set_interrupt:
**/
static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- kfree(adapter->tx_ring[i]);
- adapter->tx_ring[i] = NULL;
- }
- for (i = 0; i < adapter->num_rx_queues; i++) {
- kfree(adapter->rx_ring[i]);
- adapter->rx_ring[i] = NULL;
- }
-
adapter->num_tx_queues = 0;
adapter->num_rx_queues = 0;
@@ -3088,9 +3146,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
- break;
+ goto err_setup_tx;
}
+ return 0;
+err_setup_tx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_tx_resources(adapter->tx_ring[i]);
return err;
}
@@ -3148,8 +3211,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
- break;
+ goto err_setup_rx;
}
+
+ return 0;
+err_setup_rx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_rx_resources(adapter->rx_ring[i]);
return err;
}
@@ -3244,28 +3313,31 @@ int ixgbevf_open(struct net_device *netdev)
ixgbevf_configure(adapter);
- /* Map the Tx/Rx rings to the vectors we were allotted.
- * if request_irq will be called in this function map_rings
- * must be called *before* up_complete
- */
- ixgbevf_map_rings_to_vectors(adapter);
-
err = ixgbevf_request_irq(adapter);
if (err)
goto err_req_irq;
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+ if (err)
+ goto err_set_queues;
+
+ err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+
ixgbevf_up_complete(adapter);
return 0;
+err_set_queues:
+ ixgbevf_free_irq(adapter);
err_req_irq:
- ixgbevf_down(adapter);
-err_setup_rx:
ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
ixgbevf_reset(adapter);
-
err_setup_reset:
return err;
@@ -3707,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
return __ixgbevf_maybe_stop_tx(tx_ring, size);
}
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+ struct ixgbevf_ring *tx_ring)
{
- struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_tx_buffer *first;
- struct ixgbevf_ring *tx_ring;
int tso;
u32 tx_flags = 0;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3726,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}
- tx_ring = adapter->tx_ring[skb->queue_mapping];
-
/* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
* + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
* + 2 desc gap to keep tail from touching head,
@@ -3780,6 +3849,29 @@ out_drop:
return NETDEV_TX_OK;
}
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ struct ixgbevf_ring *tx_ring;
+
+ if (skb->len <= 0) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+	/* The minimum packet size for olinfo paylen is 17, so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb->len < 17) {
+ if (skb_padto(skb, 17))
+ return NETDEV_TX_OK;
+ skb->len = 17;
+ }
+
+ tx_ring = adapter->tx_ring[skb->queue_mapping];
+ return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
/**
* ixgbevf_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
@@ -3839,6 +3931,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
/* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+
return 0;
}
@@ -3917,17 +4012,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
rtnl_lock();
err = ixgbevf_init_interrupt_scheme(adapter);
+ if (!err && netif_running(netdev))
+ err = ixgbevf_open(netdev);
rtnl_unlock();
- if (err) {
- dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+ if (err)
return err;
- }
-
- if (netif_running(netdev)) {
- err = ixgbevf_open(netdev);
- if (err)
- return err;
- }
netif_device_attach(netdev);
@@ -3953,6 +4042,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
+ rcu_read_lock();
for (i = 0; i < adapter->num_rx_queues; i++) {
ring = adapter->rx_ring[i];
do {
@@ -3974,6 +4064,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
stats->tx_bytes += bytes;
stats->tx_packets += packets;
}
+ rcu_read_unlock();
}
#define IXGBEVF_MAX_MAC_HDR_LEN 127
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 5a1668cdb461..f8bc3d4a39ff 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -44,6 +44,7 @@
#define MVPP2_RX_ATTR_FIFO_SIZE_REG(port) (0x20 + 4 * (port))
#define MVPP2_RX_MIN_PKT_SIZE_REG 0x60
#define MVPP2_RX_FIFO_INIT_REG 0x64
+#define MVPP22_TX_FIFO_THRESH_REG(port) (0x8840 + 4 * (port))
#define MVPP22_TX_FIFO_SIZE_REG(port) (0x8860 + 4 * (port))
/* RX DMA Top Registers */
@@ -65,6 +66,10 @@
#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000
#define MVPP2_RXQ_DISABLE_MASK BIT(31)
+/* Top Registers */
+#define MVPP2_MH_REG(port) (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED BIT(5)
+
/* Parser Registers */
#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000
#define MVPP2_PRS_PORT_LU_MAX 0xf
@@ -254,6 +259,7 @@
#define MVPP2_BM_BPPI_READ_PTR_REG(pool) (0x6100 + ((pool) * 4))
#define MVPP2_BM_BPPI_PTRS_NUM_REG(pool) (0x6140 + ((pool) * 4))
#define MVPP2_BM_BPPI_PTR_NUM_MASK 0x7ff
+#define MVPP22_BM_POOL_PTRS_NUM_MASK 0xfff8
#define MVPP2_BM_BPPI_PREFETCH_FULL_MASK BIT(16)
#define MVPP2_BM_POOL_CTRL_REG(pool) (0x6200 + ((pool) * 4))
#define MVPP2_BM_START_MASK BIT(0)
@@ -473,6 +479,7 @@
#define MVPP2_ETH_TYPE_LEN 2
#define MVPP2_PPPOE_HDR_SIZE 8
#define MVPP2_VLAN_TAG_LEN 4
+#define MVPP2_VLAN_TAG_EDSA_LEN 8
/* Lbtd 802.3 type */
#define MVPP2_IP_LBDT_TYPE 0xfffa
@@ -536,6 +543,11 @@
/* TX FIFO constants */
#define MVPP22_TX_FIFO_DATA_SIZE_10KB 0xa
#define MVPP22_TX_FIFO_DATA_SIZE_3KB 0x3
+#define MVPP2_TX_FIFO_THRESHOLD_MIN 256
+#define MVPP2_TX_FIFO_THRESHOLD_10KB \
+ (MVPP22_TX_FIFO_DATA_SIZE_10KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
+#define MVPP2_TX_FIFO_THRESHOLD_3KB \
+ (MVPP22_TX_FIFO_DATA_SIZE_3KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
/* RX buffer constants */
#define MVPP2_SKB_SHINFO_SIZE \
@@ -589,6 +601,9 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_PROTO_MASK 0xff
#define MVPP2_PRS_TCAM_PROTO_MASK_L 0x3f
#define MVPP2_PRS_DBL_VLANS_MAX 100
+#define MVPP2_PRS_CAST_MASK BIT(0)
+#define MVPP2_PRS_MCAST_VAL BIT(0)
+#define MVPP2_PRS_UCAST_VAL 0x0
/* Tcam structure:
* - lookup ID - 4 bits
@@ -609,35 +624,81 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_LU_BYTE 20
#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2)
#define MVPP2_PRS_TCAM_INV_WORD 5
+
+#define MVPP2_PRS_VID_TCAM_BYTE 2
+
+/* TCAM range for unicast and multicast filtering. We have 25 entries per port,
+ * with 4 dedicated to UC filtering and the rest to multicast filtering.
+ * Additionally we reserve one entry for the broadcast address, and one for
+ * each port's own address.
+ */
+#define MVPP2_PRS_MAC_UC_MC_FILT_MAX 25
+#define MVPP2_PRS_MAC_RANGE_SIZE 80
+
+/* Number of entries per port dedicated to UC and MC filtering */
+#define MVPP2_PRS_MAC_UC_FILT_MAX 4
+#define MVPP2_PRS_MAC_MC_FILT_MAX (MVPP2_PRS_MAC_UC_MC_FILT_MAX - \
+ MVPP2_PRS_MAC_UC_FILT_MAX)
+
+/* There is a TCAM range reserved for VLAN filtering entries; its size is 33:
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filtering, not including the loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX 11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
/* Tcam entries ID */
#define MVPP2_PE_DROP_ALL 0
#define MVPP2_PE_FIRST_FREE_TID 1
-#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* MAC filtering range */
+#define MVPP2_PE_MAC_RANGE_END (MVPP2_PE_VID_FILT_RANGE_START - 1)
+#define MVPP2_PE_MAC_RANGE_START (MVPP2_PE_MAC_RANGE_END - \
+ MVPP2_PRS_MAC_RANGE_SIZE + 1)
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \
+ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1)
#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
-#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
-#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
-#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
-#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
-#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
-#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
-#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
-#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
+#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
+#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
+#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 22)
+#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
+/* reserved */
+#define MVPP2_PE_MAC_MC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
+#define MVPP2_PE_MAC_UC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+ ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
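The macros above carve a 33-entry VID filtering window out of the TCAM: 11 consecutive slots per port, the last of which is the per-port default (guard) entry. A standalone sketch of the layout, expressed as offsets from MVPP2_PE_VID_FILT_RANGE_START (not part of the patch):

#include <stdio.h>

#define VLAN_FILT_MAX		11
#define VLAN_FILT_MAX_ENTRY	(VLAN_FILT_MAX - 2)	/* last VID slot */
#define VLAN_FILT_DFLT_ENTRY	(VLAN_FILT_MAX - 1)	/* guard slot */

int main(void)
{
	int port;

	for (port = 0; port < 3; port++) {
		int first = port * VLAN_FILT_MAX;

		printf("port %d: VID slots +%d..+%d, guard slot +%d\n",
		       port, first, first + VLAN_FILT_MAX_ENTRY,
		       first + VLAN_FILT_DFLT_ENTRY);
	}
	return 0;	/* 3 ports x 11 entries = 33 */
}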
/* Sram structure
* The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
*/
@@ -725,6 +786,7 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4)
#define MVPP2_PRS_SINGLE_VLAN_AI 0
#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0)
/* DSA/EDSA type */
#define MVPP2_PRS_TAGGED true
@@ -747,6 +809,7 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_MAC,
MVPP2_PRS_LU_DSA,
MVPP2_PRS_LU_VLAN,
+ MVPP2_PRS_LU_VID,
MVPP2_PRS_LU_L2,
MVPP2_PRS_LU_PPPOE,
MVPP2_PRS_LU_IP4,
@@ -755,6 +818,12 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_LAST,
};
+/* L2 cast enum */
+enum mvpp2_prs_l2_cast {
+ MVPP2_PRS_L2_UNI_CAST,
+ MVPP2_PRS_L2_MULTI_CAST,
+};
+
/* L3 cast enum */
enum mvpp2_prs_l3_cast {
MVPP2_PRS_L3_UNI_CAST,
@@ -772,23 +841,26 @@ enum mvpp2_prs_l3_cast {
#define MVPP22_RSS_TABLE_ENTRIES 32
/* BM constants */
-#define MVPP2_BM_POOLS_NUM 8
+#define MVPP2_BM_JUMBO_BUF_NUM 512
#define MVPP2_BM_LONG_BUF_NUM 1024
#define MVPP2_BM_SHORT_BUF_NUM 2048
#define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
#define MVPP2_BM_POOL_PTR_ALIGN 128
-#define MVPP2_BM_SWF_LONG_POOL(port) ((port > 2) ? 2 : port)
-#define MVPP2_BM_SWF_SHORT_POOL 3
/* BM cookie (32 bits) definition */
#define MVPP2_BM_COOKIE_POOL_OFFS 8
#define MVPP2_BM_COOKIE_CPU_OFFS 24
+#define MVPP2_BM_SHORT_FRAME_SIZE 512
+#define MVPP2_BM_LONG_FRAME_SIZE 2048
+#define MVPP2_BM_JUMBO_FRAME_SIZE 10240
/* BM short pool packet size
 * These values ensure that for SWF the total number
* of bytes allocated for each buffer will be 512
*/
-#define MVPP2_BM_SHORT_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(512)
+#define MVPP2_BM_SHORT_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_SHORT_FRAME_SIZE)
+#define MVPP2_BM_LONG_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_LONG_FRAME_SIZE)
+#define MVPP2_BM_JUMBO_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_JUMBO_FRAME_SIZE)
#define MVPP21_ADDR_SPACE_SZ 0
#define MVPP22_ADDR_SPACE_SZ SZ_64K
@@ -796,12 +868,18 @@ enum mvpp2_prs_l3_cast {
#define MVPP2_MAX_THREADS 8
#define MVPP2_MAX_QVECS MVPP2_MAX_THREADS
-enum mvpp2_bm_type {
- MVPP2_BM_FREE,
- MVPP2_BM_SWF_LONG,
- MVPP2_BM_SWF_SHORT
+enum mvpp2_bm_pool_log_num {
+ MVPP2_BM_SHORT,
+ MVPP2_BM_LONG,
+ MVPP2_BM_JUMBO,
+ MVPP2_BM_POOLS_NUM
};
+static struct {
+ int pkt_size;
+ int buf_num;
+} mvpp2_pools[MVPP2_BM_POOLS_NUM];
+
/* GMAC MIB Counters register definitions */
#define MVPP21_MIB_COUNTERS_OFFSET 0x1000
#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400
@@ -1230,7 +1308,6 @@ struct mvpp2_cls_lookup_entry {
struct mvpp2_bm_pool {
/* Pool number in the range 0-7 */
int id;
- enum mvpp2_bm_type type;
/* Buffer Pointers Pool External (BPPE) size */
int size;
@@ -1662,6 +1739,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
}
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+ unsigned short vid)
+{
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
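mvpp2_prs_match_vid() above spreads the 12-bit VLAN ID over two TCAM data bytes (high nibble, then low byte); mvpp2_prs_vid_range_find() later reassembles it the same way. A standalone round-trip sketch, not part of the patch:

#include <stdio.h>

int main(void)
{
	unsigned short vid = 0xabc;		/* any 12-bit VLAN ID */
	unsigned char hi = (vid & 0xf00) >> 8;	/* byte 0, enable mask 0xf */
	unsigned char lo = vid & 0xff;		/* byte 1, enable mask 0xff */
	unsigned short back = (hi << 8) | lo;

	printf("vid 0x%03x -> bytes 0x%x 0x%02x -> 0x%03x\n",
	       vid, hi, lo, back);
	return 0;
}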
/* Set bits in sram sw entry */
static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
int val)
@@ -1914,78 +1999,43 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
mvpp2_prs_hw_write(priv, &pe);
}
-/* Set port to promiscuous mode */
-static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, bool add)
+/* Set port to unicast or multicast promiscuous mode */
+static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port,
+ enum mvpp2_prs_l2_cast l2_cast, bool add)
{
struct mvpp2_prs_entry pe;
+ unsigned char cast_match;
+ unsigned int ri;
+ int tid;
- /* Promiscuous mode - Accept unknown packets */
-
- if (priv->prs_shadow[MVPP2_PE_MAC_PROMISCUOUS].valid) {
- /* Entry exist - update port only */
- pe.index = MVPP2_PE_MAC_PROMISCUOUS;
- mvpp2_prs_hw_read(priv, &pe);
+ if (l2_cast == MVPP2_PRS_L2_UNI_CAST) {
+ cast_match = MVPP2_PRS_UCAST_VAL;
+ tid = MVPP2_PE_MAC_UC_PROMISCUOUS;
+ ri = MVPP2_PRS_RI_L2_UCAST;
} else {
- /* Entry doesn't exist - create new */
- memset(&pe, 0, sizeof(pe));
- mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
- pe.index = MVPP2_PE_MAC_PROMISCUOUS;
-
- /* Continue - set next lookup */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
-
- /* Set result info bits */
- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_UCAST,
- MVPP2_PRS_RI_L2_CAST_MASK);
-
- /* Shift to ethertype */
- mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
- /* Mask all ports */
- mvpp2_prs_tcam_port_map_set(&pe, 0);
-
- /* Update shadow table */
- mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+ cast_match = MVPP2_PRS_MCAST_VAL;
+ tid = MVPP2_PE_MAC_MC_PROMISCUOUS;
+ ri = MVPP2_PRS_RI_L2_MCAST;
}
- /* Update port mask */
- mvpp2_prs_tcam_port_set(&pe, port, add);
-
- mvpp2_prs_hw_write(priv, &pe);
-}
-
-/* Accept multicast */
-static void mvpp2_prs_mac_multi_set(struct mvpp2 *priv, int port, int index,
- bool add)
-{
- struct mvpp2_prs_entry pe;
- unsigned char da_mc;
-
- /* Ethernet multicast address first byte is
- * 0x01 for IPv4 and 0x33 for IPv6
- */
- da_mc = (index == MVPP2_PE_MAC_MC_ALL) ? 0x01 : 0x33;
-
- if (priv->prs_shadow[index].valid) {
- /* Entry exist - update port only */
- pe.index = index;
+ /* promiscuous mode - Accept unknown unicast or multicast packets */
+ if (priv->prs_shadow[tid].valid) {
+ pe.index = tid;
mvpp2_prs_hw_read(priv, &pe);
} else {
- /* Entry doesn't exist - create new */
memset(&pe, 0, sizeof(pe));
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
- pe.index = index;
+ pe.index = tid;
/* Continue - set next lookup */
mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
/* Set result info bits */
- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_MCAST,
- MVPP2_PRS_RI_L2_CAST_MASK);
+ mvpp2_prs_sram_ri_update(&pe, ri, MVPP2_PRS_RI_L2_CAST_MASK);
- /* Update tcam entry data first byte */
- mvpp2_prs_tcam_data_byte_set(&pe, 0, da_mc, 0xff);
+ /* Match UC or MC addresses */
+ mvpp2_prs_tcam_data_byte_set(&pe, 0, cast_match,
+ MVPP2_PRS_CAST_MASK);
/* Shift to ethertype */
mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
@@ -2029,24 +2079,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
pe.index = tid;
- /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
- mvpp2_prs_sram_shift_set(&pe, shift,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
/* Update shadow table */
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
if (tagged) {
/* Set tagged bit in DSA tag */
mvpp2_prs_tcam_data_byte_set(&pe, 0,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
- /* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(&pe, 0,
- MVPP2_PRS_SRAM_AI_MASK);
- /* If packet is tagged continue check vlans */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+ /* Set ai bits for next iteration */
+ if (extend)
+ mvpp2_prs_sram_ai_update(&pe, 1,
+ MVPP2_PRS_SRAM_AI_MASK);
+ else
+ mvpp2_prs_sram_ai_update(&pe, 0,
+ MVPP2_PRS_SRAM_AI_MASK);
+
+			/* If packet is tagged, continue to check VID filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
} else {
+			/* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag */
+ mvpp2_prs_sram_shift_set(&pe, shift,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
/* Set result info bits to 'no vlans' */
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2287,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
mvpp2_prs_match_etype(pe, 0, tpid);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
- /* Shift 4 bytes - skip 1 vlan tag */
- mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+ /* VLAN tag detected, proceed with VID filtering */
+ mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
/* Clear all ai bits for next iteration */
mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
@@ -2375,8 +2430,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
mvpp2_prs_match_etype(pe, 4, tpid2);
mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
- /* Shift 8 bytes - skip 2 vlan tags */
- mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+ /* Shift 4 bytes - skip outer vlan tag */
+ mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2694,11 +2749,10 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv)
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
mvpp2_prs_hw_write(priv, &pe);
- /* place holders only - no ports */
+ /* Create dummy entries for drop all and promiscuous modes */
mvpp2_prs_mac_drop_all_set(priv, 0, false);
- mvpp2_prs_mac_promisc_set(priv, 0, false);
- mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false);
- mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false);
+ mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false);
+ mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false);
}
/* Set default entries for various types of dsa packets */
@@ -2755,6 +2809,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
mvpp2_prs_hw_write(priv, &pe);
}
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+ struct mvpp2_prs_entry pe;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 4 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+	/* Set default vid entry for extended DSA */
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+ MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Match basic ethertypes */
static int mvpp2_prs_etype_init(struct mvpp2 *priv)
{
@@ -3023,7 +3133,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
pe.index = MVPP2_PE_VLAN_DBL;
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
/* Clear ai for next iterations */
mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3497,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
return 0;
}
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+ u16 mask)
+{
+ unsigned char byte[2], enable[2];
+ struct mvpp2_prs_entry pe;
+ u16 rvid, rmask;
+ int tid;
+
+	/* Go through all entries with MVPP2_PRS_LU_VID */
+ for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+ tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+ if (!priv->prs_shadow[tid].valid ||
+ priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+ continue;
+
+ pe.index = tid;
+
+ mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+ mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+ rvid = ((byte[0] & 0xf) << 8) + byte[1];
+ rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+ if (rvid != vid || rmask != mask)
+ continue;
+
+ return tid;
+ }
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+ unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+ port->id * MVPP2_PRS_VLAN_FILT_MAX;
+ unsigned int mask = 0xfff, reg_val, shift;
+ struct mvpp2 *priv = port->priv;
+ struct mvpp2_prs_entry pe;
+ int tid;
+
+	/* Scan TCAM and see if entry with this <vid,port> already exists */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ /* No such entry */
+ if (!tid) {
+ memset(&pe, 0, sizeof(pe));
+
+ /* Go through all entries from first to last in vlan range */
+ tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+ vid_start +
+ MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+ /* There isn't room for a new VID filter */
+ if (tid < 0)
+ return tid;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+ pe.index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_hw_read(priv, &pe);
+ }
+
+ /* Enable the current port */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Set match on VID */
+ mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Remove parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+	/* Scan TCAM and see if entry with this <vid,port> already exists */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+	/* No such entry */
+	if (!tid)
+		return;
+
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+ tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+		if (priv->prs_shadow[tid].valid) {
+			mvpp2_prs_hw_inv(priv, tid);
+			priv->prs_shadow[tid].valid = false;
+		}
+ }
+}
+
+/* Remove the VID filtering guard entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+
+ /* Invalidate the guard entry */
+ mvpp2_prs_hw_inv(priv, tid);
+
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+ unsigned int reg_val, shift;
+ struct mvpp2_prs_entry pe;
+
+ if (priv->prs_shadow[tid].valid)
+ return;
+
+ memset(&pe, 0, sizeof(pe));
+
+ pe.index = tid;
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Drop VLAN packets that don't belong to any VIDs on this port */
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_DROP_MASK);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Parser default initialization */
static int mvpp2_prs_default_init(struct platform_device *pdev,
struct mvpp2 *priv)
@@ -3429,6 +3726,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
mvpp2_prs_dsa_init(priv);
+ mvpp2_prs_vid_init(priv);
+
err = mvpp2_prs_etype_init(priv);
if (err)
return err;
@@ -3485,8 +3784,8 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
 	/* Go through all the entries with MVPP2_PRS_LU_MAC */
- for (tid = MVPP2_PE_FIRST_FREE_TID;
- tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+ for (tid = MVPP2_PE_MAC_RANGE_START;
+ tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
unsigned int entry_pmap;
if (!priv->prs_shadow[tid].valid ||
@@ -3508,16 +3807,17 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
}
/* Update parser's mac da entry */
-static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
- const u8 *da, bool add)
+static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
+ bool add)
{
- struct mvpp2_prs_entry *pe;
- unsigned int pmap, len, ri;
unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ struct mvpp2 *priv = port->priv;
+ unsigned int pmap, len, ri;
+ struct mvpp2_prs_entry *pe;
int tid;
/* Scan TCAM and see if entry with this <MAC DA, port> already exist */
- pe = mvpp2_prs_mac_da_range_find(priv, (1 << port), da, mask,
+ pe = mvpp2_prs_mac_da_range_find(priv, BIT(port->id), da, mask,
MVPP2_PRS_UDF_MAC_DEF);
/* No such entry */
@@ -3526,18 +3826,10 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
return 0;
/* Create new TCAM entry */
- /* Find first range mac entry*/
- for (tid = MVPP2_PE_FIRST_FREE_TID;
- tid <= MVPP2_PE_LAST_FREE_TID; tid++)
- if (priv->prs_shadow[tid].valid &&
- (priv->prs_shadow[tid].lu == MVPP2_PRS_LU_MAC) &&
- (priv->prs_shadow[tid].udf ==
- MVPP2_PRS_UDF_MAC_RANGE))
- break;
-
 	/* Go through all the entries from first to last */
- tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
- tid - 1);
+ tid = mvpp2_prs_tcam_first_free(priv,
+ MVPP2_PE_MAC_RANGE_START,
+ MVPP2_PE_MAC_RANGE_END);
if (tid < 0)
return tid;
@@ -3552,7 +3844,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
}
/* Update port mask */
- mvpp2_prs_tcam_port_set(pe, port, add);
+ mvpp2_prs_tcam_port_set(pe, port->id, add);
/* Invalidate the entry if no ports are left enabled */
pmap = mvpp2_prs_tcam_port_map_get(pe);
@@ -3576,12 +3868,16 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
mvpp2_prs_tcam_data_byte_set(pe, len, da[len], 0xff);
/* Set result info bits */
- if (is_broadcast_ether_addr(da))
+ if (is_broadcast_ether_addr(da)) {
ri = MVPP2_PRS_RI_L2_BCAST;
- else if (is_multicast_ether_addr(da))
+ } else if (is_multicast_ether_addr(da)) {
ri = MVPP2_PRS_RI_L2_MCAST;
- else
- ri = MVPP2_PRS_RI_L2_UCAST | MVPP2_PRS_RI_MAC_ME_MASK;
+ } else {
+ ri = MVPP2_PRS_RI_L2_UCAST;
+
+ if (ether_addr_equal(da, port->dev->dev_addr))
+ ri |= MVPP2_PRS_RI_MAC_ME_MASK;
+ }
mvpp2_prs_sram_ri_update(pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
MVPP2_PRS_RI_MAC_ME_MASK);
@@ -3608,13 +3904,12 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
int err;
/* Remove old parser entry */
- err = mvpp2_prs_mac_da_accept(port->priv, port->id, dev->dev_addr,
- false);
+ err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, false);
if (err)
return err;
/* Add new parser entry */
- err = mvpp2_prs_mac_da_accept(port->priv, port->id, da, true);
+ err = mvpp2_prs_mac_da_accept(port, da, true);
if (err)
return err;
@@ -3624,14 +3919,15 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
return 0;
}
-/* Delete all port's multicast simple (not range) entries */
-static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
+static void mvpp2_prs_mac_del_all(struct mvpp2_port *port)
{
+ struct mvpp2 *priv = port->priv;
struct mvpp2_prs_entry pe;
+ unsigned long pmap;
int index, tid;
- for (tid = MVPP2_PE_FIRST_FREE_TID;
- tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+ for (tid = MVPP2_PE_MAC_RANGE_START;
+ tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
unsigned char da[ETH_ALEN], da_mask[ETH_ALEN];
if (!priv->prs_shadow[tid].valid ||
@@ -3639,18 +3935,29 @@ static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
(priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF))
continue;
- /* Only simple mac entries */
pe.index = tid;
mvpp2_prs_hw_read(priv, &pe);
+ pmap = mvpp2_prs_tcam_port_map_get(&pe);
+
+ /* We only want entries active on this port */
+ if (!test_bit(port->id, &pmap))
+ continue;
+
/* Read mac addr from entry */
for (index = 0; index < ETH_ALEN; index++)
mvpp2_prs_tcam_data_byte_get(&pe, index, &da[index],
&da_mask[index]);
- if (is_multicast_ether_addr(da) && !is_broadcast_ether_addr(da))
- /* Delete this entry */
- mvpp2_prs_mac_da_accept(priv, port, da, false);
+		/* Special cases: don't remove the broadcast address or the
+		 * port's own address
+		 */
+ if (is_broadcast_ether_addr(da) ||
+ ether_addr_equal(da, port->dev->dev_addr))
+ continue;
+
+ /* Remove entry from TCAM */
+ mvpp2_prs_mac_da_accept(port, da, false);
}
}
@@ -3901,7 +4208,6 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
val |= MVPP2_BM_START_MASK;
mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
- bm_pool->type = MVPP2_BM_FREE;
bm_pool->size = size;
bm_pool->pkt_size = 0;
bm_pool->buf_num = 0;
@@ -3954,11 +4260,17 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
/* Free all buffers from the pool */
static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
- struct mvpp2_bm_pool *bm_pool)
+ struct mvpp2_bm_pool *bm_pool, int buf_num)
{
int i;
- for (i = 0; i < bm_pool->buf_num; i++) {
+ if (buf_num > bm_pool->buf_num) {
+ WARN(1, "Pool does not have so many bufs pool(%d) bufs(%d)\n",
+ bm_pool->id, buf_num);
+ buf_num = bm_pool->buf_num;
+ }
+
+ for (i = 0; i < buf_num; i++) {
dma_addr_t buf_dma_addr;
phys_addr_t buf_phys_addr;
void *data;
@@ -3980,16 +4292,39 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
bm_pool->buf_num -= i;
}
+/* Check number of buffers in BM pool */
+static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool)
+{
+ int buf_num = 0;
+
+ buf_num += mvpp2_read(priv, MVPP2_BM_POOL_PTRS_NUM_REG(bm_pool->id)) &
+ MVPP22_BM_POOL_PTRS_NUM_MASK;
+ buf_num += mvpp2_read(priv, MVPP2_BM_BPPI_PTRS_NUM_REG(bm_pool->id)) &
+ MVPP2_BM_BPPI_PTR_NUM_MASK;
+
+ /* HW has one buffer ready which is not reflected in the counters */
+ if (buf_num)
+ buf_num += 1;
+
+ return buf_num;
+}
+
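mvpp2_check_hw_buf_num() above sums the masked pool and BPPI pointer counters and compensates for the one buffer the hardware keeps ready without reporting it. A standalone sketch of that accounting, not part of the patch; the register values are made up:

#include <stdio.h>

#define POOL_PTRS_NUM_MASK	0xfff8
#define BPPI_PTR_NUM_MASK	0x7ff

int main(void)
{
	unsigned int pool_reg = 0x03f8;	/* illustrative register reads */
	unsigned int bppi_reg = 0x0010;
	int buf_num = (pool_reg & POOL_PTRS_NUM_MASK) +
		      (bppi_reg & BPPI_PTR_NUM_MASK);

	/* one buffer is held ready by the hardware and is not counted */
	if (buf_num)
		buf_num += 1;

	printf("buffers in pool: %d\n", buf_num);
	return 0;
}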
/* Cleanup pool */
static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
struct mvpp2 *priv,
struct mvpp2_bm_pool *bm_pool)
{
+ int buf_num;
u32 val;
- mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool);
- if (bm_pool->buf_num) {
- WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
+ buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+ mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+
+ /* Check buffer counters after free */
+ buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+ if (buf_num) {
+ WARN(1, "cannot free all buffers in pool %d, buf_num left %d\n",
+ bm_pool->id, bm_pool->buf_num);
return 0;
}
@@ -4051,6 +4386,21 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
return 0;
}
+static void mvpp2_setup_bm_pool(void)
+{
+ /* Short pool */
+ mvpp2_pools[MVPP2_BM_SHORT].buf_num = MVPP2_BM_SHORT_BUF_NUM;
+ mvpp2_pools[MVPP2_BM_SHORT].pkt_size = MVPP2_BM_SHORT_PKT_SIZE;
+
+ /* Long pool */
+ mvpp2_pools[MVPP2_BM_LONG].buf_num = MVPP2_BM_LONG_BUF_NUM;
+ mvpp2_pools[MVPP2_BM_LONG].pkt_size = MVPP2_BM_LONG_PKT_SIZE;
+
+ /* Jumbo pool */
+ mvpp2_pools[MVPP2_BM_JUMBO].buf_num = MVPP2_BM_JUMBO_BUF_NUM;
+ mvpp2_pools[MVPP2_BM_JUMBO].pkt_size = MVPP2_BM_JUMBO_PKT_SIZE;
+}
+
/* Attach long pool to rxq */
static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
int lrxq, int long_pool)
@@ -4189,13 +4539,11 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
bm_pool->buf_num += i;
netdev_dbg(port->dev,
- "%s pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
- bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+ "pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
bm_pool->id, bm_pool->pkt_size, buf_size, total_size);
netdev_dbg(port->dev,
- "%s pool %d: %d of %d buffers added\n",
- bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+ "pool %d: %d of %d buffers added\n",
bm_pool->id, i, buf_num);
return i;
}
@@ -4204,25 +4552,20 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
* pool pointer on success
*/
static struct mvpp2_bm_pool *
-mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
- int pkt_size)
+mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
{
struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
int num;
- if (new_pool->type != MVPP2_BM_FREE && new_pool->type != type) {
- netdev_err(port->dev, "mixing pool types is forbidden\n");
+ if (pool >= MVPP2_BM_POOLS_NUM) {
+ netdev_err(port->dev, "Invalid pool %d\n", pool);
return NULL;
}
- if (new_pool->type == MVPP2_BM_FREE)
- new_pool->type = type;
-
/* Allocate buffers in case BM pool is used as long pool, but packet
* size doesn't match MTU or BM pool hasn't being used yet
*/
- if (((type == MVPP2_BM_SWF_LONG) && (pkt_size > new_pool->pkt_size)) ||
- (new_pool->pkt_size == 0)) {
+ if (new_pool->pkt_size == 0) {
int pkts_num;
/* Set default buffer number or free all the buffers in case
@@ -4230,12 +4573,10 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
*/
pkts_num = new_pool->buf_num;
if (pkts_num == 0)
- pkts_num = type == MVPP2_BM_SWF_LONG ?
- MVPP2_BM_LONG_BUF_NUM :
- MVPP2_BM_SHORT_BUF_NUM;
+ pkts_num = mvpp2_pools[pool].buf_num;
else
mvpp2_bm_bufs_free(port->dev->dev.parent,
- port->priv, new_pool);
+ port->priv, new_pool, pkts_num);
new_pool->pkt_size = pkt_size;
new_pool->frag_size =
@@ -4261,16 +4602,28 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
{
int rxq;
+ enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+
+ /* If port pkt_size is higher than 1518B:
+ * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+ * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+ */
+ if (port->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+ long_log_pool = MVPP2_BM_JUMBO;
+ short_log_pool = MVPP2_BM_LONG;
+ } else {
+ long_log_pool = MVPP2_BM_LONG;
+ short_log_pool = MVPP2_BM_SHORT;
+ }
if (!port->pool_long) {
port->pool_long =
- mvpp2_bm_pool_use(port, MVPP2_BM_SWF_LONG_POOL(port->id),
- MVPP2_BM_SWF_LONG,
- port->pkt_size);
+ mvpp2_bm_pool_use(port, long_log_pool,
+ mvpp2_pools[long_log_pool].pkt_size);
if (!port->pool_long)
return -ENOMEM;
- port->pool_long->port_map |= (1 << port->id);
+ port->pool_long->port_map |= BIT(port->id);
for (rxq = 0; rxq < port->nrxqs; rxq++)
mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
@@ -4278,13 +4631,12 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
if (!port->pool_short) {
port->pool_short =
- mvpp2_bm_pool_use(port, MVPP2_BM_SWF_SHORT_POOL,
- MVPP2_BM_SWF_SHORT,
- MVPP2_BM_SHORT_PKT_SIZE);
+ mvpp2_bm_pool_use(port, short_log_pool,
+ mvpp2_pools[short_log_pool].pkt_size);
if (!port->pool_short)
return -ENOMEM;
- port->pool_short->port_map |= (1 << port->id);
+ port->pool_short->port_map |= BIT(port->id);
for (rxq = 0; rxq < port->nrxqs; rxq++)
mvpp2_rxq_short_pool_set(port, rxq,
@@ -4297,30 +4649,49 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
{
struct mvpp2_port *port = netdev_priv(dev);
- struct mvpp2_bm_pool *port_pool = port->pool_long;
- int num, pkts_num = port_pool->buf_num;
+ enum mvpp2_bm_pool_log_num new_long_pool;
int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
- /* Update BM pool with new buffer size */
- mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool);
- if (port_pool->buf_num) {
- WARN(1, "cannot free all buffers in pool %d\n", port_pool->id);
- return -EIO;
- }
-
- port_pool->pkt_size = pkt_size;
- port_pool->frag_size = SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
- MVPP2_SKB_SHINFO_SIZE;
- num = mvpp2_bm_bufs_add(port, port_pool, pkts_num);
- if (num != pkts_num) {
- WARN(1, "pool %d: %d of %d allocated\n",
- port_pool->id, num, pkts_num);
- return -EIO;
+ /* If port MTU is higher than 1518B:
+ * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+ * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+ */
+ if (pkt_size > MVPP2_BM_LONG_PKT_SIZE)
+ new_long_pool = MVPP2_BM_JUMBO;
+ else
+ new_long_pool = MVPP2_BM_LONG;
+
+ if (new_long_pool != port->pool_long->id) {
+ /* Remove port from old short & long pool */
+ port->pool_long = mvpp2_bm_pool_use(port, port->pool_long->id,
+ port->pool_long->pkt_size);
+ port->pool_long->port_map &= ~BIT(port->id);
+ port->pool_long = NULL;
+
+ port->pool_short = mvpp2_bm_pool_use(port, port->pool_short->id,
+ port->pool_short->pkt_size);
+ port->pool_short->port_map &= ~BIT(port->id);
+ port->pool_short = NULL;
+
+ port->pkt_size = pkt_size;
+
+ /* Add port to new short & long pool */
+ mvpp2_swf_bm_pool_init(port);
+
+ /* Update L4 checksum when jumbo enable/disable on port */
+ if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) {
+ dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ dev->hw_features &= ~(NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM);
+ } else {
+ dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ }
}
- mvpp2_bm_pool_bufsize_set(port->priv, port_pool,
- MVPP2_RX_BUF_SIZE(port_pool->pkt_size));
dev->mtu = mtu;
+ dev->wanted_features = dev->features;
+
netdev_update_features(dev);
return 0;
}
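A minimal, standalone C model of the pool-selection rule used above; the threshold constant, enum values and helper are illustrative stand-ins (per the comment, the cut-off corresponds to a standard 1518-byte frame), not the driver's definitions.

/* Select which logical BM pools back the HW long and short pools,
 * given the RX packet size derived from the MTU.
 */
#include <stdio.h>

#define LONG_PKT_SIZE_THRESHOLD 1518	/* stand-in for MVPP2_BM_LONG_PKT_SIZE */

enum bm_pool { BM_SHORT, BM_LONG, BM_JUMBO };

static void select_pools(int pkt_size, enum bm_pool *long_pool,
			 enum bm_pool *short_pool)
{
	if (pkt_size > LONG_PKT_SIZE_THRESHOLD) {
		/* jumbo frames: HW long <- SW jumbo, HW short <- SW long */
		*long_pool = BM_JUMBO;
		*short_pool = BM_LONG;
	} else {
		/* standard frames: HW long <- SW long, HW short <- SW short */
		*long_pool = BM_LONG;
		*short_pool = BM_SHORT;
	}
}

int main(void)
{
	static const char * const name[] = { "short", "long", "jumbo" };
	enum bm_pool lp, sp;

	select_pools(1500, &lp, &sp);
	printf("pkt 1500: long=%s short=%s\n", name[lp], name[sp]);
	select_pools(9000, &lp, &sp);
	printf("pkt 9000: long=%s short=%s\n", name[lp], name[sp]);
	return 0;
}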
@@ -7007,15 +7378,14 @@ static int mvpp2_open(struct net_device *dev)
0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
int err;
- err = mvpp2_prs_mac_da_accept(port->priv, port->id, mac_bcast, true);
+ err = mvpp2_prs_mac_da_accept(port, mac_bcast, true);
if (err) {
netdev_err(dev, "mvpp2_prs_mac_da_accept BC failed\n");
return err;
}
- err = mvpp2_prs_mac_da_accept(port->priv, port->id,
- dev->dev_addr, true);
+ err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, true);
if (err) {
- netdev_err(dev, "mvpp2_prs_mac_da_accept MC failed\n");
+ netdev_err(dev, "mvpp2_prs_mac_da_accept own addr failed\n");
return err;
}
err = mvpp2_prs_tag_mode_set(port->priv, port->id, MVPP2_TAG_TYPE_MH);
@@ -7129,30 +7499,64 @@ static int mvpp2_stop(struct net_device *dev)
return 0;
}
+static int mvpp2_prs_mac_da_accept_list(struct mvpp2_port *port,
+ struct netdev_hw_addr_list *list)
+{
+ struct netdev_hw_addr *ha;
+ int ret;
+
+ netdev_hw_addr_list_for_each(ha, list) {
+ ret = mvpp2_prs_mac_da_accept(port, ha->addr, true);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mvpp2_set_rx_promisc(struct mvpp2_port *port, bool enable)
+{
+ if (!enable && (port->dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ mvpp2_prs_vid_enable_filtering(port);
+ else
+ mvpp2_prs_vid_disable_filtering(port);
+
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_UNI_CAST, enable);
+
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_MULTI_CAST, enable);
+}
+
static void mvpp2_set_rx_mode(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
- struct mvpp2 *priv = port->priv;
- struct netdev_hw_addr *ha;
- int id = port->id;
- bool allmulti = dev->flags & IFF_ALLMULTI;
-
-retry:
- mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
- mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
- mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
-
- /* Remove all port->id's mcast enries */
- mvpp2_prs_mcast_del_all(priv, id);
-
- if (!allmulti) {
- netdev_for_each_mc_addr(ha, dev) {
- if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
- allmulti = true;
- goto retry;
- }
- }
+
+ /* Clear the whole UC and MC list */
+ mvpp2_prs_mac_del_all(port);
+
+ if (dev->flags & IFF_PROMISC) {
+ mvpp2_set_rx_promisc(port, true);
+ return;
+ }
+
+ mvpp2_set_rx_promisc(port, false);
+
+ if (netdev_uc_count(dev) > MVPP2_PRS_MAC_UC_FILT_MAX ||
+ mvpp2_prs_mac_da_accept_list(port, &dev->uc))
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_UNI_CAST, true);
+
+ if (dev->flags & IFF_ALLMULTI) {
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_MULTI_CAST, true);
+ return;
}
+
+ if (netdev_mc_count(dev) > MVPP2_PRS_MAC_MC_FILT_MAX ||
+ mvpp2_prs_mac_da_accept_list(port, &dev->mc))
+ mvpp2_prs_mac_promisc_set(port->priv, port->id,
+ MVPP2_PRS_L2_MULTI_CAST, true);
}
static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7696,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return ret;
}
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ ret = mvpp2_prs_vid_entry_add(port, vid);
+ if (ret)
+ netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+ MVPP2_PRS_VLAN_FILT_MAX - 1);
+ return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ mvpp2_prs_vid_entry_remove(port, vid);
+ return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = dev->features ^ features;
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ mvpp2_prs_vid_enable_filtering(port);
+ } else {
+ /* Invalidate all registered VID filters for this
+ * port
+ */
+ mvpp2_prs_vid_remove_all(port);
+
+ mvpp2_prs_vid_disable_filtering(port);
+ }
+ }
+
+ return 0;
+}
+
/* Ethtool methods */
/* Set interrupt coalescing for ethtools */
@@ -7433,6 +7879,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
.ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
+ .ndo_set_features = mvpp2_set_features,
};
static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7943,16 +8392,25 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
}
- features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+ features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
- dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+ dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
+ dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ }
+
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
+ dev->priv_flags |= IFF_UNICAST_FLT;
- /* MTU range: 68 - 9676 */
+ /* MTU range: 68 - 9704 */
dev->min_mtu = ETH_MIN_MTU;
- /* 9676 == 9700 - 20 and rounding to 8 */
- dev->max_mtu = 9676;
+ /* 9704 == 9728 - 20, rounded down to a multiple of 8 */
+ dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
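A quick standalone check of the arithmetic in the comment above; the 9728-byte buffer and 20-byte overhead figures are taken from the comment, and the macro name is an illustrative stand-in.

#include <stdio.h>

#define ROUND_DOWN_8(x)	((x) & ~7U)	/* round down to a multiple of 8 */

int main(void)
{
	unsigned int max_mtu = ROUND_DOWN_8(9728 - 20);

	printf("%u\n", max_mtu);	/* prints 9704 */
	return 0;
}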
err = register_netdev(dev);
if (err < 0) {
@@ -8083,14 +8541,25 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
}
-/* Initialize Tx FIFO's */
+/* Initialize Tx FIFOs: the total FIFO size is 19kB on PPv2.2, and 10G
+ * interfaces must have a Tx FIFO size of 10kB. As only port 0 can do 10G,
+ * configure its Tx FIFO size to 10kB and the other ports' Tx FIFO size to 3kB.
+ */
static void mvpp22_tx_fifo_init(struct mvpp2 *priv)
{
- int port;
+ int port, size, thrs;
- for (port = 0; port < MVPP2_MAX_PORTS; port++)
- mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port),
- MVPP22_TX_FIFO_DATA_SIZE_3KB);
+ for (port = 0; port < MVPP2_MAX_PORTS; port++) {
+ if (port == 0) {
+ size = MVPP22_TX_FIFO_DATA_SIZE_10KB;
+ thrs = MVPP2_TX_FIFO_THRESHOLD_10KB;
+ } else {
+ size = MVPP22_TX_FIFO_DATA_SIZE_3KB;
+ thrs = MVPP2_TX_FIFO_THRESHOLD_3KB;
+ }
+ mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), size);
+ mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), thrs);
+ }
}
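A standalone sketch of the Tx FIFO split just described, assuming MVPP2_MAX_PORTS is 4 so the per-port sizes sum to the 19kB budget; the constants here are illustrative and not the driver's register values.

/* Port 0 (the 10G-capable port) gets a 10kB Tx FIFO, every other port 3kB. */
#include <stdio.h>

#define MAX_PORTS		4
#define TX_FIFO_SIZE_10KB	10
#define TX_FIFO_SIZE_3KB	3

int main(void)
{
	int port, total = 0;

	for (port = 0; port < MAX_PORTS; port++) {
		int size = (port == 0) ? TX_FIFO_SIZE_10KB : TX_FIFO_SIZE_3KB;

		total += size;
		printf("port %d: %d kB\n", port, size);
	}
	printf("total: %d kB\n", total);	/* 19 kB on PPv2.2 */
	return 0;
}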
static void mvpp2_axi_init(struct mvpp2 *priv)
@@ -8284,6 +8753,8 @@ static int mvpp2_probe(struct platform_device *pdev)
priv->sysctrl_base = NULL;
}
+ mvpp2_setup_bm_pool();
+
for (i = 0; i < MVPP2_MAX_THREADS; i++) {
u32 addr_space_sz;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ebc1f566a4d9..9a7a2f05ab35 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
"rx_xdp_drop",
"rx_xdp_tx",
"rx_xdp_tx_full",
+
+ /* phy statistics */
+ "rx_packets_phy", "rx_bytes_phy",
+ "tx_packets_phy", "tx_bytes_phy",
};
static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+ for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
+ for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 8fc51bc29003..e0adac4a9a19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
last_i += NUM_XDP_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+ last_i += NUM_PHY_STATS;
}
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
mlx4_en_stop_port(dev, 1);
}
- en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
- ts_config.rx_filter,
- !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
mlx4_en_safe_replace_resources(priv, tmp);
if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 1fa4849a6f56..0158b88bea5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more);
}
- if (mlx4_is_master(mdev->dev)) {
- stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
- &mlx4_en_stats->RTOT_prio_1,
- NUM_PRIORITIES);
- stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
- &mlx4_en_stats->TTOT_prio_1,
- NUM_PRIORITIES);
- stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
- &mlx4_en_stats->ROCT_prio_1,
- NUM_PRIORITIES);
- stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
- &mlx4_en_stats->TOCT_prio_1,
- NUM_PRIORITIES);
+ if (!mlx4_is_slave(mdev->dev)) {
+ struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+ p_stats->rx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+ &mlx4_en_stats->RTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+ &mlx4_en_stats->TTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->rx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+ &mlx4_en_stats->ROCT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+ &mlx4_en_stats->TOCT_prio_1,
+ NUM_PRIORITIES);
+ if (mlx4_is_master(mdev->dev)) {
+ stats->rx_packets = p_stats->rx_packets_phy;
+ stats->tx_packets = p_stats->tx_packets_phy;
+ stats->rx_bytes = p_stats->rx_bytes_phy;
+ stats->tx_bytes = p_stats->tx_bytes_phy;
+ }
}
/* net device stats */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b4d144e67514..05787efef492 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -291,13 +291,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
sizeof(struct mlx4_en_rx_alloc));
- ring->rx_info = vzalloc_node(tmp, node);
+ ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
if (!ring->rx_info) {
- ring->rx_info = vzalloc(tmp);
- if (!ring->rx_info) {
- err = -ENOMEM;
- goto err_xdp_info;
- }
+ err = -ENOMEM;
+ goto err_xdp_info;
}
en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
@@ -318,7 +315,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
return 0;
err_info:
- vfree(ring->rx_info);
+ kvfree(ring->rx_info);
ring->rx_info = NULL;
err_xdp_info:
xdp_rxq_info_unreg(&ring->xdp_rxq);
@@ -447,7 +444,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
bpf_prog_put(old_prog);
xdp_rxq_info_unreg(&ring->xdp_rxq);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
- vfree(ring->rx_info);
+ kvfree(ring->rx_info);
ring->rx_info = NULL;
kfree(ring);
*pring = NULL;
@@ -649,6 +646,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
}
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +665,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int polled = 0;
int index;
- if (unlikely(!priv->port_up))
+ if (unlikely(!priv->port_up || budget <= 0))
return 0;
- if (unlikely(budget <= 0))
- return polled;
-
ring = priv->rx_ring[cq_ring];
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +838,7 @@ xdp_drop_no_cnt:
ring->csum_ok++;
} else {
if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
- (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
- MLX4_CQE_STATUS_IPV6))))
-#else
- 0))))
-#endif
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
goto csum_none;
if (check_csum(cqe, skb, va, dev->features))
goto csum_none;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f470ae37d937..f7c81133594f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
struct mlx4_en_flow_stats_tx tx_flowstats;
struct mlx4_en_port_stats port_stats;
struct mlx4_en_xdp_stats xdp_stats;
+ struct mlx4_en_phy_stats phy_stats;
struct mlx4_en_stats_bitmap stats_bitmap;
struct list_head mc_list;
struct list_head curr_list;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index aab28eb27a30..86b6051da8ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
#define NUM_XDP_STATS 3
};
+struct mlx4_en_phy_stats {
+ unsigned long rx_packets_phy;
+ unsigned long rx_bytes_phy;
+ unsigned long tx_packets_phy;
+ unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS 4
+};
+
#define NUM_MAIN_STATS 21
#define MLX4_NUM_PRIORITIES 8
@@ -116,7 +124,7 @@ enum {
#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
- NUM_XDP_STATS)
+ NUM_XDP_STATS + NUM_PHY_STATS)
#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
sizeof(((struct net_device_stats *)0)->n))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 53e69edaedde..9f1b1939716a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -37,24 +37,11 @@
#include "mlx5_core.h"
#include "fpga/ipsec.h"
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd)
-{
- if (!MLX5_IPSEC_DEV(mdev))
- return ERR_PTR(-EOPNOTSUPP);
-
- return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
-}
-
-int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
-{
- return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
-}
-
u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
return mlx5_fpga_ipsec_device_caps(mdev);
}
+EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
{
@@ -67,6 +54,21 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
}
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
+ spi, is_ipv6);
+}
+
+void mlx5_accel_esp_free_hw_context(void *context)
+{
+ mlx5_fpga_ipsec_delete_sa_ctx(context);
+}
+
int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
{
return mlx5_fpga_ipsec_init(mdev);
@@ -76,3 +78,32 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
mlx5_fpga_ipsec_cleanup(mdev);
}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_accel_esp_xfrm *xfrm;
+
+ xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
+ if (IS_ERR(xfrm))
+ return xfrm;
+
+ xfrm->mdev = mdev;
+ return xfrm;
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ mlx5_fpga_esp_destroy_xfrm(xfrm);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index d6e20fea9554..024dbd22a89b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -34,89 +34,25 @@
#ifndef __MLX5_ACCEL_IPSEC_H__
#define __MLX5_ACCEL_IPSEC_H__
-#ifdef CONFIG_MLX5_ACCEL
-
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
-enum {
- MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
- MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
- MLX5_ACCEL_IPSEC_ESP = BIT(3),
- MLX5_ACCEL_IPSEC_LSO = BIT(4),
-};
-
-#define MLX5_IPSEC_SADB_IP_AH BIT(7)
-#define MLX5_IPSEC_SADB_IP_ESP BIT(6)
-#define MLX5_IPSEC_SADB_SA_VALID BIT(5)
-#define MLX5_IPSEC_SADB_SPI_EN BIT(4)
-#define MLX5_IPSEC_SADB_DIR_SX BIT(3)
-#define MLX5_IPSEC_SADB_IPV6 BIT(2)
-
-enum {
- MLX5_IPSEC_CMD_ADD_SA = 0,
- MLX5_IPSEC_CMD_DEL_SA = 1,
-};
-
-enum mlx5_accel_ipsec_enc_mode {
- MLX5_IPSEC_SADB_MODE_NONE = 0,
- MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
- MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
-};
+#ifdef CONFIG_MLX5_ACCEL
#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
- MLX5_ACCEL_IPSEC_DEVICE)
-
-struct mlx5_accel_ipsec_sa {
- __be32 cmd;
- u8 key_enc[32];
- u8 key_auth[32];
- __be32 sip[4];
- __be32 dip[4];
- union {
- struct {
- __be32 reserved;
- u8 salt_iv[8];
- __be32 salt;
- } __packed gcm;
- struct {
- u8 salt[16];
- } __packed cbc;
- };
- __be32 spi;
- __be32 sw_sa_handle;
- __be16 tfclen;
- u8 enc_mode;
- u8 sip_masklen;
- u8 dip_masklen;
- u8 flags;
- u8 reserved[2];
-} __packed;
-
-/**
- * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
- * @mdev: mlx5 device
- * @cmd: command to execute
- * May be called from atomic context. Returns context pointer, or error
- * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
- * context, to cleanup the context pointer
- */
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd);
-
-/**
- * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
- * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
- * Sleeps (killable) until command execution is complete.
- * Returns the command result, or -EINTR if killed
- */
-int mlx5_accel_ipsec_sa_cmd_wait(void *context);
-
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
unsigned int count);
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_accel_esp_free_hw_context(void *context);
+
int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
@@ -124,6 +60,20 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
#define MLX5_IPSEC_DEV(mdev) false
+static inline void *
+mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_accel_esp_free_hw_context(void *context)
+{
+}
+
static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
{
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 47239bf7bf43..323ffe8bf7e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -71,19 +71,24 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
}
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node)
+ struct mlx5_frag_buf *buf, int node)
{
dma_addr_t t;
buf->size = size;
buf->npages = 1;
buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
- buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size,
- &t, node);
- if (!buf->direct.buf)
+
+ buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+ if (!buf->frags)
return -ENOMEM;
- buf->direct.map = t;
+ buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
+ &t, node);
+ if (!buf->frags->buf)
+ goto err_out;
+
+ buf->frags->map = t;
while (t & ((1 << buf->page_shift) - 1)) {
--buf->page_shift;
@@ -91,18 +96,24 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
}
return 0;
+err_out:
+ kfree(buf->frags);
+ return -ENOMEM;
}
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf)
{
return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
}
-EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+EXPORT_SYMBOL(mlx5_buf_alloc);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
- dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
- buf->direct.map);
+ dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+ buf->frags->map);
+
+ kfree(buf->frags);
}
EXPORT_SYMBOL_GPL(mlx5_buf_free);
@@ -147,6 +158,7 @@ err_free_buf:
err_out:
return -ENOMEM;
}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
@@ -162,6 +174,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
}
kfree(buf->frags);
}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
int node)
@@ -275,13 +288,13 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
}
EXPORT_SYMBOL_GPL(mlx5_db_free);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
{
u64 addr;
int i;
for (i = 0; i < buf->npages; i++) {
- addr = buf->direct.map + (i << buf->page_shift);
+ addr = buf->frags->map + (i << buf->page_shift);
pas[i] = cpu_to_be64(addr);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 1016e05c7ec7..a4179122a279 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,8 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
- if (refcount_dec_and_test(&mcq->refcount))
- complete(&mcq->free);
+ mlx5_cq_put(mcq);
if (time_after(jiffies, end))
break;
}
@@ -80,69 +79,19 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
* still arrive.
*/
if (list_empty_careful(&cq->tasklet_ctx.list)) {
- refcount_inc(&cq->refcount);
+ mlx5_cq_hold(cq);
list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
}
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
-{
- struct mlx5_core_cq *cq;
- struct mlx5_cq_table *table = &dev->priv.cq_table;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- refcount_inc(&cq->refcount);
- spin_unlock(&table->lock);
-
- if (!cq) {
- mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
-}
-
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
-{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
- struct mlx5_core_cq *cq;
-
- spin_lock(&table->lock);
-
- cq = radix_tree_lookup(&table->tree, cqn);
- if (cq)
- refcount_inc(&cq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!cq) {
- mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- cq->event(cq, event_type);
-
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
-}
-
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen)
{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
- c_eqn);
struct mlx5_eq *eq;
int err;
@@ -159,6 +108,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cqn = MLX5_GET(create_cq_out, out, cqn);
cq->cons_index = 0;
cq->arm_sn = 0;
+ cq->eq = eq;
refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
if (!cq->comp)
@@ -167,12 +117,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->tasklet_ctx.priv = &eq->tasklet_ctx;
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, cq->cqn, cq);
- spin_unlock_irq(&table->lock);
+ /* Add to comp EQ CQ tree to recv comp events */
+ err = mlx5_eq_add_cq(eq, cq);
if (err)
goto err_cmd;
+ /* Add to async EQ CQ tree to recv async events */
+ err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ goto err_cq_add;
+
cq->pid = current->pid;
err = mlx5_debug_cq_add(dev, cq);
if (err)
@@ -183,6 +137,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
+err_cq_add:
+ mlx5_eq_del_cq(eq, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
@@ -195,23 +151,17 @@ EXPORT_SYMBOL(mlx5_core_create_cq);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
- struct mlx5_core_cq *tmp;
int err;
- spin_lock_irq(&table->lock);
- tmp = radix_tree_delete(&table->tree, cq->cqn);
- spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
- return -EINVAL;
- }
- if (tmp != cq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
- return -EINVAL;
- }
+ err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ return err;
+
+ err = mlx5_eq_del_cq(cq->eq, cq);
+ if (err)
+ return err;
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
@@ -222,8 +172,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
synchronize_irq(cq->irqn);
mlx5_debug_cq_remove(dev, cq);
- if (refcount_dec_and_test(&cq->refcount))
- complete(&cq->free);
+ mlx5_cq_put(cq);
wait_for_completion(&cq->free);
return 0;
@@ -270,21 +219,3 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
}
EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
-
-int mlx5_init_cq_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_cq_table *table = &dev->priv.cq_table;
- int err;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
- err = mlx5_cq_debugfs_init(dev);
-
- return err;
-}
-
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
-{
- mlx5_cq_debugfs_cleanup(dev);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 17b723218b0c..b994b80d5714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index fd509160c8f6..d93ff567b40d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -246,6 +246,9 @@ const char *parse_fs_dst(struct trace_seq *p,
case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
trace_seq_printf(p, "counter_id=%u\n", counter_id);
break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ trace_seq_printf(p, "port\n");
+ break;
}
trace_seq_putc(p, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 80eef4163f52..a6ba57fbb414 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -163,9 +163,9 @@ TRACE_EVENT(mlx5_fs_set_fte,
fs_get_obj(__entry->fg, fte->node.parent);
__entry->group_index = __entry->fg->id;
__entry->index = fte->index;
- __entry->action = fte->action;
+ __entry->action = fte->action.action;
__entry->mask_enable = __entry->fg->mask.match_criteria_enable;
- __entry->flow_tag = fte->flow_tag;
+ __entry->flow_tag = fte->action.flow_tag;
memcpy(__entry->mask_outer,
MLX5_ADDR_OF(fte_match_param,
&__entry->fg->mask.match_criteria,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index bac5103efad3..cf58c9637904 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -38,17 +38,24 @@
#include <linux/module.h>
#include "en.h"
-#include "accel/ipsec.h"
#include "en_accel/ipsec.h"
#include "en_accel/ipsec_rxtx.h"
-struct mlx5e_ipsec_sa_entry {
- struct hlist_node hlist; /* Item in SADB_RX hashtable */
- unsigned int handle; /* Handle in SADB_RX */
- struct xfrm_state *x;
- struct mlx5e_ipsec *ipsec;
- void *context;
-};
+
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa;
+
+ if (!x)
+ return NULL;
+
+ sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ if (!sa)
+ return NULL;
+
+ WARN_ON(sa->x != x);
+ return sa;
+}
struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
unsigned int handle)
@@ -74,18 +81,16 @@ static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
unsigned long flags;
int ret;
- spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
if (ret < 0)
- goto out;
+ return ret;
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
sa_entry->handle = ret;
hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
- ret = 0;
-
-out:
spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
- return ret;
+
+ return 0;
}
static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -101,87 +106,99 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
- unsigned long flags;
- /* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
- synchronize_rcu();
- spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ /* xfrm already doing sync rcu between del and free callbacks */
+
ida_simple_remove(&ipsec->halloc, sa_entry->handle);
- spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
}
-static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
{
- unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
-
- switch (key_len) {
- case 16:
- return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
- case 32:
- return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
- default:
- netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
- key_len, x->aead->alg_name);
- return -1;
+ struct xfrm_replay_state_esn *replay_esn;
+ u32 seq_bottom;
+ u8 overlap;
+ u32 *esn;
+
+ if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+ sa_entry->esn_state.trigger = 0;
+ return false;
+ }
+
+ replay_esn = sa_entry->x->replay_esn;
+ seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+ overlap = sa_entry->esn_state.overlap;
+
+ sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+ htonl(seq_bottom));
+ esn = &sa_entry->esn_state.esn;
+
+ sa_entry->esn_state.trigger = 1;
+ if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+ ++(*esn);
+ sa_entry->esn_state.overlap = 0;
+ return true;
+ } else if (unlikely(!overlap &&
+ (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+ sa_entry->esn_state.overlap = 1;
+ return true;
}
+
+ return false;
}
-static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
- struct mlx5_accel_ipsec_sa *hw_sa)
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct xfrm_state *x = sa_entry->x;
+ struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
struct aead_geniv_ctx *geniv_ctx;
- unsigned int crypto_data_len;
struct crypto_aead *aead;
- unsigned int key_len;
+ unsigned int crypto_data_len, key_len;
int ivsize;
- memset(hw_sa, 0, sizeof(*hw_sa));
-
- if (op == MLX5_IPSEC_CMD_ADD_SA) {
- crypto_data_len = (x->aead->alg_key_len + 7) / 8;
- key_len = crypto_data_len - 4; /* 4 bytes salt at end */
- aead = x->data;
- geniv_ctx = crypto_aead_ctx(aead);
- ivsize = crypto_aead_ivsize(aead);
-
- memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
- /* Duplicate 128 bit key twice according to HW layout */
- if (key_len == 16)
- memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
- memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
- hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
- }
+ memset(attrs, 0, sizeof(*attrs));
- hw_sa->cmd = htonl(op);
- hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
- if (x->props.family == AF_INET) {
- hw_sa->sip[3] = x->props.saddr.a4;
- hw_sa->dip[3] = x->id.daddr.a4;
- hw_sa->sip_masklen = 32;
- hw_sa->dip_masklen = 32;
- } else {
- memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
- memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
- hw_sa->sip_masklen = 128;
- hw_sa->dip_masklen = 128;
- hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
- }
- hw_sa->spi = x->id.spi;
- hw_sa->sw_sa_handle = htonl(sa_entry->handle);
- switch (x->id.proto) {
- case IPPROTO_ESP:
- hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
- break;
- case IPPROTO_AH:
- hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
- break;
- default:
- break;
+ /* key */
+ crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+ key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+ memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+ aes_gcm->key_len = key_len * 8;
+
+ /* salt and seq_iv */
+ aead = x->data;
+ geniv_ctx = crypto_aead_ctx(aead);
+ ivsize = crypto_aead_ivsize(aead);
+ memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+ memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+ sizeof(aes_gcm->salt));
+
+ /* iv len */
+ aes_gcm->icv_len = x->aead->alg_icv_len;
+
+ /* esn */
+ if (sa_entry->esn_state.trigger) {
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ attrs->esn = sa_entry->esn_state.esn;
+ if (sa_entry->esn_state.overlap)
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
}
- hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
- if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
- hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+
+ /* rx handle */
+ attrs->sa_handle = sa_entry->handle;
+
+ /* algo type */
+ attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ /* action */
+ attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ /* flags */
+ attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL;
}
static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
@@ -203,7 +220,9 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
netdev_info(netdev, "Cannot offload compressed xfrm states\n");
return -EINVAL;
}
- if (x->props.flags & XFRM_STATE_ESN) {
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_ESN)) {
netdev_info(netdev, "Cannot offload ESN xfrm states\n");
return -EINVAL;
}
@@ -251,7 +270,8 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
return -EINVAL;
}
if (x->props.family == AF_INET6 &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_IPV6)) {
netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
return -EINVAL;
}
@@ -262,9 +282,10 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
struct net_device *netdev = x->xso.dev;
- struct mlx5_accel_ipsec_sa hw_sa;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
struct mlx5e_priv *priv;
- void *context;
+ __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+ bool is_ipv6 = false;
int err;
priv = netdev_priv(netdev);
@@ -291,22 +312,49 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
goto err_entry;
}
+ } else {
+ sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+ mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
}
- mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
- context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
- if (IS_ERR(context)) {
- err = PTR_ERR(context);
+ /* check esn */
+ mlx5e_ipsec_update_esn_state(sa_entry);
+
+ /* create xfrm */
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+ sa_entry->xfrm =
+ mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
+ MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+ if (IS_ERR(sa_entry->xfrm)) {
+ err = PTR_ERR(sa_entry->xfrm);
goto err_sadb_rx;
}
- err = mlx5_accel_ipsec_sa_cmd_wait(context);
- if (err)
- goto err_sadb_rx;
+ /* create hw context */
+ if (x->props.family == AF_INET) {
+ saddr[3] = x->props.saddr.a4;
+ daddr[3] = x->id.daddr.a4;
+ } else {
+ memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
+ memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
+ is_ipv6 = true;
+ }
+ spi = x->id.spi;
+ sa_entry->hw_context =
+ mlx5_accel_esp_create_hw_context(priv->mdev,
+ sa_entry->xfrm,
+ saddr, daddr, spi,
+ is_ipv6);
+ if (IS_ERR(sa_entry->hw_context)) {
+ err = PTR_ERR(sa_entry->hw_context);
+ goto err_xfrm;
+ }
x->xso.offload_handle = (unsigned long)sa_entry;
goto out;
+err_xfrm:
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
err_sadb_rx:
if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
mlx5e_ipsec_sadb_rx_del(sa_entry);
@@ -320,43 +368,26 @@ out:
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
- struct mlx5e_ipsec_sa_entry *sa_entry;
- struct mlx5_accel_ipsec_sa hw_sa;
- void *context;
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- if (!x->xso.offload_handle)
+ if (!sa_entry)
return;
- sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
- WARN_ON(sa_entry->x != x);
-
if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
mlx5e_ipsec_sadb_rx_del(sa_entry);
-
- mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
- context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
- if (IS_ERR(context))
- return;
-
- sa_entry->context = context;
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
{
- struct mlx5e_ipsec_sa_entry *sa_entry;
- int res;
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
- if (!x->xso.offload_handle)
+ if (!sa_entry)
return;
- sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
- WARN_ON(sa_entry->x != x);
-
- res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
- sa_entry->context = NULL;
- if (res) {
- /* Leftover object will leak */
- return;
+ if (sa_entry->hw_context) {
+ flush_workqueue(sa_entry->ipsec->wq);
+ mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
}
if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
@@ -383,6 +414,14 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
ida_init(&ipsec->halloc);
ipsec->en_priv = priv;
ipsec->en_priv->ipsec = ipsec;
+ ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+ ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+ priv->netdev->name);
+ if (!ipsec->wq) {
+ kfree(ipsec);
+ return -ENOMEM;
+ }
netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
return 0;
}
@@ -394,6 +433,9 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
if (!ipsec)
return;
+ drain_workqueue(ipsec->wq);
+ destroy_workqueue(ipsec->wq);
+
ida_destroy(&ipsec->halloc);
kfree(ipsec);
priv->ipsec = NULL;
@@ -414,11 +456,58 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
return true;
}
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+};
+
+static void _update_xfrm_state(struct work_struct *work)
+{
+ int ret;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
+
+ ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
+ &modify_work->attrs);
+ if (ret)
+ netdev_warn(sa_entry->ipsec->en_priv->netdev,
+ "Not an IPSec offload device\n");
+
+ kfree(modify_work);
+}
+
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_ipsec_modify_state_work *modify_work;
+ bool need_update;
+
+ if (!sa_entry)
+ return;
+
+ need_update = mlx5e_ipsec_update_esn_state(sa_entry);
+ if (!need_update)
+ return;
+
+ modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
+ if (!modify_work)
+ return;
+
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
+ modify_work->sa_entry = sa_entry;
+
+ INIT_WORK(&modify_work->work, _update_xfrm_state);
+ WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+}
+
static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
.xdo_dev_state_add = mlx5e_xfrm_add_state,
.xdo_dev_state_delete = mlx5e_xfrm_del_state,
.xdo_dev_state_free = mlx5e_xfrm_free_state,
.xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
};
void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
@@ -429,7 +518,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
if (!priv->ipsec)
return;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
!MLX5_CAP_ETH(mdev, swp)) {
mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
return;
@@ -448,7 +537,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
!MLX5_CAP_ETH(mdev, swp_lso)) {
mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 56e00baf16cc..1198fc1eba4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -40,7 +40,11 @@
#include <net/xfrm.h>
#include <linux/idr.h>
+#include "accel/ipsec.h"
+
#define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
#define MLX5E_METADATA_ETHER_LEN 8
@@ -77,10 +81,30 @@ struct mlx5e_ipsec_stats {
struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+ bool no_trailer;
spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
struct ida halloc;
struct mlx5e_ipsec_sw_stats sw_stats;
struct mlx5e_ipsec_stats stats;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5e_ipsec_esn_state {
+ u32 esn;
+ u8 trigger: 1;
+ u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_sa_entry {
+ struct hlist_node hlist; /* Item in SADB_RX hashtable */
+ struct mlx5e_ipsec_esn_state esn_state;
+ unsigned int handle; /* Handle in SADB_RX */
+ struct xfrm_state *x;
+ struct mlx5e_ipsec *ipsec;
+ struct mlx5_accel_esp_xfrm *xfrm;
+ void *hw_context;
+ void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
};
void mlx5e_ipsec_build_inverse_table(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 6a7c8b04447e..c245d8e78509 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -42,10 +42,11 @@
enum {
MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+ MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
};
struct mlx5e_ipsec_rx_metadata {
- unsigned char reserved;
+ unsigned char nexthdr;
__be32 sa_handle;
} __packed;
@@ -175,7 +176,30 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
}
}
-static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ __u32 oseq = replay_esn->oseq;
+ int iv_offset;
+ __be64 seqno;
+ u32 seq_hi;
+
+ if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+ MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+ seq_hi = xo->seq.hi - 1;
+ } else {
+ seq_hi = xo->seq.hi;
+ }
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
{
int iv_offset;
__be64 seqno;
@@ -227,6 +251,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo = xfrm_offload(skb);
struct mlx5e_ipsec_metadata *mdata;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
if (!xo)
@@ -261,7 +286,8 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
goto drop;
}
mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
- mlx5e_ipsec_set_iv(skb, xo);
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ sa_entry->set_iv_op(skb, x, xo);
mlx5e_ipsec_set_metadata(skb, mdata, xo);
return skb;
@@ -301,10 +327,17 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
switch (mdata->syndrome) {
case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
xo->status = CRYPTO_SUCCESS;
+ if (likely(priv->ipsec->no_trailer)) {
+ xo->flags |= XFRM_ESP_NO_TRAILER;
+ xo->proto = mdata->content.rx.nexthdr;
+ }
break;
case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
break;
+ case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
+ xo->status = CRYPTO_INVALID_PROTOCOL;
+ break;
default:
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index e37ae2598dbb..2bfbbef1b054 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -37,6 +37,7 @@
#ifdef CONFIG_MLX5_EN_IPSEC
#include <linux/skbuff.h>
+#include <net/xfrm.h>
#include "en.h"
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
@@ -46,6 +47,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_ipsec_inverse_table_init(void);
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
netdev_features_t features);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_tx_wqe *wqe,
struct sk_buff *skb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 363d8dcb7f17..ea4b255380a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
kfree(ppriv); /* mlx5e_rep_priv */
}
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
rep_if.load = mlx5e_vport_rep_load;
rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}
}
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
rep_if.load = mlx5e_nic_rep_load;
rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
rep_if.priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index e5c3ab46a24a..8cce90dc461d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -53,7 +53,7 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
void *data)
{
- u32 ci = cqcc & cq->wq.sz_m1;
+ u32 ci = cqcc & cq->wq.fbc.sz_m1;
memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}
@@ -75,9 +75,10 @@ static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
- u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
- u32 wq_sz = 1 << cq->wq.log_sz;
- u32 ci = cqcc & cq->wq.sz_m1;
+ struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc;
+ u8 op_own = (cqcc >> fbc->log_sz) & 1;
+ u32 wq_sz = 1 << fbc->log_sz;
+ u32 ci = cqcc & fbc->sz_m1;
u32 ci_top = min_t(u32, wq_sz, ci + n);
for (; ci < ci_top; ci++, n--) {
@@ -102,7 +103,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
cq->title.op_own &= 0xf0;
- cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz);
+ cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fa86a1466718..7c33df2034f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -675,6 +675,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
+ .has_flow_tag = true,
.flow_tag = attr->flow_tag,
.encap_id = 0,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 25106e996a96..c1c94974e16b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -393,6 +393,51 @@ static void general_event_handler(struct mlx5_core_dev *dev,
}
}
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
+
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
+
+ return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->comp(cq);
+
+ mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ cq->event(cq, event_type);
+
+ mlx5_cq_put(cq);
+}
+
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
@@ -415,7 +460,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
switch (eqe->type) {
case MLX5_EVENT_TYPE_COMP:
cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_cq_completion(dev, cqn);
+ mlx5_eq_cq_completion(eq, cqn);
break;
case MLX5_EVENT_TYPE_DCT_DRAINED:
rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
@@ -472,7 +517,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
cqn, eqe->data.cq_err.syndrome);
- mlx5_cq_event(dev, cqn, eqe->type);
+ mlx5_eq_cq_event(eq, cqn, eqe->type);
break;
case MLX5_EVENT_TYPE_PAGE_REQUEST:
@@ -567,6 +612,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name,
enum mlx5_eq_type type)
{
+ struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
irq_handler_t handler;
@@ -576,6 +622,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
u32 *in;
int err;
+ /* Init CQ table */
+ memset(cq_table, 0, sizeof(*cq_table));
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
eq->type = type;
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
@@ -669,7 +720,6 @@ err_buf:
mlx5_buf_free(dev, &eq->buf);
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
@@ -696,7 +746,40 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
return err;
}
-EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, cq->cqn, cq);
+ spin_unlock_irq(&table->lock);
+
+ return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *tmp;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, cq->cqn);
+ spin_unlock_irq(&table->lock);
+
+ if (!tmp) {
+ mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
+ return -ENOENT;
+ }
+
+ if (tmp != cq) {
+ mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
+ return -EINVAL;
+ }
+
+ return 0;
+}
int mlx5_eq_init(struct mlx5_core_dev *dev)
{
@@ -840,4 +923,3 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
-EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
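
The eq.c hunk above moves the CQ radix tree into each EQ and replaces the old device-wide lookup with a hold/put reference so a CQ cannot be freed while its completion handler runs. The stand-alone sketch below (hypothetical toy_* names, a plain array in place of the radix tree, pthread/stdatomic in place of kernel primitives) illustrates only that lookup-hold/use/put pattern; it is a sketch, not driver code.

/* Sketch of the pattern behind mlx5_eq_cq_get()/mlx5_eq_cq_completion(). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_cq {
	unsigned int cqn;
	atomic_int refcount;			/* one reference is owned by the table */
	void (*comp)(struct toy_cq *cq);	/* completion callback */
};

#define TOY_TABLE_SIZE 64
static struct toy_cq *toy_table[TOY_TABLE_SIZE];
static pthread_mutex_t toy_table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct toy_cq *toy_cq_get(unsigned int cqn)
{
	struct toy_cq *cq;

	pthread_mutex_lock(&toy_table_lock);
	cq = toy_table[cqn % TOY_TABLE_SIZE];
	if (cq)
		atomic_fetch_add(&cq->refcount, 1);	/* hold while we use it */
	pthread_mutex_unlock(&toy_table_lock);
	return cq;
}

static void toy_cq_put(struct toy_cq *cq)
{
	if (atomic_fetch_sub(&cq->refcount, 1) == 1)
		free(cq);				/* last reference dropped */
}

static void toy_handle_completion(unsigned int cqn)
{
	struct toy_cq *cq = toy_cq_get(cqn);

	if (!cq) {
		fprintf(stderr, "completion for bogus CQ 0x%x\n", cqn);
		return;
	}
	cq->comp(cq);		/* safe: the held reference pins the CQ */
	toy_cq_put(cq);
}

static void toy_comp(struct toy_cq *cq)
{
	printf("completion on CQ 0x%x\n", cq->cqn);
}

int main(void)
{
	struct toy_cq *cq = calloc(1, sizeof(*cq));

	if (!cq)
		return 1;
	cq->cqn = 5;
	cq->comp = toy_comp;
	atomic_store(&cq->refcount, 1);		/* table reference */
	toy_table[cq->cqn % TOY_TABLE_SIZE] = cq;

	toy_handle_completion(5);		/* found, runs the callback */
	toy_handle_completion(6);		/* bogus CQ warning */

	toy_table[cq->cqn % TOY_TABLE_SIZE] = NULL;
	toy_cq_put(cq);				/* drop the table reference, frees the CQ */
	return 0;
}
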
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c2b1d7d351fc..77b7272eaaa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
- if (mode == SRIOV_LEGACY)
+ if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
- else
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
err = esw_offloads_init(esw, nvfs + 1);
+ }
+
if (err)
goto abort;
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
+ int old_mode;
int nvports;
int i;
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
else if (esw->mode == SRIOV_OFFLOADS)
esw_offloads_cleanup(esw, nvports);
+ old_mode = esw->mode;
esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2175,3 +2188,9 @@ free_out:
kvfree(out);
return err;
}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
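
The newly exported mlx5_eswitch_mode() lets other modules (such as the IB representor code added elsewhere in this series) branch on whether the e-switch is in legacy or offloads mode. The short sketch below mirrors only that query shape; the enum values match the ones moved to include/linux/mlx5/eswitch.h, while toy_configure_steering() is a hypothetical consumer, not real driver code.

/* Hypothetical consumer of the exported eswitch mode query. */
#include <stdio.h>

enum { SRIOV_NONE, SRIOV_LEGACY, SRIOV_OFFLOADS };	/* as in linux/mlx5/eswitch.h */

struct toy_eswitch {
	int mode;
};

static int toy_eswitch_mode(const struct toy_eswitch *esw)
{
	return esw->mode;	/* mirrors mlx5_eswitch_mode() */
}

static void toy_configure_steering(const struct toy_eswitch *esw)
{
	/* A representor-aware consumer only installs representor rules
	 * when the e-switch runs in offloads (switchdev) mode.
	 */
	if (toy_eswitch_mode(esw) == SRIOV_OFFLOADS)
		printf("offloads mode: register representors\n");
	else
		printf("legacy/none: skip representor setup\n");
}

int main(void)
{
	struct toy_eswitch esw = { .mode = SRIOV_OFFLOADS };

	toy_configure_steering(&esw);
	return 0;
}
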
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2fa037066b2f..98d2177d0806 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -37,19 +37,9 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
#include "lib/mpfs.h"
-enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
-};
-
-enum {
- REP_ETH,
- NUM_REP_TYPES,
-};
-
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
- struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
} offloads;
};
};
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
- int (*load)(struct mlx5_core_dev *dev,
- struct mlx5_eswitch_rep *rep);
- void (*unload)(struct mlx5_eswitch_rep *rep);
- void *priv;
- bool valid;
-};
-
-struct mlx5_eswitch_rep {
- struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
- u16 vport;
- u8 hw_id[ETH_ALEN];
- u16 vlan;
- u32 vlan_refcount;
-};
-
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
int vport,
struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
- u32 sqn);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
struct mlx5_flow_spec;
@@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- struct mlx5_eswitch_rep_if *rep_if,
- u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- u8 rep_type);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
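
The representor structures removed from eswitch.h above now live in include/linux/mlx5/eswitch.h so that protocol drivers (ETH, and with this series IB) can register per-vport callbacks, including the new get_proto_dev hook used further down in eswitch_offloads.c. The stand-alone sketch below shows that registration/lookup shape with hypothetical toy_* names and a fixed two-type table; it is illustrative only.

/* Sketch of per-vport, per-protocol representor registration and the
 * get_proto_dev lookup. Hypothetical toy_* names.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

enum { TOY_REP_ETH, TOY_REP_IB, TOY_NUM_REP_TYPES };

struct toy_rep;

struct toy_rep_if {
	int (*load)(struct toy_rep *rep);
	void (*unload)(struct toy_rep *rep);
	void *(*get_proto_dev)(struct toy_rep *rep);
	void *priv;
	bool valid;
};

struct toy_rep {
	struct toy_rep_if rep_if[TOY_NUM_REP_TYPES];
	int vport;
};

/* mirrors mlx5_eswitch_register_vport_rep(): copy the callbacks in and
 * mark the slot valid
 */
static void toy_register_rep(struct toy_rep *rep, int type,
			     const struct toy_rep_if *rif)
{
	rep->rep_if[type] = *rif;
	rep->rep_if[type].valid = true;
}

/* mirrors mlx5_eswitch_get_proto_dev(): only call the hook when the slot
 * is valid and the hook was provided
 */
static void *toy_get_proto_dev(struct toy_rep *rep, int type)
{
	if (rep->rep_if[type].valid && rep->rep_if[type].get_proto_dev)
		return rep->rep_if[type].get_proto_dev(rep);
	return NULL;
}

static void *toy_ib_get_proto_dev(struct toy_rep *rep)
{
	return rep->rep_if[TOY_REP_IB].priv;	/* e.g. the IB device */
}

int main(void)
{
	struct toy_rep rep = { .vport = 1 };
	struct toy_rep_if ib_if = {
		.get_proto_dev = toy_ib_get_proto_dev,
		.priv = "ib-dev-for-vport-1",
	};

	toy_register_rep(&rep, TOY_REP_IB, &ib_if);
	printf("vport %d IB proto dev: %s\n", rep.vport,
	       (char *)toy_get_proto_dev(&rep, TOY_REP_IB));
	printf("vport %d ETH proto dev: %p\n", rep.vport,
	       toy_get_proto_dev(&rep, TOY_REP_ETH));
	return 0;
}
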
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 99f583a15cc3..0a8303c1b52f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -338,6 +338,7 @@ out:
kvfree(spec);
return flow_rule;
}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
@@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule = NULL;
struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
goto out;
}
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
goto out;
}
- esw->fdb_table.offloads.miss_rule = flow_rule;
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
out:
kvfree(spec);
return err;
@@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
}
#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
@@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
struct mlx5_flow_group *g;
void *match_criteria;
u32 *flow_group_in;
+ u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto fast_fdb_err;
- table_size = nvports + MAX_PF_SQ + 1;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- ix = nvports + MAX_PF_SQ;
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
@@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
- mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
@@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
- /* disable PF RoCE so missed packets don't go through RoCE steering */
- mlx5_dev_list_lock();
- mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
- goto create_fdb_err;
+ return err;
err = esw_create_offloads_table(esw);
if (err)
@@ -821,12 +854,6 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
-create_fdb_err:
- /* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
return err;
}
@@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
}
/* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
@@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
rep_if->load = __rep_if->load;
rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
rep_if->priv = __rep_if->priv;
rep_if->valid = true;
}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index, u8 rep_type)
@@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
rep->rep_if[rep_type].valid = false;
}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
@@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
rep = &offloads->vport_reps[UPLINK_REP_INDEX];
return rep->rep_if[rep_type].priv;
}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
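
The second FDB miss rule added in eswitch_offloads.c keys on the least-significant bit of the first destination-MAC byte (dmac_c[0] = 0x01), which is the Ethernet group/multicast bit, so unicast and multicast/broadcast misses are split between two rules. A tiny stand-alone C sketch of that classification follows; the names are hypothetical and it only demonstrates the bit test.

/* Sketch: the dmac[0] & 0x01 group bit splits FDB misses into unicast and
 * multicast/broadcast. Hypothetical names.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool toy_dmac_is_multicast(const uint8_t dmac[6])
{
	return dmac[0] & 0x01;	/* group/multicast bit, also set for broadcast */
}

int main(void)
{
	const uint8_t ucast[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	const uint8_t mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

	printf("ucast -> %s miss rule\n", toy_dmac_is_multicast(ucast) ? "multi" : "uni");
	printf("mcast -> %s miss rule\n", toy_dmac_is_multicast(mcast) ? "multi" : "uni");
	printf("bcast -> %s miss rule\n", toy_dmac_is_multicast(bcast) ? "multi" : "uni");
	return 0;
}
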
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 35d0e33381ca..4f1568528738 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -31,49 +31,91 @@
*
*/
+#include <linux/rhashtable.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
#include "mlx5_core.h"
+#include "fs_cmd.h"
#include "fpga/ipsec.h"
#include "fpga/sdk.h"
#include "fpga/core.h"
#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC (60 * 1000)
-enum mlx5_ipsec_response_syndrome {
- MLX5_IPSEC_RESPONSE_SUCCESS = 0,
- MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
- MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
- MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+enum mlx5_fpga_ipsec_cmd_status {
+ MLX5_FPGA_IPSEC_CMD_PENDING,
+ MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
+ MLX5_FPGA_IPSEC_CMD_COMPLETE,
};
-enum mlx5_fpga_ipsec_sacmd_status {
- MLX5_FPGA_IPSEC_SACMD_PENDING,
- MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
- MLX5_FPGA_IPSEC_SACMD_COMPLETE,
-};
-
-struct mlx5_ipsec_command_context {
+struct mlx5_fpga_ipsec_cmd_context {
struct mlx5_fpga_dma_buf buf;
- struct mlx5_accel_ipsec_sa sa;
- enum mlx5_fpga_ipsec_sacmd_status status;
+ enum mlx5_fpga_ipsec_cmd_status status;
+ struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
int status_code;
struct completion complete;
struct mlx5_fpga_device *dev;
struct list_head list; /* Item in pending_cmds */
+ u8 command[0];
+};
+
+struct mlx5_fpga_esp_xfrm;
+
+struct mlx5_fpga_ipsec_sa_ctx {
+ struct rhash_head hash;
+ struct mlx5_ifc_fpga_ipsec_sa hw_sa;
+ struct mlx5_core_dev *dev;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+};
+
+struct mlx5_fpga_esp_xfrm {
+ unsigned int num_rules;
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mutex lock; /* xfrm lock */
+ struct mlx5_accel_esp_xfrm accel_xfrm;
+};
+
+struct mlx5_fpga_ipsec_rule {
+ struct rb_node node;
+ struct fs_fte *fte;
+ struct mlx5_fpga_ipsec_sa_ctx *ctx;
};
-struct mlx5_ipsec_sadb_resp {
- __be32 syndrome;
- __be32 sw_sa_handle;
- u8 reserved[24];
-} __packed;
+static const struct rhashtable_params rhash_sa = {
+ .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+ .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+ .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
struct mlx5_fpga_ipsec {
+ struct mlx5_fpga_device *fdev;
struct list_head pending_cmds;
spinlock_t pending_cmds_lock; /* Protects pending_cmds */
u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
struct mlx5_fpga_conn *conn;
+
+ struct notifier_block fs_notifier_ingress_bypass;
+ struct notifier_block fs_notifier_egress;
+
+ /* Map hardware SA --> SA context
+ * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
+ * We will use this hash to avoid SA duplication in the fpga,
+ * which isn't allowed
+ */
+ struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
+ struct mutex sa_hash_lock;
+
+ /* Tree holding all rules for this fpga device
+ * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
+ */
+ struct rb_root rules_rb;
+ struct mutex rules_rb_lock; /* rules lock */
};
static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
@@ -97,28 +139,29 @@ static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
struct mlx5_fpga_dma_buf *buf,
u8 status)
{
- struct mlx5_ipsec_command_context *context;
+ struct mlx5_fpga_ipsec_cmd_context *context;
if (status) {
- context = container_of(buf, struct mlx5_ipsec_command_context,
+ context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
buf);
mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
status);
- context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+ context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
complete(&context->complete);
}
}
-static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+static inline
+int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
{
switch (syndrome) {
- case MLX5_IPSEC_RESPONSE_SUCCESS:
+ case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
return 0;
- case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+ case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
return -EEXIST;
- case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+ case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
return -EINVAL;
- case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+ case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
return -EIO;
}
return -EIO;
@@ -126,9 +169,9 @@ static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
{
- struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
- struct mlx5_ipsec_command_context *context;
- enum mlx5_ipsec_response_syndrome syndrome;
+ struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
struct mlx5_fpga_device *fdev = cb_arg;
unsigned long flags;
@@ -138,12 +181,12 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
return;
}
- mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
- ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+ mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
+ ntohl(resp->syndrome));
spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
- struct mlx5_ipsec_command_context,
+ struct mlx5_fpga_ipsec_cmd_context,
list);
if (context)
list_del(&context->list);
@@ -155,51 +198,48 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
}
mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
- if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
- mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
- ntohl(context->sa.sw_sa_handle),
- ntohl(resp->sw_sa_handle));
- return;
- }
-
syndrome = ntohl(resp->syndrome);
context->status_code = syndrome_to_errno(syndrome);
- context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+ context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
+ memcpy(&context->resp, resp, sizeof(*resp));
if (context->status_code)
- mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+ mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
syndrome);
+
complete(&context->complete);
}
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd)
+static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
+ const void *cmd, int cmd_size)
{
- struct mlx5_ipsec_command_context *context;
+ struct mlx5_fpga_ipsec_cmd_context *context;
struct mlx5_fpga_device *fdev = mdev->fpga;
unsigned long flags;
- int res = 0;
+ int res;
- BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
if (!fdev || !fdev->ipsec)
return ERR_PTR(-EOPNOTSUPP);
- context = kzalloc(sizeof(*context), GFP_ATOMIC);
+ if (cmd_size & 3)
+ return ERR_PTR(-EINVAL);
+
+ context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
if (!context)
return ERR_PTR(-ENOMEM);
- memcpy(&context->sa, cmd, sizeof(*cmd));
+ context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
+ context->dev = fdev;
context->buf.complete = mlx5_fpga_ipsec_send_complete;
- context->buf.sg[0].size = sizeof(context->sa);
- context->buf.sg[0].data = &context->sa;
init_completion(&context->complete);
- context->dev = fdev;
+ memcpy(&context->command, cmd, cmd_size);
+ context->buf.sg[0].size = cmd_size;
+ context->buf.sg[0].data = &context->command;
+
spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
- context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
-
res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
if (res) {
mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
@@ -214,47 +254,103 @@ void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
return context;
}
-int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
{
- struct mlx5_ipsec_command_context *context = ctx;
+ struct mlx5_fpga_ipsec_cmd_context *context = ctx;
+ unsigned long timeout =
+ msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
int res;
- res = wait_for_completion_killable(&context->complete);
- if (res) {
+ res = wait_for_completion_timeout(&context->complete, timeout);
+ if (!res) {
mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
- return -EINTR;
+ return -ETIMEDOUT;
}
- if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+ if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
res = context->status_code;
else
res = -EIO;
- kfree(context);
return res;
}
+static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
+{
+ if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
+ return true;
+ return false;
+}
+
+static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
+ int opcode)
+{
+ struct mlx5_core_dev *dev = fdev->mdev;
+ struct mlx5_ifc_fpga_ipsec_sa *sa;
+ struct mlx5_fpga_ipsec_cmd_context *cmd_context;
+ size_t sa_cmd_size;
+ int err;
+
+ hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
+ if (is_v2_sadb_supported(fdev->ipsec))
+ sa_cmd_size = sizeof(*hw_sa);
+ else
+ sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
+
+ cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
+ mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
+ if (IS_ERR(cmd_context))
+ return PTR_ERR(cmd_context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
+ if (err)
+ goto out;
+
+ sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
+ if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
+ mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+ ntohl(sa->ipsec_sa_v1.sw_sa_handle),
+ ntohl(cmd_context->resp.sw_sa_handle));
+ err = -EIO;
+ }
+
+out:
+ kfree(cmd_context);
+ return err;
+}
+
u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
u32 ret = 0;
- if (mlx5_fpga_is_ipsec_device(mdev))
- ret |= MLX5_ACCEL_IPSEC_DEVICE;
- else
+ if (mlx5_fpga_is_ipsec_device(mdev)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
+ ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
+ } else {
return ret;
+ }
if (!fdev->ipsec)
return ret;
if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
- ret |= MLX5_ACCEL_IPSEC_ESP;
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
- ret |= MLX5_ACCEL_IPSEC_IPV6;
+ ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
- ret |= MLX5_ACCEL_IPSEC_LSO;
+ ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
+ ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
+ ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
+ }
return ret;
}
@@ -318,6 +414,828 @@ out:
return ret;
}
+static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
+ int err;
+
+ cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
+ cmd.flags = htonl(flags);
+ context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
+ if (IS_ERR(context)) {
+ err = PTR_ERR(context);
+ goto out;
+ }
+
+ err = mlx5_fpga_ipsec_cmd_wait(context);
+ if (err)
+ goto out;
+
+ if ((context->resp.flags & cmd.flags) != cmd.flags) {
+ mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
+ cmd.flags,
+ context->resp.flags);
+ err = -EIO;
+ }
+
+out:
+ return err;
+}
+
+static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
+{
+ u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
+ u32 flags = 0;
+
+ if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
+ flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
+
+ return mlx5_fpga_ipsec_set_caps(mdev, flags);
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
+
+ /* key */
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
+ aes_gcm->key_len / 8);
+ /* Duplicate 128 bit key twice according to HW layout */
+ if (aes_gcm->key_len == 128)
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
+ aes_gcm->aes_key, aes_gcm->key_len / 8);
+
+ /* salt and seq_iv */
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
+ sizeof(aes_gcm->seq_iv));
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
+ sizeof(aes_gcm->salt));
+
+ /* esn */
+ if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags |=
+ (xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = htonl(xfrm_attrs->esn);
+ } else {
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags &=
+ ~(xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = 0;
+ }
+
+ /* rx handle */
+ hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
+
+ /* enc mode */
+ switch (aes_gcm->key_len) {
+ case 128:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
+ break;
+ case 256:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
+ break;
+ }
+
+ /* flags */
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
+ MLX5_FPGA_IPSEC_SA_SPI_EN |
+ MLX5_FPGA_IPSEC_SA_IP_ESP;
+
+ if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
+ else
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
+
+ /* IPs */
+ memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
+ memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
+
+ /* SPI */
+ hw_sa->ipsec_sa_v1.spi = spi;
+
+ /* flags */
+ if (is_ipv6)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
+}
+
+static bool is_full_mask(const void *p, size_t len)
+{
+ WARN_ON(len % 4);
+
+ return !memchr_inv(p, 0xff, len);
+}
+
+static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ misc_parameters);
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param,
+ match_v,
+ outer_headers);
+
+ if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
+ const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)) ||
+ !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)))
+ return false;
+ } else {
+ const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+
+ if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)) ||
+ !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)))
+ return false;
+ }
+
+ if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ outer_esp_spi),
+ MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+ bool ipv6_flow;
+
+ ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
+
+ if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
+ mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
+ mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
+ mlx5_fs_is_vxlan_flow(match_c) ||
+ !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
+ ipv6_flow))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
+ mlx5_fs_is_outer_ipsec_flow(match_c))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
+ ipv6_flow)
+ return false;
+
+ if (!validate_fpga_full_mask(dev, match_c, match_v))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v,
+ struct mlx5_flow_act *flow_act)
+{
+ const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+ bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
+ int ret;
+
+ ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
+ match_v);
+ if (!ret)
+ return ret;
+
+ if (is_dmac || is_smac ||
+ (match_criteria_enable &
+ ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
+ (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
+ flow_act->has_flow_tag)
+ return false;
+
+ return true;
+}
+
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(accel_xfrm, typeof(*fpga_xfrm),
+ accel_xfrm);
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode, err;
+ void *context;
+
+ /* alloc SA */
+ sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
+ if (!sa_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ sa_ctx->dev = mdev;
+
+ /* build candidate SA */
+ mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
+ saddr, daddr, spi, is_ipv6,
+ &sa_ctx->hw_sa);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
+ /* all rules must be with same IPs and SPI */
+ if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
+ sizeof(sa_ctx->hw_sa))) {
+ context = ERR_PTR(-EINVAL);
+ goto exists;
+ }
+
+ ++fpga_xfrm->num_rules;
+ context = fpga_xfrm->sa_ctx;
+ goto exists;
+ }
+
+ /* This fpga_xfrm is not yet bound; try to add it to the hash */
+ mutex_lock(&fipsec->sa_hash_lock);
+
+ err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa);
+ if (err) {
+ /* Can't bind a different accel_xfrm to an already existing sa_ctx.
+ * This is because we can't support multiple keymats for
+ * the same IPs and SPI
+ */
+ context = ERR_PTR(-EEXIST);
+ goto unlock_hash;
+ }
+
+ /* Bind accel_xfrm to sa_ctx */
+ opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ context = ERR_PTR(err);
+ goto delete_hash;
+ }
+
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+ ++fpga_xfrm->num_rules;
+ fpga_xfrm->sa_ctx = sa_ctx;
+ sa_ctx->fpga_xfrm = fpga_xfrm;
+
+ mutex_unlock(&fpga_xfrm->lock);
+
+ return sa_ctx;
+
+delete_hash:
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+unlock_hash:
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+exists:
+ mutex_unlock(&fpga_xfrm->lock);
+ kfree(sa_ctx);
+ return context;
+}
+
+static void *
+mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ struct mlx5_accel_esp_xfrm *accel_xfrm;
+ __be32 saddr[4], daddr[4], spi;
+ struct mlx5_flow_group *fg;
+ bool is_ipv6 = false;
+
+ fs_get_obj(fg, fte->node.parent);
+ /* validate */
+ if (is_egress &&
+ !mlx5_is_fpga_egress_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val,
+ &fte->action))
+ return ERR_PTR(-EINVAL);
+ else if (!mlx5_is_fpga_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val))
+ return ERR_PTR(-EINVAL);
+
+ /* get xfrm context */
+ accel_xfrm =
+ (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
+
+ /* IPs */
+ if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
+ fte->val)) {
+ memcpy(&saddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ sizeof(saddr[3]));
+ memcpy(&daddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ sizeof(daddr[3]));
+ } else {
+ memcpy(saddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(saddr));
+ memcpy(daddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(daddr));
+ is_ipv6 = true;
+ }
+
+ /* SPI */
+ spi = MLX5_GET_BE(typeof(spi),
+ fte_match_param, fte->val,
+ misc_parameters.outer_esp_spi);
+
+ /* create */
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
+ saddr, daddr,
+ spi, is_ipv6);
+}
+
+static void
+mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
+{
+ struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
+ int err;
+
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ WARN_ON(err);
+ return;
+ }
+
+ mutex_lock(&fipsec->sa_hash_lock);
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+ mutex_unlock(&fipsec->sa_hash_lock);
+}
+
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
+
+ mutex_lock(&fpga_xfrm->lock);
+ if (!--fpga_xfrm->num_rules) {
+ mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+ fpga_xfrm->sa_ctx = NULL;
+ }
+ mutex_unlock(&fpga_xfrm->lock);
+}
+
+static inline struct mlx5_fpga_ipsec_rule *
+_rule_search(struct rb_root *root, struct fs_fte *fte)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mlx5_fpga_ipsec_rule *rule =
+ container_of(node, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ if (fte < rule->fte)
+ node = node->rb_left;
+ else if (fte > rule->fte)
+ node = node->rb_right;
+ else
+ return rule;
+ }
+ return NULL;
+}
+
+static struct mlx5_fpga_ipsec_rule *
+rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
+{
+ struct mlx5_fpga_ipsec_rule *rule;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rule = _rule_search(&ipsec_dev->rules_rb, fte);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return rule;
+}
+
+static inline int _rule_insert(struct rb_root *root,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_node **new = &root->rb_node, *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mlx5_fpga_ipsec_rule *this =
+ container_of(*new, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ parent = *new;
+ if (rule->fte < this->fte)
+ new = &((*new)->rb_left);
+ else if (rule->fte > this->fte)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&rule->node, parent, new);
+ rb_insert_color(&rule->node, root);
+
+ return 0;
+}
+
+static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ int ret;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ ret = _rule_insert(&ipsec_dev->rules_rb, rule);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return ret;
+}
+
+static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_root *root = &ipsec_dev->rules_rb;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rb_erase(&rule->node, root);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+}
+
+static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ _rule_delete(ipsec_dev, rule);
+ kfree(rule);
+}
+
+struct mailbox_mod {
+ uintptr_t saved_esp_id;
+ u32 saved_action;
+ u32 saved_outer_esp_spi_value;
+};
+
+static void restore_spec_mailbox(struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ mbox_mod->saved_outer_esp_spi_value);
+ fte->action.action |= mbox_mod->saved_action;
+ fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
+}
+
+static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ mbox_mod->saved_esp_id = fte->action.esp_id;
+ mbox_mod->saved_action = fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ mbox_mod->saved_outer_esp_spi_value =
+ MLX5_GET(fte_match_set_misc, misc_params_v,
+ outer_esp_spi);
+
+ fte->action.esp_id = 0;
+ fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ if (!MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
+}
+
+static enum fs_flow_table_type egress_to_fs_ft(bool egress)
+{
+ return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
+}
+
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id,
+ bool is_egress)
+{
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 *in,
+ unsigned int *group_id) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
+ char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
+ match_criteria.misc_parameters);
+ u32 saved_outer_esp_spi_mask;
+ u8 match_criteria_enable;
+ int ret;
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ return create_flow_group(dev, ft, in, group_id);
+
+ match_criteria_enable =
+ MLX5_GET(create_flow_group_in, in, match_criteria_enable);
+ saved_outer_esp_spi_mask =
+ MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+ if (!match_criteria_enable || !saved_outer_esp_spi_mask)
+ return create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
+
+ if (!(*misc_params_c) &&
+ !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
+
+ ret = create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return create_fte(dev, ft, fg, fte);
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
+ if (IS_ERR(rule->ctx)) {
+ kfree(rule);
+ return PTR_ERR(rule->ctx);
+ }
+
+ rule->fte = fte;
+ WARN_ON(rule_insert(fipsec, rule));
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = create_fte(dev, ft, fg, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+ if (ret) {
+ _rule_delete(fipsec, rule);
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ kfree(rule);
+ }
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return update_fte(dev, ft, group_id, modify_mask, fte);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = update_fte(dev, ft, group_id, modify_mask, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return delete_fte(dev, ft, fte);
+
+ rule = rule_search(fipsec, fte);
+ if (!rule)
+ return -ENOENT;
+
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ rule_delete(fipsec, rule);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = delete_fte(dev, ft, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+}
+
+static struct mlx5_flow_cmds fpga_ipsec_ingress;
+static struct mlx5_flow_cmds fpga_ipsec_egress;
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ return &fpga_ipsec_ingress;
+ case FS_FT_NIC_TX:
+ return &fpga_ipsec_egress;
+ default:
+ WARN_ON(true);
+ return NULL;
+ }
+}
+
int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_conn_attr init_attr = {0};
@@ -332,6 +1250,8 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
if (!fdev->ipsec)
return -ENOMEM;
+ fdev->ipsec->fdev = fdev;
+
err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
fdev->ipsec->caps);
if (err) {
@@ -355,14 +1275,47 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
goto error;
}
fdev->ipsec->conn = conn;
+
+ err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
+ if (err)
+ goto err_destroy_conn;
+ mutex_init(&fdev->ipsec->sa_hash_lock);
+
+ fdev->ipsec->rules_rb = RB_ROOT;
+ mutex_init(&fdev->ipsec->rules_rb_lock);
+
+ err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
+ err);
+ goto err_destroy_hash;
+ }
+
return 0;
+err_destroy_hash:
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+err_destroy_conn:
+ mlx5_fpga_sbu_conn_destroy(conn);
+
error:
kfree(fdev->ipsec);
fdev->ipsec = NULL;
return err;
}
+static void destroy_rules_rb(struct rb_root *root)
+{
+ struct mlx5_fpga_ipsec_rule *r, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
+ rb_erase(&r->node, root);
+ mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
+ kfree(r);
+ }
+}
+
void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -370,7 +1323,209 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
if (!mlx5_fpga_is_ipsec_device(mdev))
return;
+ destroy_rules_rb(&fdev->ipsec->rules_rb);
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
kfree(fdev->ipsec);
fdev->ipsec = NULL;
}
+
+void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+ /* ingress */
+ fpga_ipsec_ingress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
+ fpga_ipsec_ingress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
+ fpga_ipsec_ingress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
+ fpga_ipsec_ingress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_ingress;
+ fpga_ipsec_ingress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
+ fpga_ipsec_ingress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_ingress;
+ fpga_ipsec_ingress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_ingress;
+ fpga_ipsec_ingress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_ingress;
+ fpga_ipsec_ingress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
+
+ /* egress */
+ fpga_ipsec_egress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
+ fpga_ipsec_egress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
+ fpga_ipsec_egress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
+ fpga_ipsec_egress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_egress;
+ fpga_ipsec_egress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
+ fpga_ipsec_egress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_egress;
+ fpga_ipsec_egress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_egress;
+ fpga_ipsec_egress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_egress;
+ fpga_ipsec_egress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
+}
+
+static int
+mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ if (attrs->tfc_pad) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
+ mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.iv_algo !=
+ MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
+ mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.icv_len != 128) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.key_len != 128 &&
+ attrs->keymat.aes_gcm.key_len != 256) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
+ (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
+ v2_command))) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+
+ if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
+ mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
+ if (!fpga_xfrm)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&fpga_xfrm->lock);
+ memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
+ sizeof(fpga_xfrm->accel_xfrm.attrs));
+
+ return &fpga_xfrm->accel_xfrm;
+}
+
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(xfrm, struct mlx5_fpga_esp_xfrm,
+ accel_xfrm);
+ /* assuming no sa_ctx are connected to this xfrm_ctx */
+ kfree(fpga_xfrm);
+}
+
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = xfrm->mdev;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+ struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
+
+ int err = 0;
+
+ if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
+ return 0;
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to modify an esp with unsupported attrs\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!is_v2_sadb_supported(fipsec)) {
+ mlx5_core_warn(mdev, "Modify esp is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (!fpga_xfrm->sa_ctx)
+ /* Unbound xfrm, change only sw attrs */
+ goto change_sw_xfrm_attrs;
+
+ /* copy original hw sa */
+ memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
+ mutex_lock(&fipsec->sa_hash_lock);
+ /* remove original hw sa from hash */
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa));
+ /* update hw_sa with new xfrm attrs */
+ mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
+ &fpga_xfrm->sa_ctx->hw_sa);
+ /* try to insert new hw_sa to hash */
+ err = rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa);
+ if (err)
+ goto rollback_sa;
+
+ /* modify device with new hw_sa */
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
+ MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
+ fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err)
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+rollback_sa:
+ if (err) {
+ /* return original hw_sa to hash */
+ memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
+ sizeof(org_hw_sa));
+ WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+ }
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+change_sw_xfrm_attrs:
+ if (!err)
+ memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
+ mutex_unlock(&fpga_xfrm->lock);
+ return err;
+}
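
The fpga/ipsec.c rework deduplicates hardware SAs with an rhashtable keyed on the whole hw_sa blob: a second rule on the same xfrm with identical IPs/SPI/keymat reuses the existing sa_ctx (num_rules is bumped), while a colliding insert from a different xfrm fails with -EEXIST. The stand-alone sketch below reproduces that lookup-or-insert decision with a linear list and hypothetical toy_* names in place of the kernel rhashtable; it is not the driver logic itself.

/* Sketch of the SA-dedup decision in mlx5_fpga_ipsec_create_sa_ctx(). */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_hw_sa {
	unsigned int sip, dip, spi;
	unsigned char key[16];
};

struct toy_sa_ctx {
	struct toy_hw_sa hw_sa;		/* hash key in the real driver */
	struct toy_sa_ctx *next;
};

struct toy_xfrm {
	struct toy_sa_ctx *sa_ctx;	/* set once the first rule bound an SA */
	unsigned int num_rules;
};

static struct toy_sa_ctx *toy_sa_hash;	/* linear list standing in for the rhashtable */

static struct toy_sa_ctx *toy_hash_lookup(const struct toy_hw_sa *hw_sa)
{
	struct toy_sa_ctx *c;

	for (c = toy_sa_hash; c; c = c->next)
		if (!memcmp(&c->hw_sa, hw_sa, sizeof(*hw_sa)))
			return c;
	return NULL;
}

static struct toy_sa_ctx *toy_create_sa_ctx(struct toy_xfrm *xfrm,
					    const struct toy_hw_sa *hw_sa)
{
	struct toy_sa_ctx *ctx;

	if (xfrm->sa_ctx) {
		/* more rules on the same xfrm must use the same IPs/SPI/key */
		if (memcmp(&xfrm->sa_ctx->hw_sa, hw_sa, sizeof(*hw_sa))) {
			errno = EINVAL;
			return NULL;
		}
		xfrm->num_rules++;
		return xfrm->sa_ctx;
	}

	if (toy_hash_lookup(hw_sa)) {
		/* same SA already bound to a different xfrm: refuse */
		errno = EEXIST;
		return NULL;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		errno = ENOMEM;
		return NULL;
	}
	memcpy(&ctx->hw_sa, hw_sa, sizeof(*hw_sa));
	ctx->next = toy_sa_hash;
	toy_sa_hash = ctx;
	xfrm->sa_ctx = ctx;
	xfrm->num_rules = 1;
	return ctx;
}

int main(void)
{
	static struct toy_hw_sa sa = { .sip = 1, .dip = 2, .spi = 0x100 };
	struct toy_xfrm a = { 0 }, b = { 0 };

	printf("xfrm a, rule 1: %p\n", (void *)toy_create_sa_ctx(&a, &sa));
	printf("xfrm a, rule 2: %p (num_rules=%u)\n",
	       (void *)toy_create_sa_ctx(&a, &sa), a.num_rules);
	if (!toy_create_sa_ctx(&b, &sa))
		printf("xfrm b, same SA: rejected (%s)\n", strerror(errno));
	return 0;
}
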
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 26a3e4b56972..2b5e63b0d4d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -35,33 +35,38 @@
#define __MLX5_FPGA_IPSEC_H__
#include "accel/ipsec.h"
+#include "fs_cmd.h"
#ifdef CONFIG_MLX5_FPGA
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd);
-int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
-
u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
unsigned int counters_count);
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
+
int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_build_fs_cmds(void);
-#else
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags);
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
-static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
- struct mlx5_accel_ipsec_sa *cmd)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
+const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
-{
- return -EOPNOTSUPP;
-}
+#else
static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
@@ -80,6 +85,20 @@ static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
return 0;
}
+static inline void *
+mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+}
+
static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
{
return 0;
@@ -89,6 +108,35 @@ static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
}
+static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+}
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+}
+
+static inline int
+mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ return mlx5_fs_cmd_get_default(type);
+}
+
#endif /* CONFIG_MLX5_FPGA */
#endif /* __MLX5_FPGA_SADB_H__ */
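
With fs_cmd.c below turning every flow-steering operation into an entry in a per-table-type command table (real firmware commands, no-op stubs, or the FPGA IPsec wrappers assembled in mlx5_fpga_ipsec_build_fs_cmds()), callers pick a vtable by table type and stay oblivious to which backend services it. The stand-alone sketch below shows that dispatch shape only, with hypothetical toy_* names and two toy backends; it does not reproduce the mlx5 command set.

/* Sketch of vtable-per-table-type dispatch for flow commands. */
#include <stdio.h>

enum toy_ft_type { TOY_FT_NIC_RX, TOY_FT_NIC_TX, TOY_FT_SNIFFER };

struct toy_flow_cmds {
	int (*create_fte)(enum toy_ft_type type, int fte_id);
	int (*delete_fte)(enum toy_ft_type type, int fte_id);
};

static int toy_hw_create_fte(enum toy_ft_type type, int fte_id)
{
	printf("hw: create fte %d on table type %d\n", fte_id, type);
	return 0;
}

static int toy_hw_delete_fte(enum toy_ft_type type, int fte_id)
{
	printf("hw: delete fte %d on table type %d\n", fte_id, type);
	return 0;
}

static const struct toy_flow_cmds toy_default_cmds = {
	.create_fte = toy_hw_create_fte,
	.delete_fte = toy_hw_delete_fte,
};

/* wrapper backend: do extra bookkeeping, then call the default backend,
 * mirroring how the FPGA IPsec commands wrap mlx5_fs_cmd_get_default()
 */
static int toy_wrap_create_fte(enum toy_ft_type type, int fte_id)
{
	printf("wrapper: program accel state for fte %d\n", fte_id);
	return toy_default_cmds.create_fte(type, fte_id);
}

static const struct toy_flow_cmds toy_wrapped_cmds = {
	.create_fte = toy_wrap_create_fte,
	.delete_fte = toy_hw_delete_fte,
};

static const struct toy_flow_cmds *toy_get_cmds(enum toy_ft_type type, int accel)
{
	/* only NIC RX/TX can be wrapped; everything else gets the default */
	if (accel && (type == TOY_FT_NIC_RX || type == TOY_FT_NIC_TX))
		return &toy_wrapped_cmds;
	return &toy_default_cmds;
}

int main(void)
{
	const struct toy_flow_cmds *cmds = toy_get_cmds(TOY_FT_NIC_TX, 1);

	cmds->create_fte(TOY_FT_NIC_TX, 7);
	cmds->delete_fte(TOY_FT_NIC_TX, 7);
	return 0;
}
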
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 881e2e55840c..645f83cac34d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -39,9 +39,81 @@
#include "mlx5_core.h"
#include "eswitch.h"
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft, u32 underlay_qpn,
- bool disconnect)
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
@@ -71,12 +143,14 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
- u16 vport,
- enum fs_flow_table_op_mod op_mod,
- enum fs_flow_table_type type, unsigned int level,
- unsigned int log_size, struct mlx5_flow_table
- *next_ft, unsigned int *table_id, u32 flags)
+static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
{
int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
@@ -125,8 +199,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
return err;
}
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft)
+static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
{
u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
@@ -143,9 +217,9 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_table *next_ft)
+static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
{
u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};
u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
@@ -188,10 +262,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- u32 *in,
- unsigned int *group_id)
+static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
{
u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -213,9 +287,9 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
return err;
}
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int group_id)
+static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
{
u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
@@ -266,16 +340,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
- MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
- MLX5_SET(flow_context, in_flow_context, action, fte->action);
- MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
- MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id);
+ MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_id);
in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
match_value);
memcpy(in_match_value, &fte->val, sizeof(fte->val));
in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
- if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
int list_size = 0;
list_for_each_entry(dst, &fte->node.children, node.list) {
@@ -301,7 +376,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
list_size);
}
- if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
log_max_flow_counter,
ft->type));
@@ -332,19 +407,21 @@ err_out:
return err;
}
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- struct fs_fte *fte)
+static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
{
+ unsigned int group_id = group->id;
+
return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
}
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- int modify_mask,
- struct fs_fte *fte)
+static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
{
int opmod;
int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
@@ -357,9 +434,9 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
}
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int index)
+static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
{
u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
@@ -367,7 +444,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
MLX5_SET(delete_fte_in, in, table_type, ft->type);
MLX5_SET(delete_fte_in, in, table_id, ft->id);
- MLX5_SET(delete_fte_in, in, flow_index, index);
+ MLX5_SET(delete_fte_in, in, flow_index, fte->index);
if (ft->vport) {
MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
MLX5_SET(delete_fte_in, in, other_vport, 1);
@@ -610,3 +687,53 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+ .create_flow_table = mlx5_cmd_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_modify_flow_table,
+ .create_flow_group = mlx5_cmd_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_destroy_flow_group,
+ .create_fte = mlx5_cmd_create_fte,
+ .update_fte = mlx5_cmd_update_fte,
+ .delete_fte = mlx5_cmd_delete_fte,
+ .update_root_ft = mlx5_cmd_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+ .create_flow_table = mlx5_cmd_stub_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+ .create_flow_group = mlx5_cmd_stub_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+ .create_fte = mlx5_cmd_stub_create_fte,
+ .update_fte = mlx5_cmd_stub_update_fte,
+ .delete_fte = mlx5_cmd_stub_delete_fte,
+ .update_root_ft = mlx5_cmd_stub_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+ return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+ return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ case FS_FT_ESW_EGRESS_ACL:
+ case FS_FT_ESW_INGRESS_ACL:
+ case FS_FT_FDB:
+ case FS_FT_SNIFFER_RX:
+ case FS_FT_SNIFFER_TX:
+ return mlx5_fs_cmd_get_fw_cmds();
+ case FS_FT_NIC_TX:
+ default:
+ return mlx5_fs_cmd_get_stub_cmds();
+ }
+}
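fs_cmd.c now hides its firmware commands behind a const struct mlx5_flow_cmds vtable and adds a parallel set of no-op stubs, with mlx5_fs_cmd_get_default() choosing one per flow-table type (NIC TX currently gets the stubs). Below is a minimal, self-contained model of that per-type dispatch; every identifier in it is made up for the example.

/* A table type selects either a "firmware" command set or a stub set whose
 * entries succeed without touching any device. Mirrors the shape of
 * mlx5_fs_cmd_get_default(), nothing more.
 */
#include <stdio.h>

enum table_type { TT_NIC_RX, TT_NIC_TX };

struct flow_cmds {
	int (*create_table)(unsigned int *id);
	int (*destroy_table)(unsigned int id);
};

static int fw_create(unsigned int *id)   { *id = 42; return 0; }
static int fw_destroy(unsigned int id)   { (void)id; return 0; }
static int stub_create(unsigned int *id) { *id = 0;  return 0; }
static int stub_destroy(unsigned int id) { (void)id; return 0; }

static const struct flow_cmds fw_cmds   = { fw_create,   fw_destroy   };
static const struct flow_cmds stub_cmds = { stub_create, stub_destroy };

static const struct flow_cmds *cmds_get_default(enum table_type type)
{
	switch (type) {
	case TT_NIC_RX:
		return &fw_cmds;	/* backed by device commands */
	case TT_NIC_TX:
	default:
		return &stub_cmds;	/* software-only namespace */
	}
}

int main(void)
{
	const struct flow_cmds *cmds = cmds_get_default(TT_NIC_TX);
	unsigned int id;

	cmds->create_table(&id);
	printf("table id from %s set: %u\n",
	       cmds == &fw_cmds ? "fw" : "stub", id);
	return cmds->destroy_table(id);
}

The stub set is presumably what lets the new NIC TX (egress) namespace be created and managed in software even when only an accelerated backend can actually program it.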
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 71e2d0f37ad9..6228ba7bfa1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -33,46 +33,52 @@
#ifndef _MLX5_FS_CMD_
#define _MLX5_FS_CMD_
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
- u16 vport,
- enum fs_flow_table_op_mod op_mod,
- enum fs_flow_table_type type, unsigned int level,
- unsigned int log_size, struct mlx5_flow_table
- *next_ft, unsigned int *table_id, u32 flags);
+#include "fs_core.h"
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft);
+struct mlx5_flow_cmds {
+ int (*create_flow_table)(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level, unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags);
+ int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft);
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_table *next_ft);
+ int (*modify_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft);
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- u32 *in, unsigned int *group_id);
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id);
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int group_id);
+ int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id);
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- struct fs_fte *fte);
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte);
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned group_id,
- int modify_mask,
- struct fs_fte *fte);
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte);
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- unsigned int index);
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte);
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft, u32 underlay_qpn,
- bool disconnect);
+ int (*update_root_ft)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect);
+};
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
@@ -90,4 +96,6 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
struct mlx5_cmd_fc_bulk *b, u32 id,
u64 *packets, u64 *bytes);
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+
#endif
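Turning the exported mlx5_cmd_* prototypes into a struct mlx5_flow_cmds of function pointers is what allows an alternative backend to supply its own command set. A plausible pattern — and, I assume, roughly what mlx5_fpga_ipsec_build_fs_cmds() does in the companion patch, though that code is not shown here — is to copy the default table and override only the entries that differ, as in this illustrative sketch (no identifier below exists in the driver):

/* Start from a base vtable, override selected entries at init time. */
#include <stdio.h>

struct cmds {
	int (*create)(void);
	int (*destroy)(void);
};

static int base_create(void)  { puts("base create");  return 0; }
static int base_destroy(void) { puts("base destroy"); return 0; }
static int accel_create(void) { puts("accel create"); return 0; }

static const struct cmds base_cmds = { base_create, base_destroy };
static struct cmds accel_cmds;	/* filled in once at init time */

static void accel_build_cmds(void)
{
	accel_cmds = base_cmds;			/* reuse the defaults */
	accel_cmds.create = accel_create;	/* override what differs */
}

int main(void)
{
	accel_build_cmds();
	accel_cmds.create();
	accel_cmds.destroy();	/* still the base implementation */
	return 0;
}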
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 31fc2cfac3b3..3ba07c7096ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -37,6 +37,8 @@
#include "fs_core.h"
#include "fs_cmd.h"
#include "diag/fs_tracepoint.h"
+#include "accel/ipsec.h"
+#include "fpga/ipsec.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -425,15 +427,17 @@ static void del_sw_prio(struct fs_node *node)
static void del_hw_flow_table(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
int err;
fs_get_obj(ft, node);
dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
if (node->active) {
- err = mlx5_cmd_destroy_flow_table(dev, ft);
+ err = root->cmds->destroy_flow_table(dev, ft);
if (err)
mlx5_core_warn(dev, "flow steering can't destroy ft\n");
}
@@ -454,6 +458,7 @@ static void del_sw_flow_table(struct fs_node *node)
static void del_sw_hw_rule(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_rule *rule;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
@@ -477,19 +482,20 @@ static void del_sw_hw_rule(struct fs_node *node)
if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
--fte->dests_size) {
modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
- fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
update_fte = true;
goto out;
}
- if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
--fte->dests_size) {
modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
update_fte = true;
}
out:
+ root = find_root(&ft->node);
if (update_fte && fte->dests_size) {
- err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
+ err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
if (err)
mlx5_core_warn(dev,
"%s can't del rule fg id=%d fte_index=%d\n",
@@ -500,6 +506,7 @@ out:
static void del_hw_fte(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct mlx5_core_dev *dev;
@@ -512,9 +519,9 @@ static void del_hw_fte(struct fs_node *node)
trace_mlx5_fs_del_fte(fte);
dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
if (node->active) {
- err = mlx5_cmd_delete_fte(dev, ft,
- fte->index);
+ err = root->cmds->delete_fte(dev, ft, fte);
if (err)
mlx5_core_warn(dev,
"flow steering can't delete fte in index %d of flow group id %d\n",
@@ -542,6 +549,7 @@ static void del_sw_fte(struct fs_node *node)
static void del_hw_flow_group(struct fs_node *node)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_group *fg;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
@@ -551,7 +559,8 @@ static void del_hw_flow_group(struct fs_node *node)
dev = get_dev(&ft->node);
trace_mlx5_fs_del_fg(fg);
- if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
+ root = find_root(&ft->node);
+ if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
fg->id, ft->id);
}
@@ -615,10 +624,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
memcpy(fte->val, match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
- fte->flow_tag = flow_act->flow_tag;
- fte->action = flow_act->action;
- fte->encap_id = flow_act->encap_id;
- fte->modify_id = flow_act->modify_id;
+ fte->action = *flow_act;
tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
@@ -797,15 +803,14 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
struct fs_prio *prio,
struct mlx5_flow_table *ft)
{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_flow_table *iter;
int i = 0;
int err;
fs_for_each_ft(iter, prio) {
i++;
- err = mlx5_cmd_modify_flow_table(dev,
- iter,
- ft);
+ err = root->cmds->modify_flow_table(dev, iter, ft);
if (err) {
mlx5_core_warn(dev, "Failed to modify flow table %d\n",
iter->id);
@@ -853,12 +858,12 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
if (list_empty(&root->underlay_qpns)) {
/* Don't set any QPN (zero) in case QPN list is empty */
qpn = 0;
- err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false);
+ err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
} else {
list_for_each_entry(uqp, &root->underlay_qpns, list) {
qpn = uqp->qpn;
- err = mlx5_cmd_update_root_ft(root->dev, ft, qpn,
- false);
+ err = root->cmds->update_root_ft(root->dev, ft,
+ qpn, false);
if (err)
break;
}
@@ -877,6 +882,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
struct mlx5_flow_destination *dest)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct fs_fte *fte;
@@ -884,17 +890,16 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
int err = 0;
fs_get_obj(fte, rule->node.parent);
- if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return -EINVAL;
down_write_ref_node(&fte->node);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
memcpy(&rule->dest_attr, dest, sizeof(*dest));
- err = mlx5_cmd_update_fte(get_dev(&ft->node),
- ft, fg->id,
- modify_mask,
- fte);
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
up_write_ref_node(&fte->node);
return err;
@@ -1035,9 +1040,9 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
next_ft = find_next_chained_ft(fs_prio);
- err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
- ft->level, log_table_sz, next_ft, &ft->id,
- ft->flags);
+ err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
+ ft->type, ft->level, log_table_sz,
+ next_ft, &ft->id, ft->flags);
if (err)
goto free_ft;
@@ -1053,7 +1058,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
mutex_unlock(&root->chain_lock);
return ft;
destroy_ft:
- mlx5_cmd_destroy_flow_table(root->dev, ft);
+ root->cmds->destroy_flow_table(root->dev, ft);
free_ft:
kfree(ft);
unlock_root:
@@ -1125,6 +1130,7 @@ EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
fg_in, match_criteria);
u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
@@ -1152,7 +1158,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
if (IS_ERR(fg))
return fg;
- err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+ err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
if (err) {
tree_put_node(&fg->node);
return ERR_PTR(err);
@@ -1275,6 +1281,7 @@ add_rule_fte(struct fs_fte *fte,
int dest_num,
bool update_action)
{
+ struct mlx5_flow_root_namespace *root;
struct mlx5_flow_handle *handle;
struct mlx5_flow_table *ft;
int modify_mask = 0;
@@ -1290,12 +1297,13 @@ add_rule_fte(struct fs_fte *fte,
modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
fs_get_obj(ft, fg->node.parent);
+ root = find_root(&fg->node);
if (!(fte->status & FS_FTE_STATUS_EXISTING))
- err = mlx5_cmd_create_fte(get_dev(&ft->node),
- ft, fg->id, fte);
+ err = root->cmds->create_fte(get_dev(&ft->node),
+ ft, fg, fte);
else
- err = mlx5_cmd_update_fte(get_dev(&ft->node),
- ft, fg->id, modify_mask, fte);
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
if (err)
goto free_handle;
@@ -1360,6 +1368,7 @@ out:
static int create_auto_flow_group(struct mlx5_flow_table *ft,
struct mlx5_flow_group *fg)
{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
struct mlx5_core_dev *dev = get_dev(&ft->node);
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
void *match_criteria_addr;
@@ -1380,7 +1389,7 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
memcpy(match_criteria_addr, fg->mask.match_criteria,
sizeof(fg->mask.match_criteria));
- err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id);
+ err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
if (!err) {
fg->node.active = true;
trace_mlx5_fs_add_fg(fg);
@@ -1438,16 +1447,17 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
{
- if (check_conflicting_actions(flow_act->action, fte->action)) {
+ if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
"Found two FTEs with conflicting actions\n");
return -EEXIST;
}
- if (fte->flow_tag != flow_act->flow_tag) {
+ if (flow_act->has_flow_tag &&
+ fte->action.flow_tag != flow_act->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
- fte->flow_tag,
+ fte->action.flow_tag,
flow_act->flow_tag);
return -EEXIST;
}
@@ -1471,12 +1481,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
if (ret)
return ERR_PTR(ret);
- old_action = fte->action;
- fte->action |= flow_act->action;
+ old_action = fte->action.action;
+ fte->action.action |= flow_act->action;
handle = add_rule_fte(fte, fg, dest, dest_num,
old_action != flow_act->action);
if (IS_ERR(handle)) {
- fte->action = old_action;
+ fte->action.action = old_action;
return handle;
}
trace_mlx5_fs_set_fte(fte, false);
@@ -1637,7 +1647,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
list_for_each_entry(iter, match_head, list) {
nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
- ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL);
}
search_again_locked:
@@ -1919,7 +1928,6 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
return 0;
new_root_ft = find_next_ft(ft);
-
if (!new_root_ft) {
root->root_ft = NULL;
return 0;
@@ -1928,13 +1936,14 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
if (list_empty(&root->underlay_qpns)) {
/* Don't set any QPN (zero) in case QPN list is empty */
qpn = 0;
- err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn,
- false);
+ err = root->cmds->update_root_ft(root->dev, new_root_ft,
+ qpn, false);
} else {
list_for_each_entry(uqp, &root->underlay_qpns, list) {
qpn = uqp->qpn;
- err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
- qpn, false);
+ err = root->cmds->update_root_ft(root->dev,
+ new_root_ft, qpn,
+ false);
if (err)
break;
}
@@ -2046,6 +2055,11 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return &steering->sniffer_tx_root_ns->ns;
else
return NULL;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ if (steering->egress_root_ns)
+ return &steering->egress_root_ns->ns;
+ else
+ return NULL;
default:
return NULL;
}
@@ -2236,13 +2250,18 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
return 0;
}
-static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
- enum fs_flow_table_type
- table_type)
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+ enum fs_flow_table_type table_type)
{
+ const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_namespace *ns;
+ if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
+ cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
+
/* Create the root namespace */
root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
@@ -2250,6 +2269,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
root_ns->dev = steering->dev;
root_ns->table_type = table_type;
+ root_ns->cmds = cmds;
INIT_LIST_HEAD(&root_ns->underlay_qpns);
@@ -2408,6 +2428,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
cleanup_root_ns(steering->fdb_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
mlx5_cleanup_fc_stats(dev);
kmem_cache_destroy(steering->ftes_cache);
kmem_cache_destroy(steering->fgs_cache);
@@ -2553,6 +2574,20 @@ cleanup_root_ns:
return err;
}
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->egress_root_ns = create_root_ns(steering,
+ FS_FT_NIC_TX);
+ if (!steering->egress_root_ns)
+ return -ENOMEM;
+
+ /* create 1 prio */
+ prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
int mlx5_init_fs(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering;
@@ -2618,6 +2653,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
+ if (MLX5_IPSEC_DEV(dev)) {
+ err = init_egress_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
return 0;
err:
mlx5_cleanup_fs(dev);
@@ -2641,7 +2682,8 @@ int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
goto update_ft_fail;
}
- err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false);
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ false);
if (err) {
mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
underlay_qpn, err);
@@ -2684,7 +2726,8 @@ int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
goto out;
}
- err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true);
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ true);
if (err)
mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
underlay_qpn, err);
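Every call site in fs_core.c now reaches the hardware through root->cmds, obtained by walking from the object at hand (FTE, group, table) up to its root namespace with find_root(). The standalone model below captures just that walk-up-and-dispatch shape; the struct names and the first-member cast are illustrative, not the driver's real layout (which goes through container_of()).

/* Walk the parent chain to the root, then dispatch through its command set. */
#include <stddef.h>
#include <stdio.h>

struct node { struct node *parent; };

struct cmds { int (*update)(int id); };

struct root {
	struct node node;	/* the root has no parent */
	const struct cmds *cmds;
};

static int fw_update(int id) { printf("update %d\n", id); return 0; }
static const struct cmds fw_cmds = { fw_update };

static struct root *find_root(struct node *n)
{
	while (n->parent)
		n = n->parent;
	return (struct root *)n;	/* node is the first member of root */
}

int main(void)
{
	struct root root = { { NULL }, &fw_cmds };
	struct node table = { &root.node };
	struct node fte = { &table };

	return find_root(&fte)->cmds->update(7);
}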
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 05262708f14b..e26d3e9d5f9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -48,6 +48,7 @@ enum fs_node_type {
enum fs_flow_table_type {
FS_FT_NIC_RX = 0x0,
+ FS_FT_NIC_TX = 0x1,
FS_FT_ESW_EGRESS_ACL = 0x2,
FS_FT_ESW_INGRESS_ACL = 0x3,
FS_FT_FDB = 0X4,
@@ -75,6 +76,7 @@ struct mlx5_flow_steering {
struct mlx5_flow_root_namespace **esw_ingress_root_ns;
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
+ struct mlx5_flow_root_namespace *egress_root_ns;
};
struct fs_node {
@@ -174,11 +176,8 @@ struct fs_fte {
struct fs_node node;
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
- u32 flow_tag;
u32 index;
- u32 action;
- u32 encap_id;
- u32 modify_id;
+ struct mlx5_flow_act action;
enum fs_fte_status status;
struct mlx5_fc *counter;
struct rhash_head hash;
@@ -224,6 +223,7 @@ struct mlx5_flow_root_namespace {
/* Should be held when chaining flow tables */
struct mutex chain_lock;
struct list_head underlay_qpns;
+ const struct mlx5_flow_cmds *cmds;
};
int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index ae391e4b7070..13b6f66310c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -58,6 +58,7 @@
#include "eswitch.h"
#include "lib/mlx5.h"
#include "fpga/core.h"
+#include "fpga/ipsec.h"
#include "accel/ipsec.h"
#include "lib/clock.h"
@@ -942,9 +943,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto out;
}
- err = mlx5_init_cq_table(dev);
+ err = mlx5_cq_debugfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize cq table\n");
+ dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
goto err_eq_cleanup;
}
@@ -1002,7 +1003,7 @@ err_tables_cleanup:
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
- mlx5_cleanup_cq_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
err_eq_cleanup:
mlx5_eq_cleanup(dev);
@@ -1023,7 +1024,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
- mlx5_cleanup_cq_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
mlx5_eq_cleanup(dev);
}
@@ -1173,6 +1174,18 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_affinity_hints;
}
+ err = mlx5_fpga_device_start(dev);
+ if (err) {
+ dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+ goto err_fpga_start;
+ }
+
+ err = mlx5_accel_ipsec_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ goto err_ipsec_start;
+ }
+
err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1191,17 +1204,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_sriov;
}
- err = mlx5_fpga_device_start(dev);
- if (err) {
- dev_err(&pdev->dev, "fpga device start failed %d\n", err);
- goto err_fpga_start;
- }
- err = mlx5_accel_ipsec_init(dev);
- if (err) {
- dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
- goto err_ipsec_start;
- }
-
if (mlx5_device_registered(dev)) {
mlx5_attach_device(dev);
} else {
@@ -1219,17 +1221,18 @@ out:
return 0;
err_reg_dev:
- mlx5_accel_ipsec_cleanup(dev);
-err_ipsec_start:
- mlx5_fpga_device_stop(dev);
-
-err_fpga_start:
mlx5_sriov_detach(dev);
err_sriov:
mlx5_cleanup_fs(dev);
err_fs:
+ mlx5_accel_ipsec_cleanup(dev);
+
+err_ipsec_start:
+ mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
mlx5_irq_clear_affinity_hints(dev);
err_affinity_hints:
@@ -1296,11 +1299,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_fpga_device_stop(dev);
-
mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
+ mlx5_accel_ipsec_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
@@ -1657,6 +1659,7 @@ static int __init init(void)
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
+ mlx5_fpga_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
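The main.c hunk starts the FPGA and IPsec accelerators before flow steering and, accordingly, moves their error labels after the flow-steering one, so the unwind sequence remains the exact mirror of the init sequence. A compact, purely illustrative model of that goto-unwind discipline (none of these functions are driver API):

/* Error labels undo, in reverse order, exactly what succeeded before them. */
#include <stdio.h>

static int start_fpga(void)  { puts("fpga start");  return 0; }
static void stop_fpga(void)  { puts("fpga stop");   }
static int start_ipsec(void) { puts("ipsec start"); return 0; }
static void stop_ipsec(void) { puts("ipsec stop");  }
static int init_fs(void)     { puts("fs init");     return -1; /* force unwind */ }

static int load_one(void)
{
	int err;

	err = start_fpga();
	if (err)
		goto err_fpga;
	err = start_ipsec();
	if (err)
		goto err_ipsec;
	err = init_fs();
	if (err)
		goto err_fs;
	return 0;

err_fs:
	stop_ipsec();
err_ipsec:
	stop_fpga();
err_fpga:
	return err;
}

int main(void)
{
	return load_one() ? 1 : 0;
}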
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 394552f36fcf..4e25f2b2e0bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,16 +38,11 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
- (MLX5_CAP_GEN(mdev, vport_group_manager) && \
- (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
- mlx5_core_is_pf(mdev))
-
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
@@ -115,9 +110,29 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
u8 access_reg_group);
@@ -186,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 6bcfc25350f5..ea66448ba365 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -41,7 +41,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
- return wq->sz_m1 + 1;
+ return wq->fbc.sz_m1 + 1;
}
u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
@@ -62,7 +62,7 @@ static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
{
- return mlx5_cqwq_get_size(wq) << wq->log_stride;
+ return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
}
static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
@@ -92,7 +92,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->buf = wq_ctrl->buf.direct.buf;
+ wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;
wq_ctrl->mdev = mdev;
@@ -130,7 +130,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->rq.buf = wq_ctrl->buf.direct.buf;
+ wq->rq.buf = wq_ctrl->buf.frags->buf;
wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -151,11 +151,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
{
int err;
- wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
- wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
- wq->sz_m1 = (1 << wq->log_sz) - 1;
- wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
- wq->frag_sz_m1 = (1 << wq->log_frag_strides) - 1;
+ mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
@@ -172,7 +168,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->frag_buf = wq_ctrl->frag_buf;
+ wq->fbc.frag_buf = wq_ctrl->frag_buf;
wq->db = wq_ctrl->db.db;
wq_ctrl->mdev = mdev;
@@ -209,7 +205,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->buf = wq_ctrl->buf.direct.buf;
+ wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;
for (i = 0; i < wq->sz_m1; i++) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 718589d0cec2..fca90b94596d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -45,7 +45,7 @@ struct mlx5_wq_param {
struct mlx5_wq_ctrl {
struct mlx5_core_dev *mdev;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
};
@@ -68,14 +68,9 @@ struct mlx5_wq_qp {
};
struct mlx5_cqwq {
- struct mlx5_frag_buf frag_buf;
- __be32 *db;
- u32 sz_m1;
- u32 frag_sz_m1;
- u32 cc; /* consumer counter */
- u8 log_sz;
- u8 log_stride;
- u8 log_frag_strides;
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u32 cc; /* consumer counter */
};
struct mlx5_wq_ll {
@@ -131,20 +126,17 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
{
- return wq->cc & wq->sz_m1;
+ return wq->cc & wq->fbc.sz_m1;
}
static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
{
- unsigned int frag = (ix >> wq->log_frag_strides);
-
- return wq->frag_buf.frags[frag].buf +
- ((wq->frag_sz_m1 & ix) << wq->log_stride);
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
}
static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
{
- return wq->cc >> wq->log_sz;
+ return wq->cc >> wq->fbc.log_sz;
}
static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
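mlx5_cqwq now keeps its geometry in a mlx5_frag_buf_ctrl and resolves entries through mlx5_frag_buf_get_wqe() instead of open-coding the fragment arithmetic. The sketch below re-derives that arithmetic in a standalone program so the index split is visible: an entry index is divided into a fragment number and an offset within the fragment. The struct here is a simplification, not the real mlx5_frag_buf_ctrl.

/* Fragmented-buffer indexing: frag = ix >> log_frag_strides,
 * offset = (ix & frag_sz_m1) << log_stride.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12

struct frag_buf_ctrl {
	uint8_t  log_stride;		/* entry size = 1 << log_stride   */
	uint8_t  log_frag_strides;	/* entries per fragment, as a log */
	uint32_t frag_sz_m1;		/* (entries per fragment) - 1     */
	void   **frags;			/* one pointer per page fragment  */
};

static void *frag_buf_get_wqe(struct frag_buf_ctrl *fbc, uint32_t ix)
{
	uint32_t frag = ix >> fbc->log_frag_strides;

	return (char *)fbc->frags[frag] +
	       ((ix & fbc->frag_sz_m1) << fbc->log_stride);
}

int main(void)
{
	static char page0[1 << PAGE_SHIFT], page1[1 << PAGE_SHIFT];
	void *pages[2] = { page0, page1 };
	struct frag_buf_ctrl fbc = {
		.log_stride = 6,			/* 64-byte CQEs */
		.log_frag_strides = PAGE_SHIFT - 6,
		.frag_sz_m1 = (1u << (PAGE_SHIFT - 6)) - 1,
		.frags = pages,
	};

	/* entry 65 lives in fragment 1, 64 bytes into the page */
	printf("offset into frag 1: %td\n",
	       (char *)frag_buf_get_wqe(&fbc, 65) - page1);
	return 0;
}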
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index d56eea310509..f4d9c9975ac3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -76,6 +76,8 @@ config MLXSW_SPECTRUM
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
depends on IPV6 || IPV6=n
+ depends on NET_IPGRE || NET_IPGRE=n
+ depends on IPV6_GRE || IPV6_GRE=n
select PARMAN
select MLXFW
default m
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9463c3fa254f..0cadcabfe86f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_cnt.o spectrum_fid.o \
spectrum_ipip.o spectrum_acl_flex_actions.o \
spectrum_mr.o spectrum_mr_tcam.o \
- spectrum_qdisc.o
+ spectrum_qdisc.o spectrum_span.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 996dc099cd58..3c0d882ba183 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
@@ -849,7 +849,6 @@ struct mlxsw_afa_mirror {
struct mlxsw_afa_resource resource;
int span_id;
u8 local_in_port;
- u8 local_out_port;
bool ingress;
};
@@ -859,7 +858,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
{
block->afa->ops->mirror_del(block->afa->ops_priv,
mirror->local_in_port,
- mirror->local_out_port,
+ mirror->span_id,
mirror->ingress);
kfree(mirror);
}
@@ -875,9 +874,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
}
static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
@@ -887,13 +885,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
return ERR_PTR(-ENOMEM);
err = block->afa->ops->mirror_add(block->afa->ops_priv,
- local_in_port, local_out_port,
+ local_in_port, out_dev,
ingress, &mirror->span_id);
if (err)
goto err_mirror_add;
mirror->ingress = ingress;
- mirror->local_out_port = local_out_port;
mirror->local_in_port = local_in_port;
mirror->resource.destructor = mlxsw_afa_mirror_destructor;
mlxsw_afa_resource_add(block, &mirror->resource);
@@ -920,13 +917,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
}
int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
- mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
ingress);
if (IS_ERR(mirror))
return PTR_ERR(mirror);
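With this change the flex-action code identifies a mirroring session by the span_id handed back from ->mirror_add(), and passes the destination net_device rather than a pre-resolved local port; the SPAN id, not an egress port number, is what ->mirror_del() needs later. A toy model of that add-returns-id / delete-takes-id contract (the table and names are invented for the sketch):

/* mirror_add() picks a free analyzer slot and reports its id; mirror_del()
 * releases by id, knowing nothing about how the destination was resolved.
 */
#include <stdio.h>

#define MAX_SPANS 4

static const char *spans[MAX_SPANS];	/* span id -> destination name */

static int mirror_add(const char *out_dev, int *p_span_id)
{
	int i;

	for (i = 0; i < MAX_SPANS; i++) {
		if (!spans[i]) {
			spans[i] = out_dev;
			*p_span_id = i;
			return 0;
		}
	}
	return -1;	/* no free analyzer entry */
}

static void mirror_del(int span_id)
{
	spans[span_id] = NULL;
}

int main(void)
{
	int span_id;

	if (mirror_add("gretap0", &span_id))
		return 1;
	printf("mirroring to %s via span %d\n", spans[span_id], span_id);
	mirror_del(span_id);
	return 0;
}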
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index b91f2b0829b0..3a155d104384 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -36,6 +36,7 @@
#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
#include <linux/types.h>
+#include <linux/netdevice.h>
struct mlxsw_afa;
struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
void (*counter_index_put)(void *priv, unsigned int counter_index);
- int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id);
- void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
bool ingress);
};
@@ -71,7 +73,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
u16 trap_id);
int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
+ u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 85faa87bf42d..e30c6ce3dcb4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1519,8 +1519,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
u8 *p_status)
{
struct mlxsw_pci *mlxsw_pci = bus_priv;
- dma_addr_t in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
- dma_addr_t out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
+ dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
bool evreq = mlxsw_pci->cmd.nopoll;
unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
@@ -1532,11 +1531,15 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
if (err)
return err;
- if (in_mbox)
+ if (in_mbox) {
memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size);
+ in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
+ }
mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr));
mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr));
+ if (out_mbox)
+ out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr));
mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr));
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0e08be41c8e0..e002398364c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1,11 +1,11 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -2872,6 +2872,14 @@ static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port,
MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
+/* an_disable_admin
+ * Auto negotiation disable administrative configuration
+ * 0 - Device doesn't support AN disable.
+ * 1 - Device supports AN disable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, an_disable_admin, 0x00, 30, 1);
+
/* reg_ptys_local_port
* Local port number.
* Access: Index
@@ -3000,12 +3008,13 @@ MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
- u32 proto_admin)
+ u32 proto_admin, bool autoneg)
{
MLXSW_REG_ZERO(ptys, payload);
mlxsw_reg_ptys_local_port_set(payload, local_port);
mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH);
mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
+ mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
}
static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
@@ -6772,8 +6781,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
*/
MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
+enum mlxsw_reg_mpat_span_type {
+ /* Local SPAN Ethernet.
+ * The original packet is not encapsulated.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+ /* Encapsulated Remote SPAN Ethernet L3 GRE.
+ * The packet is encapsulated with GRE header.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
- u16 system_port, bool e)
+ u16 system_port, bool e,
+ enum mlxsw_reg_mpat_span_type span_type)
{
MLXSW_REG_ZERO(mpat, payload);
mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6886,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
mlxsw_reg_mpat_e_set(payload, e);
mlxsw_reg_mpat_qos_set(payload, 1);
mlxsw_reg_mpat_be_set(payload, 1);
+ mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+ mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+ enum mlxsw_reg_mpat_eth_rspan_version version,
+ const char *mac,
+ bool tp)
+{
+ mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+ mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+ mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+ const char *smac,
+ u32 sip, u32 dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+ mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+ mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+ const char *smac,
+ struct in6_addr sip, struct in6_addr dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+ mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
}
/* MPAR - Monitoring Port Analyzer Register
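The new MPAT fields above are all declared with MLXSW_ITEM32()/MLXSW_ITEM_BUF(), which generate setters that poke a bit-field inside the big-endian register payload. As a rough, standalone approximation of what a 32-bit setter such as mlxsw_reg_mpat_span_type_set() ends up doing (4 bits at bit 0 of the word at byte offset 0x04), consider this simplified model; it ignores the real macro machinery and uses a stand-in payload size rather than MLXSW_REG_MPAT_LEN.

/* Set "size" bits at "shift" within the big-endian 32-bit word located at
 * byte "offset" of the register payload.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl()/ntohl() */

static void item32_set(char *payload, unsigned int offset, unsigned int shift,
		       unsigned int size, uint32_t val)
{
	uint32_t mask = (size == 32) ? 0xffffffffu : (((1u << size) - 1) << shift);
	uint32_t word;

	memcpy(&word, payload + offset, sizeof(word));
	word = ntohl(word);
	word = (word & ~mask) | ((val << shift) & mask);
	word = htonl(word);
	memcpy(payload + offset, &word, sizeof(word));
}

int main(void)
{
	char mpat_pl[0x80] = {0};	/* stand-in size, not the real register length */

	/* roughly: mlxsw_reg_mpat_span_type_set(mpat_pl, REMOTE_ETH_L3) */
	item32_set(mpat_pl, 0x04, 0, 4, 0x3);
	printf("low byte of the word at 0x04: 0x%02x\n",
	       (unsigned char)mpat_pl[0x07]);
	return 0;
}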
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bf400c75fcc8..7885fc475f7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -71,11 +71,12 @@
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_acl_flex_actions.h"
+#include "spectrum_span.h"
#include "../mlxfw/mlxfw.h"
#define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1530
-#define MLXSW_FWREV_SUBMINOR 152
+#define MLXSW_FWREV_MINOR 1620
+#define MLXSW_FWREV_SUBMINOR 192
#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
#define MLXSW_SP_FW_FILENAME \
@@ -487,347 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
return 0;
}
-static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
-{
- int i;
-
- if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
- return -EIO;
-
- mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
- MAX_SPAN);
- mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
- sizeof(struct mlxsw_sp_span_entry),
- GFP_KERNEL);
- if (!mlxsw_sp->span.entries)
- return -ENOMEM;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++)
- INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
-
- return 0;
-}
-
-static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
-{
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
- }
- kfree(mlxsw_sp->span.entries);
-}
-
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- u8 local_port = port->local_port;
- int index;
- int i;
- int err;
-
- /* find a free entry to use */
- index = -1;
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- if (!mlxsw_sp->span.entries[i].used) {
- index = i;
- span_entry = &mlxsw_sp->span.entries[i];
- break;
- }
- }
- if (index < 0)
- return NULL;
-
- /* create a new port analayzer entry for local_port */
- mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- if (err)
- return NULL;
-
- span_entry->used = true;
- span_entry->id = index;
- span_entry->ref_count = 1;
- span_entry->local_port = local_port;
- return span_entry;
-}
-
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
-{
- u8 local_port = span_entry->local_port;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- int pa_id = span_entry->id;
-
- mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- span_entry->used = false;
-}
-
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
-{
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- if (curr->used && curr->local_port == local_port)
- return curr;
- }
- return NULL;
-}
-
-static struct mlxsw_sp_span_entry
-*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp_span_entry *span_entry;
-
- span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
- port->local_port);
- if (span_entry) {
- /* Already exists, just take a reference */
- span_entry->ref_count++;
- return span_entry;
- }
-
- return mlxsw_sp_span_entry_create(port);
-}
-
-static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
-{
- WARN_ON(!span_entry->ref_count);
- if (--span_entry->ref_count == 0)
- mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
- return 0;
-}
-
-static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_inspected_port *p;
- int i;
-
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
- list_for_each_entry(p, &curr->bound_ports_list, list)
- if (p->local_port == port->local_port &&
- p->type == MLXSW_SP_SPAN_EGRESS)
- return true;
- }
-
- return false;
-}
-
-static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
- int mtu)
-{
- return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
-}
-
-static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
- int err;
-
- /* If port is egress mirrored, the shared buffer size should be
- * updated according to the mtu value
- */
- if (mlxsw_sp_span_is_egress_mirror(port)) {
- u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
-
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- if (err) {
- netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
- return err;
- }
- }
-
- return 0;
-}
-
-static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- struct mlxsw_sp_port *port,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *p;
-
- list_for_each_entry(p, &span_entry->bound_ports_list, list)
- if (type == p->type &&
- port->local_port == p->local_port &&
- bind == p->bound)
- return p;
- return NULL;
-}
-
-static int
-mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char mpar_pl[MLXSW_REG_MPAR_LEN];
- int pa_id = span_entry->id;
-
- /* bind the port to the SPAN entry */
- mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
- (enum mlxsw_reg_mpar_i_e) type, bind, pa_id);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
-}
-
-static int
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *inspected_port;
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
- int i;
- int err;
-
- /* A given (source port, direction) can only be bound to one analyzer,
- * so if a binding is requested, check for conflicts.
- */
- if (bind)
- for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
- struct mlxsw_sp_span_entry *curr =
- &mlxsw_sp->span.entries[i];
-
- if (mlxsw_sp_span_entry_bound_port_find(curr, type,
- port, bind))
- return -EEXIST;
- }
-
- /* if it is an egress SPAN, bind a shared buffer to it */
- if (type == MLXSW_SP_SPAN_EGRESS) {
- u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
- port->dev->mtu);
-
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- if (err) {
- netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
- return err;
- }
- }
-
- if (bind) {
- err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- true);
- if (err)
- goto err_port_bind;
- }
-
- inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
- if (!inspected_port) {
- err = -ENOMEM;
- goto err_inspected_port_alloc;
- }
- inspected_port->local_port = port->local_port;
- inspected_port->type = type;
- inspected_port->bound = bind;
- list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
-
- return 0;
-
-err_inspected_port_alloc:
- if (bind)
- mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- false);
-err_port_bind:
- if (type == MLXSW_SP_SPAN_EGRESS) {
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- }
- return err;
-}
-
-static void
-mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
- struct mlxsw_sp_span_entry *span_entry,
- enum mlxsw_sp_span_type type,
- bool bind)
-{
- struct mlxsw_sp_span_inspected_port *inspected_port;
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- char sbib_pl[MLXSW_REG_SBIB_LEN];
-
- inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
- port, bind);
- if (!inspected_port)
- return;
-
- if (bind)
- mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
- false);
- /* remove the SBIB buffer if it was egress SPAN */
- if (type == MLXSW_SP_SPAN_EGRESS) {
- mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
- }
-
- mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-
- list_del(&inspected_port->list);
- kfree(inspected_port);
-}
-
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind)
-{
- struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
- int err;
-
- span_entry = mlxsw_sp_span_entry_get(to);
- if (!span_entry)
- return -ENOENT;
-
- netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
- span_entry->id);
-
- err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
- if (err)
- goto err_port_bind;
-
- return 0;
-
-err_port_bind:
- mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
- return err;
-}
-
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
- enum mlxsw_sp_span_type type, bool bind)
-{
- struct mlxsw_sp_span_entry *span_entry;
-
- span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
- destination_port);
- if (!span_entry) {
- netdev_err(from->dev, "no span entry found\n");
- return;
- }
-
- netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
- span_entry->id);
- mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
-}
-
static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
bool enable, u32 rate)
{
@@ -1380,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
xstats->tail_drop[i] =
mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
}
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
}
static void update_stats_cache(struct work_struct *work)
@@ -1604,7 +1274,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
bool ingress)
{
enum mlxsw_sp_span_type span_type;
- struct mlxsw_sp_port *to_port;
struct net_device *to_dev;
to_dev = tcf_mirred_dev(a);
@@ -1613,17 +1282,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
return -EINVAL;
}
- if (!mlxsw_sp_port_dev_check(to_dev)) {
- netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
- return -EOPNOTSUPP;
- }
- to_port = netdev_priv(to_dev);
-
- mirror->to_local_port = to_port->local_port;
mirror->ingress = ingress;
span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
- true);
+ return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+ true, &mirror->span_id);
}
static void
@@ -1634,7 +1296,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
span_type = mirror->ingress ?
MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+ mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
span_type, true);
}
@@ -2728,7 +2390,7 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
int err;
autoneg = mlxsw_sp_port->link.autoneg;
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
if (err)
return err;
@@ -2762,7 +2424,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
bool autoneg;
int err;
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
if (err)
return err;
@@ -2780,7 +2442,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
}
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
- eth_proto_new);
+ eth_proto_new, autoneg);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
if (err)
return err;
@@ -2991,7 +2653,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
- eth_proto_admin);
+ eth_proto_admin, mlxsw_sp_port->link.autoneg);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
}
@@ -4021,14 +3683,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
+ /* Initialize router after SPAN is initialized, so that the FIB and
+ * neighbor event handlers can issue SPAN respin.
+ */
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
goto err_router_init;
}
- /* Initialize netdevice notifier after router is initialized, so that
- * the event handler can use router structures.
+ /* Initialize netdevice notifier after router and SPAN are initialized,
+ * so that the event handler can use router structures and call SPAN
+ * respin.
*/
mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -4037,12 +3709,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_netdev_notifier;
}
- err = mlxsw_sp_span_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
- goto err_span_init;
- }
-
err = mlxsw_sp_acl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -4068,12 +3734,12 @@ err_ports_create:
err_dpipe_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
- mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
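
The hunk above moves mlxsw_sp_span_init() ahead of the router and netdevice-notifier setup, and the error labels plus mlxsw_sp_fini() are rearranged so teardown still runs in exact reverse order of initialization. A minimal stand-alone sketch of that init/unwind pattern follows; the subsystem names are hypothetical placeholders, not the driver's API.

#include <stdio.h>

/* Hypothetical init/fini pairs standing in for SPAN, router and notifier. */
int span_init(void)    { puts("span init");     return 0; }
void span_fini(void)   { puts("span fini"); }
int router_init(void)  { puts("router init");   return 0; }
void router_fini(void) { puts("router fini"); }
int notif_init(void)   { puts("notifier init"); return -1; /* force a failure */ }
void notif_fini(void)  { puts("notifier fini"); }

int subsys_init(void)
{
	int err;

	err = span_init();      /* 1st: SPAN, so later stages can respin it */
	if (err)
		goto err_span;
	err = router_init();    /* 2nd: router may now use SPAN */
	if (err)
		goto err_router;
	err = notif_init();     /* 3rd: notifier may use router and SPAN */
	if (err)
		goto err_notif;
	return 0;

err_notif:
	router_fini();          /* unwind strictly in reverse order */
err_router:
	span_fini();
err_span:
	return err;
}

int main(void)
{
	return subsys_init() ? 1 : 0;
}
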
@@ -4099,9 +3765,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
- mlxsw_sp_span_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4113,12 +3779,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
}
static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
- .used_max_vepa_channels = 1,
- .max_vepa_channels = 0,
.used_max_mid = 1,
.max_mid = MLXSW_SP_MID_MAX,
- .used_max_pgt = 1,
- .max_pgt = 0,
.used_flood_tables = 1,
.used_flood_mode = 1,
.flood_mode = 3,
@@ -4144,70 +3806,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
.resource_query_enable = 1,
};
-static bool
-mlxsw_sp_resource_kvd_granularity_validate(struct netlink_ext_ack *extack,
- u64 size)
-{
- const struct mlxsw_config_profile *profile;
-
- profile = &mlxsw_sp_config_profile;
- if (size % profile->kvd_hash_granularity) {
- NL_SET_ERR_MSG_MOD(extack, "resource set with wrong granularity");
- return false;
- }
- return true;
-}
-
-static int
-mlxsw_sp_resource_kvd_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- NL_SET_ERR_MSG_MOD(extack, "kvd size cannot be changed");
- return -EINVAL;
-}
-
-static int
-mlxsw_sp_resource_kvd_linear_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_single_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE)) {
- NL_SET_ERR_MSG_MOD(extack, "hash single size is smaller than minimum");
- return -EINVAL;
- }
- return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_double_size_validate(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
- if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
- return -EINVAL;
-
- if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) {
- NL_SET_ERR_MSG_MOD(extack, "hash double size is smaller than minimum");
- return -EINVAL;
- }
- return 0;
-}
-
static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -4216,23 +3814,10 @@ static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
}
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_ops = {
- .size_validate = mlxsw_sp_resource_kvd_size_validate,
-};
-
static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
- .size_validate = mlxsw_sp_resource_kvd_linear_size_validate,
.occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
};
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_single_ops = {
- .size_validate = mlxsw_sp_resource_kvd_hash_single_size_validate,
-};
-
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = {
- .size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate,
-};
-
static void
mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
struct devlink_resource_size_params *kvd_size_params,
@@ -4291,17 +3876,16 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
- true, kvd_size,
- MLXSW_SP_RESOURCE_KVD,
+ kvd_size, MLXSW_SP_RESOURCE_KVD,
DEVLINK_RESOURCE_ID_PARENT_TOP,
&kvd_size_params,
- &mlxsw_sp_resource_kvd_ops);
+ NULL);
if (err)
return err;
linear_size = profile->kvd_linear_size;
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR,
- false, linear_size,
+ linear_size,
MLXSW_SP_RESOURCE_KVD_LINEAR,
MLXSW_SP_RESOURCE_KVD,
&linear_size_params,
@@ -4309,27 +3893,31 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
return err;
+ err = mlxsw_sp_kvdl_resources_register(devlink);
+ if (err)
+ return err;
+
double_size = kvd_size - linear_size;
double_size *= profile->kvd_hash_double_parts;
double_size /= profile->kvd_hash_double_parts +
profile->kvd_hash_single_parts;
double_size = rounddown(double_size, profile->kvd_hash_granularity);
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE,
- false, double_size,
+ double_size,
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
MLXSW_SP_RESOURCE_KVD,
&hash_double_size_params,
- &mlxsw_sp_resource_kvd_hash_double_ops);
+ NULL);
if (err)
return err;
single_size = kvd_size - double_size - linear_size;
err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE,
- false, single_size,
+ single_size,
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD,
&hash_single_size_params,
- &mlxsw_sp_resource_kvd_hash_single_ops);
+ NULL);
if (err)
return err;
@@ -4583,13 +4171,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
u16 lag_id;
if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Exceeded number of supported LAG devices");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
return false;
}
if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
- NL_SET_ERR_MSG(extack,
- "spectrum: LAG device using unsupported Tx type");
+ NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
return false;
}
return true;
@@ -4831,8 +4417,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
!netif_is_lag_master(upper_dev) &&
!netif_is_bridge_master(upper_dev) &&
!netif_is_ovs_master(upper_dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Unknown upper device type");
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
return -EINVAL;
}
if (!info->linking)
@@ -4841,8 +4426,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
if (netif_is_lag_master(upper_dev) &&
@@ -4850,24 +4434,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
info->upper_info, extack))
return -EINVAL;
if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Master device is a LAG master and this device has a VLAN");
+ NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
return -EINVAL;
}
if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
!netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Can not put a VLAN on a LAG port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
return -EINVAL;
}
if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Master device is an OVS master and this device has a VLAN");
+ NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
return -EINVAL;
}
if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
- NL_SET_ERR_MSG(extack,
- "spectrum: Can not put a VLAN on an OVS port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
return -EINVAL;
}
break;
@@ -4980,7 +4560,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
if (!netif_is_bridge_master(upper_dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
+ NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
return -EINVAL;
}
if (!info->linking)
@@ -4989,7 +4569,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
upper_dev))) {
- NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
}
break;
@@ -5067,10 +4647,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_span_entry *span_entry;
struct mlxsw_sp *mlxsw_sp;
int err = 0;
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (event == NETDEV_UNREGISTER) {
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+ if (span_entry)
+ mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+ }
+ mlxsw_sp_span_respin(mlxsw_sp);
+
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 92064db2ae44..21bee8f19894 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -70,16 +70,23 @@
#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_KVD,
MLXSW_SP_RESOURCE_KVD_LINEAR,
MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
};
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
struct mlxsw_sp_upper {
struct net_device *dev;
@@ -111,35 +118,13 @@ struct mlxsw_sp_mid {
unsigned long *ports_in_mid; /* bits array */
};
-enum mlxsw_sp_span_type {
- MLXSW_SP_SPAN_EGRESS,
- MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
- struct list_head list;
- enum mlxsw_sp_span_type type;
- u8 local_port;
-
- /* Whether this is a directly bound mirror (port-to-port) or an ACL. */
- bool bound;
-};
-
-struct mlxsw_sp_span_entry {
- u8 local_port;
- bool used;
- struct list_head bound_ports_list;
- int ref_count;
- int id;
-};
-
enum mlxsw_sp_port_mall_action_type {
MLXSW_SP_PORT_MALL_MIRROR,
MLXSW_SP_PORT_MALL_SAMPLE,
};
struct mlxsw_sp_port_mall_mirror_tc_entry {
- u8 to_local_port;
+ int span_id;
bool ingress;
};
@@ -226,6 +211,8 @@ struct mlxsw_sp_port_xstats {
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
@@ -263,6 +250,7 @@ struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
@@ -400,16 +388,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type,
- bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
- u8 destination_port,
- enum mlxsw_sp_span_type type,
- bool bind);
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
/* spectrum_dcb.c */
#ifdef CONFIG_MLXSW_SPECTRUM_DCB
@@ -465,6 +443,7 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
unsigned int entry_count,
unsigned int *p_alloc_size);
u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
struct mlxsw_sp_acl_rule_info {
unsigned int priority;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 92d90ed7207e..79b1fa27a9a4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -160,6 +160,13 @@ bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block)
return block->disable_count;
}
+static bool
+mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ /* We hold a reference on ruleset ourselves */
+ return ruleset->ref_count == 2;
+}
+
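
The new mlxsw_sp_acl_ruleset_is_singular() relies on the caller already holding one reference on the ruleset, so a count of exactly two means there is only a single external user and the chain-0 ruleset may safely be bound to, or unbound from, the device. A tiny stand-alone sketch of that reference-count convention, using generic names rather than the driver's structures:

#include <stdio.h>
#include <stdbool.h>

struct obj {
	int ref_count;
};

/* The caller is assumed to already hold one reference of its own, so a
 * count of exactly two means there is only a single other user.
 */
bool obj_is_singular(const struct obj *o)
{
	return o->ref_count == 2;
}

int main(void)
{
	struct obj o = { .ref_count = 1 };      /* our own lookup reference */

	o.ref_count++;                          /* first external user */
	printf("one user  -> singular: %d\n", obj_is_singular(&o));
	o.ref_count++;                          /* second external user */
	printf("two users -> singular: %d\n", obj_is_singular(&o));
	return 0;
}
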
static int
mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
@@ -341,21 +348,8 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_ht_insert;
- if (!chain_index) {
- /* We only need ruleset with chain index 0, the implicit one,
- * to be directly bound to device. The rest of the rulesets
- * are bound by "Goto action set".
- */
- err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
- if (err)
- goto err_ruleset_bind;
- }
-
return ruleset;
-err_ruleset_bind:
- rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
- mlxsw_sp_acl_ruleset_ht_params);
err_ht_insert:
ops->ruleset_del(mlxsw_sp, ruleset->priv);
err_ops_ruleset_add:
@@ -369,12 +363,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_ruleset *ruleset)
{
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
- struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
- u32 chain_index = ruleset->ht_key.chain_index;
struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
- if (!chain_index)
- mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block);
rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
mlxsw_sp_acl_ruleset_ht_params);
ops->ruleset_del(mlxsw_sp, ruleset->priv);
@@ -577,7 +567,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
struct net_device *out_dev)
{
struct mlxsw_sp_acl_block_binding *binding;
- struct mlxsw_sp_port *out_port;
struct mlxsw_sp_port *in_port;
if (!list_is_singular(&block->binding_list))
@@ -586,16 +575,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
binding = list_first_entry(&block->binding_list,
struct mlxsw_sp_acl_block_binding, list);
in_port = binding->mlxsw_sp_port;
- if (!mlxsw_sp_port_dev_check(out_dev))
- return -EINVAL;
-
- out_port = netdev_priv(out_dev);
- if (out_port->mlxsw_sp != mlxsw_sp)
- return -EINVAL;
return mlxsw_afa_block_append_mirror(rulei->act_block,
in_port->local_port,
- out_port->local_port,
+ out_dev,
binding->ingress);
}
@@ -700,10 +683,25 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_rhashtable_insert;
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset)) {
+ /* We only need ruleset with chain index 0, the implicit
+ * one, to be directly bound to device. The rest of the
+ * rulesets are bound by "Goto action set".
+ */
+ err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
+ if (err)
+ goto err_ruleset_block_bind;
+ }
+
list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
ruleset->ht_key.block->rule_count++;
return 0;
+err_ruleset_block_bind:
+ rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+ mlxsw_sp_acl_rule_ht_params);
err_rhashtable_insert:
ops->rule_del(mlxsw_sp, rule->priv);
return err;
@@ -717,6 +715,10 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
ruleset->ht_key.block->rule_count--;
list_del(&rule->list);
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset))
+ mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
mlxsw_sp_acl_rule_ht_params);
ops->rule_del(mlxsw_sp, rule->priv);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 6ca6894125f0..510ce48d87f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
*
@@ -35,6 +35,7 @@
#include "spectrum_acl_flex_actions.h"
#include "core_acl_flex_actions.h"
+#include "spectrum_span.h"
#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
@@ -125,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
}
static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id)
{
- struct mlxsw_sp_port *in_port, *out_port;
- struct mlxsw_sp_span_entry *span_entry;
+ struct mlxsw_sp_port *in_port;
struct mlxsw_sp *mlxsw_sp = priv;
enum mlxsw_sp_span_type type;
- int err;
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- out_port = mlxsw_sp->ports[local_out_port];
in_port = mlxsw_sp->ports[local_in_port];
- err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
- if (err)
- return err;
-
- span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
- if (!span_entry) {
- err = -ENOENT;
- goto err_span_entry_find;
- }
-
- *p_span_id = span_entry->id;
- return 0;
-
-err_span_entry_find:
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
- return err;
+ return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+ false, p_span_id);
}
static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *in_port;
@@ -167,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
in_port = mlxsw_sp->ports[local_in_port];
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+ mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
}
static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index c6e180c2be1e..ad1b548e3cac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -228,10 +228,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
- if (err)
- goto err_group_update;
-
err = rhashtable_init(&group->chunk_ht,
&mlxsw_sp_acl_tcam_chunk_ht_params);
if (err)
@@ -240,7 +236,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
return 0;
err_rhashtable_init:
-err_group_update:
mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 7502e53447bd..98d896c14b87 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,126 +33,125 @@
*/
#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
#include "spectrum_ipip.h"
struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return tun->parms;
}
-static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+ struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
+static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
{
return !!(parms.i_flags & TUNNEL_KEY);
}
-static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
+static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
{
return !!(parms.o_flags & TUNNEL_KEY);
}
-static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
{
- return mlxsw_sp_ipip_parms_has_ikey(parms) ?
+ return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
be32_to_cpu(parms.i_key) : 0;
}
-static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
{
- return mlxsw_sp_ipip_parms_has_okey(parms) ?
+ return mlxsw_sp_ipip_parms4_has_okey(parms) ?
be32_to_cpu(parms.o_key) : 0;
}
-static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
{
- return parms.iph.saddr;
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
}
static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
- struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
{
- switch (proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- return (union mlxsw_sp_l3addr) {
- .addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
- };
- case MLXSW_SP_L3_PROTO_IPV6:
- break;
- }
-
- WARN_ON(1);
- return (union mlxsw_sp_l3addr) {
- .addr4 = 0,
- };
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
}
-static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
{
- return parms.iph.daddr;
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
}
static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
- struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
{
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
+
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- return (union mlxsw_sp_l3addr) {
- .addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
- };
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_saddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_saddr(parms6);
}
WARN_ON(1);
- return (union mlxsw_sp_l3addr) {
- .addr4 = 0,
- };
-}
-
-static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ return (union mlxsw_sp_l3addr) {0};
}
-static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
{
- return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
-static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+ struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
-static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
}
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
- return mlxsw_sp_ipip_parms_saddr(proto,
- mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
-static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
-{
- return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_daddr(parms4);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_daddr(parms6);
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {0};
}
-static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
- const struct net_device *ol_dev)
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
{
- return mlxsw_sp_ipip_parms_daddr(proto,
- mlxsw_sp_ipip_netdev_parms(ol_dev));
+ union mlxsw_sp_l3addr naddr = {0};
+
+ return !memcmp(&addr, &naddr, sizeof(naddr));
}
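
mlxsw_sp_l3addr_is_zero() compares the IPv4/IPv6 address union against a fully zeroed instance with memcmp(), so one helper covers both protocols. A stand-alone sketch of the same idea, with a simplified union rather than the driver's type:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>

union l3addr {
	uint32_t addr4;
	uint8_t  addr6[16];
};

bool l3addr_is_zero(union l3addr addr)
{
	static const union l3addr zero;         /* static storage: all bytes zero */

	return !memcmp(&addr, &zero, sizeof(zero));
}

int main(void)
{
	union l3addr unset = { .addr6 = {0} };
	union l3addr set   = { .addr6 = { 0x20, 0x01, 0x0d, 0xb8 } }; /* 2001:db8:: prefix */

	printf("%d %d\n", l3addr_is_zero(unset), l3addr_is_zero(set)); /* prints: 1 0 */
	return 0;
}
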
static int
@@ -176,12 +175,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
u32 tunnel_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
- bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
- u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
char rtdp_pl[MLXSW_REG_RTDP_LEN];
+ struct ip_tunnel_parm parms;
unsigned int type_check;
+ bool has_ikey;
u32 daddr4;
+ u32 ikey;
+
+ parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+ has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
+ ikey = mlxsw_sp_ipip_parms4_ikey(parms);
mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
@@ -243,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
- union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
- return memcmp(&saddr, &naddr, sizeof(naddr)) &&
- memcmp(&daddr, &naddr, sizeof(naddr));
+ return !mlxsw_sp_l3addr_is_zero(saddr) &&
+ !mlxsw_sp_l3addr_is_zero(daddr);
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
@@ -273,14 +276,15 @@ static struct mlxsw_sp_rif_ipip_lb_config
mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
+ struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
- lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+ lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
return (struct mlxsw_sp_rif_ipip_lb_config){
.lb_ipipt = lb_ipipt,
- .okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
+ .okey = mlxsw_sp_ipip_parms4_okey(parms),
.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
ol_dev),
@@ -300,16 +304,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
bool update_nhs = false;
int err = 0;
- new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
+ new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
- new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
- new_parms);
- old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
- ipip_entry->parms);
- new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
- new_parms);
- old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
- ipip_entry->parms);
+ new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
+ old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
+ new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
+ old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
@@ -326,14 +326,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
}
update_tunnel = true;
- } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
- mlxsw_sp_ipip_parms_okey(new_parms)) ||
- ipip_entry->parms.link != new_parms.link) {
+ } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_okey(new_parms)) ||
+ ipip_entry->parms4.link != new_parms.link) {
update_tunnel = true;
} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
update_nhs = true;
- } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
- mlxsw_sp_ipip_parms_ikey(new_parms)) {
+ } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_ikey(new_parms)) {
update_decap = true;
}
@@ -350,7 +350,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
false, false, false,
extack);
- ipip_entry->parms = new_parms;
+ ipip_entry->parms4 = new_parms;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 04b08d9d76e9..6909d867bb59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -37,14 +37,19 @@
#include "spectrum_router.h"
#include <net/ip_fib.h>
+#include <linux/if_tunnel.h>
struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
@@ -56,7 +61,9 @@ struct mlxsw_sp_ipip_entry {
struct mlxsw_sp_rif_ipip_lb *ol_lb;
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
- struct ip_tunnel_parm parms;
+ union {
+ struct ip_tunnel_parm parms4;
+ };
};
struct mlxsw_sp_ipip_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 55f9d2d70f9e..85503e93b93f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -67,7 +67,7 @@ struct mlxsw_sp_kvdl_part_info {
struct mlxsw_sp_kvdl_part {
struct list_head list;
- const struct mlxsw_sp_kvdl_part_info *info;
+ struct mlxsw_sp_kvdl_part_info *info;
unsigned long usage[0]; /* Entries */
};
@@ -188,21 +188,27 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+enum mlxsw_sp_kvdl_part_id {
+ MLXSW_SP_KVDL_PART_SINGLE,
+ MLXSW_SP_KVDL_PART_CHUNKS,
+ MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
+};
+
static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
{
- .part_index = 0,
+ .part_index = MLXSW_SP_KVDL_PART_SINGLE,
.start_index = MLXSW_SP_KVDL_SINGLE_BASE,
.end_index = MLXSW_SP_KVDL_SINGLE_END,
.alloc_size = 1,
},
{
- .part_index = 1,
+ .part_index = MLXSW_SP_KVDL_PART_CHUNKS,
.start_index = MLXSW_SP_KVDL_CHUNKS_BASE,
.end_index = MLXSW_SP_KVDL_CHUNKS_END,
.alloc_size = MLXSW_SP_CHUNK_MAX,
},
{
- .part_index = 2,
+ .part_index = MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
.start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
.end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END,
.alloc_size = MLXSW_SP_LARGE_CHUNK_MAX,
@@ -222,27 +228,76 @@ mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
return NULL;
}
+static void
+mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_kvdl_part *part, unsigned int size)
+{
+ struct mlxsw_sp_kvdl_part_info *info = part->info;
+
+ if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
+ info->end_index = size - 1;
+ } else {
+ struct mlxsw_sp_kvdl_part *last_part;
+
+ last_part = list_next_entry(part, list);
+ info->start_index = last_part->info->end_index + 1;
+ info->end_index = info->start_index + size - 1;
+ }
+}
+
static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
unsigned int part_index)
{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
const struct mlxsw_sp_kvdl_part_info *info;
+ enum mlxsw_sp_resource_id resource_id;
struct mlxsw_sp_kvdl_part *part;
+ bool need_update = true;
unsigned int nr_entries;
size_t usage_size;
+ u64 resource_size;
+ int err;
info = &kvdl_parts_info[part_index];
- nr_entries = (info->end_index - info->start_index + 1) /
- info->alloc_size;
+ switch (part_index) {
+ case MLXSW_SP_KVDL_PART_SINGLE:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
+ break;
+ case MLXSW_SP_KVDL_PART_CHUNKS:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
+ break;
+ case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
+ resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = devlink_resource_size_get(devlink, resource_id, &resource_size);
+ if (err) {
+ need_update = false;
+ resource_size = info->end_index - info->start_index + 1;
+ }
+
+ nr_entries = div_u64(resource_size, info->alloc_size);
usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
if (!part)
return -ENOMEM;
- part->info = info;
- list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+ part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
+ if (!part->info)
+ goto err_part_info_alloc;
+ list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+ if (need_update)
+ mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
return 0;
+
+err_part_info_alloc:
+ kfree(part);
+ return -ENOMEM;
}
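
mlxsw_sp_kvdl_part_init() now asks devlink for a user-configured size and falls back to the static part table when none was set; the number of entries and the size of the usage bitmap then follow from that size and the part's allocation granularity. A small stand-alone sketch of the sizing arithmetic, using userspace stand-ins for div_u64() and BITS_TO_LONGS() and made-up numbers, not the driver's constants:

#include <stdio.h>
#include <stdint.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	uint64_t resource_size = 98304;	/* hypothetical devlink-provided size */
	unsigned int alloc_size = 32;	/* hypothetical "chunks" granularity */
	uint64_t nr_entries;
	size_t usage_bytes;

	nr_entries = resource_size / alloc_size;	/* div_u64() in the kernel */
	usage_bytes = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);

	printf("entries=%llu, usage bitmap=%zu bytes\n",
	       (unsigned long long)nr_entries, usage_bytes);
	return 0;
}
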
static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
@@ -255,6 +310,7 @@ static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
return;
list_del(&part->list);
+ kfree(part->info);
kfree(part);
}
@@ -312,6 +368,123 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
return occ;
}
+static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_kvdl_part *part;
+
+ part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
+ MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
+ if (!part)
+ return -EINVAL;
+
+ return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
+ .occ_get = mlxsw_sp_kvdl_single_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
+ .occ_get = mlxsw_sp_kvdl_chunks_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
+ .occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
+ .size_min = 0,
+ .size_granularity = 1,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
+ .size_min = 0,
+ .size_granularity = MLXSW_SP_CHUNK_MAX,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
+ .size_min = 0,
+ .size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
+ .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static void
+mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ u32 kvdl_max_size;
+
+ kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+ mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
+ mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
+ mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
+}
+
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
+{
+ int err;
+
+ mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+ MLXSW_SP_KVDL_SINGLE_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_single_size_params,
+ &mlxsw_sp_kvdl_single_ops);
+ if (err)
+ return err;
+
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_KVDL_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_chunks_size_params,
+ &mlxsw_sp_kvdl_chunks_ops);
+ if (err)
+ return err;
+
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &mlxsw_sp_kvdl_large_chunks_size_params,
+ &mlxsw_sp_kvdl_chunks_large_ops);
+ return err;
+}
+
int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_kvdl *kvdl;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index d20b143de3b4..978a3c70653a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- ivif = mr_route->mfc4->mfc_parent;
- return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+ ivif = mr_route->mfc4->_c.mfc_parent;
+ return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->mfc4 = mfc;
mr_route->mr_table = mr_table;
for (i = 0; i < MAXVIFS; i++) {
- if (mfc->mfc_un.res.ttls[i] != 255) {
+ if (mfc->_c.mfc_un.res.ttls[i] != 255) {
err = mlxsw_sp_mr_route_evif_link(mr_route,
&mr_table->vifs[i]);
if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
}
}
- mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+ mlxsw_sp_mr_route_ivif_link(mr_route,
+ &mr_table->vifs[mfc->_c.mfc_parent]);
mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
if (offload)
- mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
else
- mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- if (mr_route->mfc4->mfc_un.res.pkt != packets)
- mr_route->mfc4->mfc_un.res.lastuse = jiffies;
- mr_route->mfc4->mfc_un.res.pkt = packets;
- mr_route->mfc4->mfc_un.res.bytes = bytes;
+ if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+ mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+ mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+ mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0b7670459051..91262b0573e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -42,6 +42,8 @@
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
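
MLXSW_SP_PRIO_BAND_TO_TCLASS() maps prio bands to hardware traffic classes in reverse order (band 0, the highest-priority band, lands on the highest tclass), and the new MLXSW_SP_PRIO_CHILD_TO_TCLASS() applies the same mapping to a qdisc parent's minor number, which is the band plus one. A stand-alone sketch of the arithmetic; IEEE_8021QAZ_MAX_TCS is 8 in the kernel headers, and the macro names below are local copies, not the driver's:

#include <stdio.h>

#define IEEE_8021QAZ_MAX_TCS 8
#define PRIO_BAND_TO_TCLASS(band)   (IEEE_8021QAZ_MAX_TCS - (band) - 1)
#define PRIO_CHILD_TO_TCLASS(child) PRIO_BAND_TO_TCLASS((child) - 1)

int main(void)
{
	int band;

	for (band = 0; band < IEEE_8021QAZ_MAX_TCS; band++)
		printf("band %d -> tclass %d (child minor %d)\n",
		       band, PRIO_BAND_TO_TCLASS(band), band + 1);

	/* e.g. parent 1:1 (minor 1) is band 0, i.e. tclass 7 */
	printf("child minor 1 -> tclass %d\n", PRIO_CHILD_TO_TCLASS(1));
	return 0;
}
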
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
struct mlxsw_sp_qdisc {
u32 handle;
u8 tclass_num;
+ u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
mlxsw_sp_qdisc->handle == handle;
}
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
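
mlxsw_sp_qdisc_bstats_per_priority_get() sums the per-priority tx counters collected in the new xstats arrays, but only for the priorities selected by the qdisc's prio_bitmap. A stand-alone sketch of that bitmap-filtered summation with made-up counter values, not driver data:

#include <stdio.h>
#include <stdint.h>

#define MAX_TCS 8

void bstats_per_priority_get(const uint64_t pkts[MAX_TCS],
			     const uint64_t bytes[MAX_TCS],
			     uint8_t prio_bitmap,
			     uint64_t *tx_packets, uint64_t *tx_bytes)
{
	int i;

	*tx_packets = 0;
	*tx_bytes = 0;
	for (i = 0; i < MAX_TCS; i++) {
		if (prio_bitmap & (1u << i)) {	/* priority i feeds this qdisc */
			*tx_packets += pkts[i];
			*tx_bytes += bytes[i];
		}
	}
}

int main(void)
{
	uint64_t pkts[MAX_TCS]  = { 10, 20, 30, 40, 50, 60, 70, 80 };
	uint64_t bytes[MAX_TCS] = { 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000 };
	uint64_t tx_packets, tx_bytes;

	/* priorities 0 and 3 mapped to this band */
	bstats_per_priority_get(pkts, bytes, (1u << 0) | (1u << 3),
				&tx_packets, &tx_bytes);
	printf("packets=%llu bytes=%llu\n",	/* prints: packets=50 bytes=5000 */
	       (unsigned long long)tx_packets, (unsigned long long)tx_bytes);
	return 0;
}
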
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
- stats_base->tx_packets = stats->tx_packets;
- stats_base->tx_bytes = stats->tx_bytes;
-
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
red_base->prob_mark = xstats->ecn;
red_base->prob_drop = xstats->wred_drop[tclass_num];
red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
}
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
- tx_packets = stats->tx_packets - stats_base->tx_packets;
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
if (p->command == TC_RED_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
{
int i;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
return 0;
}
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- int tclass, i;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
int err;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
- err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
- if (err)
- return err;
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
}
-
return 0;
}
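
mlxsw_sp_qdisc_prio_replace() now derives a per-band prio_bitmap from the offloaded priomap, so each child qdisc knows exactly which priorities feed it; bands beyond p->bands get an empty bitmap and their qdiscs destroyed. A stand-alone sketch of how a priomap turns into per-band bitmaps, with a made-up priomap rather than one taken from the driver:

#include <stdio.h>
#include <stdint.h>

#define MAX_TCS 8

int main(void)
{
	/* priomap[i] = band that priority i is mapped to */
	uint8_t priomap[MAX_TCS] = { 1, 1, 0, 0, 2, 2, 2, 2 };
	int bands = 3;
	uint8_t bitmap;
	int band, i;

	for (band = 0; band < bands; band++) {
		bitmap = 0;
		for (i = 0; i < MAX_TCS; i++)
			if (priomap[i] == band)
				bitmap |= 1u << i;
		printf("band %d: prio_bitmap 0x%02x\n", band, bitmap);
	}
	return 0;
}
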
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
+ drops += xstats->wred_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
stats_base->drops += xstats->tail_drop[i];
+ stats_base->drops += xstats->wred_drop[i];
+ }
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc at the qdisc's new location.
+ * (However, if the graft is to the location where the qdisc already is, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so -
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
default:
return -EOPNOTSUPP;
}
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
- mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
- GFP_KERNEL);
- if (!mlxsw_sp_port->root_qdisc)
- return -ENOMEM;
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
}
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
kfree(mlxsw_sp_port->root_qdisc);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f7948e983637..921bd1075edf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,10 +1,10 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -70,6 +70,7 @@
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
+#include "spectrum_span.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
@@ -796,7 +797,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
if (!vr) {
- NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
return ERR_PTR(-EBUSY);
}
fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
@@ -1024,9 +1025,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct mlxsw_sp_ipip_entry *ret = NULL;
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
if (!ipip_entry)
return ERR_PTR(-ENOMEM);
@@ -1040,7 +1043,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
ipip_entry->ipipt = ipipt;
ipip_entry->ol_dev = ol_dev;
- ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
+
+ switch (ipip_ops->ul_proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ WARN_ON(1);
+ break;
+ }
return ipip_entry;
@@ -2320,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
read_unlock_bh(&n->lock);
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
entry_connected = nud_state & NUD_VALID && !dead;
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
if (!entry_connected && !neigh_entry)
@@ -2417,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
mlxsw_core_schedule_work(&net_work->work);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
break;
- case NETEVENT_MULTIPATH_HASH_UPDATE:
+ case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+ case NETEVENT_IPV6_MPATH_HASH_UPDATE:
net = ptr;
if (!net_eq(net, &init_net))
@@ -5579,6 +5593,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
/* Protect internal structures from changes */
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5621,6 +5637,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
int err;
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD:
@@ -5793,7 +5811,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
}
if (err < 0)
- NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
+ NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
return err;
}
@@ -6032,7 +6050,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
if (err) {
- NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
goto err_rif_index_alloc;
}
@@ -7013,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
+ bool only_l3 = !ip6_multipath_hash_policy(&init_net);
+
mlxsw_sp_mp_hash_header_set(recr2_pl,
MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
- mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+ if (only_l3) {
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+ } else {
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_SPORT);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_DPORT);
+ }
}
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
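
Editorial note on the mlxsw_sp_mp6_hash_init() change above: when ip6_multipath_hash_policy() selects an L4-aware hash, the ECMP hash is fed the TCP/UDP source and destination ports instead of the flow label. A minimal userspace sketch of why port inputs spread flows between the same host pair across nexthops (not part of the patch; the mixing function and field values are arbitrary examples):

/* Editorial sketch, not part of the patch: L3-only vs. L3+L4 hash inputs. */
#include <stdio.h>
#include <stdint.h>

static uint32_t mix(uint32_t h, uint32_t v)
{
    h ^= v;
    h *= 0x9e3779b1u;   /* simple multiplicative mixing step */
    return h;
}

int main(void)
{
    uint32_t saddr = 0xc0000201, daddr = 0xc0000202; /* 192.0.2.1 -> 192.0.2.2 */
    uint16_t sports[] = { 40000, 40001 };            /* two flows, same hosts */
    unsigned int nexthops = 2;

    for (int i = 0; i < 2; i++) {
        uint32_t l3 = mix(mix(0, saddr), daddr);        /* addresses only */
        uint32_t l4 = mix(mix(l3, sports[i]), 443);     /* plus L4 ports  */

        printf("flow %d: L3-only nexthop %u, L3+L4 nexthop %u\n",
               i, l3 % nexthops, l4 % nexthops);
    }
    return 0;
}
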
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
new file mode 100644
index 000000000000..ae22a3daffbf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -0,0 +1,804 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
+
+#include "spectrum.h"
+#include "spectrum_span.h"
+#include "spectrum_ipip.h"
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
+ return -EIO;
+
+ mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_SPAN);
+ mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
+ sizeof(struct mlxsw_sp_span_entry),
+ GFP_KERNEL);
+ if (!mlxsw_sp->span.entries)
+ return -ENOMEM;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ INIT_LIST_HEAD(&curr->bound_ports_list);
+ curr->id = i;
+ }
+
+ return 0;
+}
+
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
+ }
+ kfree(mlxsw_sp->span.entries);
+}
+
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = netdev_priv(to_dev);
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analyzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+ .can_handle = mlxsw_sp_port_dev_check,
+ .parms = mlxsw_sp_span_entry_phys_parms,
+ .configure = mlxsw_sp_span_entry_phys_configure,
+ .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *l3edev,
+ unsigned char dmac[ETH_ALEN])
+{
+ struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+ int err = 0;
+
+ if (!neigh) {
+ neigh = neigh_create(tbl, pkey, l3edev);
+ if (IS_ERR(neigh))
+ return PTR_ERR(neigh);
+ }
+
+ neigh_event_send(neigh, NULL);
+
+ read_lock_bh(&neigh->lock);
+ if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+ memcpy(dmac, neigh->ha, ETH_ALEN);
+ else
+ err = -ENOENT;
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = NULL;
+ return 0;
+}
+
+static __maybe_unused int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+ union mlxsw_sp_l3addr saddr,
+ union mlxsw_sp_l3addr daddr,
+ union mlxsw_sp_l3addr gw,
+ __u8 ttl,
+ struct neigh_table *tbl,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ unsigned char dmac[ETH_ALEN];
+
+ if (mlxsw_sp_l3addr_is_zero(gw))
+ gw = daddr;
+
+ if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+ mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ sparmsp->dest_port = netdev_priv(l3edev);
+ sparmsp->ttl = ttl;
+ memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+ memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+ sparmsp->saddr = saddr;
+ sparmsp->daddr = daddr;
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+ __be32 *saddrp, __be32 *daddrp)
+{
+ struct ip_tunnel *tun = netdev_priv(to_dev);
+ struct net_device *dev = NULL;
+ struct ip_tunnel_parm parms;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ /* We assume "dev" stays valid after rt is put. */
+ ASSERT_RTNL();
+
+ parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+ 0, 0, parms.link, tun->fwmark);
+
+ rt = ip_route_output_key(tun->net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ if (rt->rt_type != RTN_UNICAST)
+ goto out;
+
+ dev = rt->dst.dev;
+ *saddrp = fl4.saddr;
+ *daddrp = rt->rt_gateway;
+
+out:
+ ip_rt_put(rt);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+ union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+ bool inherit_tos = tparm.iph.tos & 0x1;
+ bool inherit_ttl = !tparm.iph.ttl;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.iph.ttl,
+ &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analyzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+ sparms.ttl, sparms.smac,
+ be32_to_cpu(sparms.saddr.addr4),
+ be32_to_cpu(sparms.daddr.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+ .can_handle = is_gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap4_parms,
+ .configure = mlxsw_sp_span_entry_gretap4_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+ struct in6_addr *saddrp,
+ struct in6_addr *daddrp)
+{
+ struct ip6_tnl *t = netdev_priv(to_dev);
+ struct flowi6 fl6 = t->fl.u.ip6;
+ struct net_device *dev = NULL;
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ /* We assume "dev" stays valid after dst is released. */
+ ASSERT_RTNL();
+
+ fl6.flowi6_mark = t->parms.fwmark;
+ if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+ return NULL;
+
+ dst = ip6_route_output(t->net, NULL, &fl6);
+ if (!dst || dst->error)
+ goto out;
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ dev = dst->dev;
+ *saddrp = fl6.saddr;
+ *daddrp = rt6->rt6i_gateway;
+
+out:
+ dst_release(dst);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+ bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+ union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+ union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+ bool inherit_ttl = !tparm.hop_limit;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.hop_limit,
+ &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analyzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+ sparms.saddr.addr6,
+ sparms.daddr.addr6);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+ .can_handle = is_ip6gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap6_parms,
+ .configure = mlxsw_sp_span_entry_gretap6_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+#endif
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+ &mlxsw_sp_span_entry_ops_phys,
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+ &mlxsw_sp_span_entry_ops_gretap4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+ &mlxsw_sp_span_entry_ops_gretap6,
+#endif
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+ .parms = mlxsw_sp_span_entry_nop_parms,
+ .configure = mlxsw_sp_span_entry_nop_configure,
+ .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ if (sparms.dest_port) {
+ if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+ netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ } else if (span_entry->ops->configure(span_entry, sparms)) {
+ netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ }
+ }
+
+ span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ if (span_entry->parms.dest_port)
+ span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry = NULL;
+ int i;
+
+ /* find a free entry to use */
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ if (!mlxsw_sp->span.entries[i].ref_count) {
+ span_entry = &mlxsw_sp->span.entries[i];
+ break;
+ }
+ }
+ if (!span_entry)
+ return NULL;
+
+ span_entry->ops = ops;
+ span_entry->ref_count = 1;
+ span_entry->to_dev = to_dev;
+ mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
+ return span_entry;
+}
+
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+}
+
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->to_dev == to_dev)
+ return curr;
+ }
+ return NULL;
+}
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+ span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->id == span_id)
+ return curr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
+ if (span_entry) {
+ /* Already exists, just take a reference */
+ span_entry->ref_count++;
+ return span_entry;
+ }
+
+ return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
+}
+
+static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ WARN_ON(!span_entry->ref_count);
+ if (--span_entry->ref_count == 0)
+ mlxsw_sp_span_entry_destroy(span_entry);
+ return 0;
+}
+
+static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ struct mlxsw_sp_span_inspected_port *p;
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ list_for_each_entry(p, &curr->bound_ports_list, list)
+ if (p->local_port == port->local_port &&
+ p->type == MLXSW_SP_SPAN_EGRESS)
+ return true;
+ }
+
+ return false;
+}
+
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+ int mtu)
+{
+ return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
+}
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int err;
+
+ /* If port is egress mirrored, the shared buffer size should be
+ * updated according to the MTU value.
+ */
+ if (mlxsw_sp_span_is_egress_mirror(port)) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static struct mlxsw_sp_span_inspected_port *
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ struct mlxsw_sp_span_inspected_port *p;
+
+ list_for_each_entry(p, &span_entry->bound_ports_list, list)
+ if (port->local_port == p->local_port)
+ return p;
+ return NULL;
+}
+
+static int
+mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char mpar_pl[MLXSW_REG_MPAR_LEN];
+ int pa_id = span_entry->id;
+
+ /* bind the port to the SPAN entry */
+ mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+ (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int err;
+
+ /* if it is an egress SPAN, bind a shared buffer to it */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+ port->dev->mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ if (bind) {
+ err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ true);
+ if (err)
+ goto err_port_bind;
+ }
+
+ inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
+ if (!inspected_port) {
+ err = -ENOMEM;
+ goto err_inspected_port_alloc;
+ }
+ inspected_port->local_port = port->local_port;
+ inspected_port->type = type;
+ list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
+
+ return 0;
+
+err_inspected_port_alloc:
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+err_port_bind:
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+ return err;
+}
+
+static void
+mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+ inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+ if (!inspected_port)
+ return;
+
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+ /* remove the SBIB buffer if it was egress SPAN */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+
+ list_del(&inspected_port->list);
+ kfree(inspected_port);
+}
+
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+ if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+ return mlxsw_sp_span_entry_types[i];
+
+ return NULL;
+}
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type, bool bind,
+ int *p_span_id)
+{
+ struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms sparms = {0};
+ struct mlxsw_sp_span_entry *span_entry;
+ int err;
+
+ ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+ if (!ops) {
+ netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->parms(to_dev, &sparms);
+ if (err)
+ return err;
+
+ span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
+ if (!span_entry)
+ return -ENOENT;
+
+ netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
+ span_entry->id);
+
+ err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
+ if (err)
+ goto err_port_bind;
+
+ *p_span_id = span_entry->id;
+ return 0;
+
+err_port_bind:
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+ return err;
+}
+
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
+ if (!span_entry) {
+ netdev_err(from->dev, "no span entry found\n");
+ return;
+ }
+
+ netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
+ span_entry->id);
+ mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
+}
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ struct mlxsw_sp_span_parms sparms = {0};
+
+ if (!curr->ref_count)
+ continue;
+
+ err = curr->ops->parms(curr->to_dev, &sparms);
+ if (err)
+ continue;
+
+ if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+ mlxsw_sp_span_entry_deconfigure(curr);
+ mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+ }
+ }
+}
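
Editorial note on spectrum_span.c above: SPAN destinations are handled through a per-type ops table (can_handle/parms/configure/deconfigure), and mlxsw_sp_span_respin() re-resolves the parameters of each bound entry and reconfigures it only when they changed. A minimal userspace sketch of that dispatch-and-respin pattern (not part of the patch; all names are illustrative, not driver symbols):

/* Editorial sketch, not part of the patch. */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

struct parms { int dest; };

struct entry_ops {
    bool (*can_handle)(const char *kind);
    int (*get_parms)(struct parms *p);
};

static bool phys_can_handle(const char *kind) { return !strcmp(kind, "phys"); }
static int phys_parms(struct parms *p) { p->dest = 1; return 0; }

static const struct entry_ops phys_ops = { phys_can_handle, phys_parms };
static const struct entry_ops *const types[] = { &phys_ops };

int main(void)
{
    struct parms cur = { .dest = 0 }, fresh = { 0 };
    const struct entry_ops *ops = NULL;

    /* dispatch: pick the first ops whose can_handle() accepts the device */
    for (size_t i = 0; i < sizeof(types) / sizeof(types[0]); i++)
        if (types[i]->can_handle("phys"))
            ops = types[i];
    if (!ops || ops->get_parms(&fresh))
        return 1;

    /* respin: re-resolve parameters and reconfigure only if they changed */
    if (memcmp(&fresh, &cur, sizeof(fresh)))
        printf("parameters changed, reconfiguring\n");
    return 0;
}
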
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
new file mode 100644
index 000000000000..4b87ec20e658
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -0,0 +1,107 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+ MLXSW_SP_SPAN_EGRESS,
+ MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+ struct list_head list;
+ enum mlxsw_sp_span_type type;
+ u8 local_port;
+
+ /* Whether this is a directly bound mirror (port-to-port) or an ACL. */
+ bool bound;
+};
+
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
+struct mlxsw_sp_span_entry {
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
+ struct list_head bound_ports_list;
+ int ref_count;
+ int id;
+};
+
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif
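
Editorial note on the SPAN API above: span entries are reference counted. mlxsw_sp_span_entry_get() reuses an existing entry bound to the same destination netdev and bumps its ref count, while mlxsw_sp_span_entry_put() destroys the entry when the count drops to zero. A minimal userspace sketch of that get/put lifetime (not part of the patch; names are illustrative only):

/* Editorial sketch, not part of the patch. */
#include <stdio.h>

struct entry { int ref_count; int id; };

static struct entry pool[3];

static struct entry *entry_get(int id)
{
    for (int i = 0; i < 3; i++)
        if (pool[i].ref_count && pool[i].id == id) {
            pool[i].ref_count++;        /* reuse the existing entry */
            return &pool[i];
        }
    for (int i = 0; i < 3; i++)
        if (!pool[i].ref_count) {       /* otherwise claim a free slot */
            pool[i].ref_count = 1;
            pool[i].id = id;
            return &pool[i];
        }
    return NULL;
}

static void entry_put(struct entry *e)
{
    if (--e->ref_count == 0)
        printf("entry %d destroyed\n", e->id);
}

int main(void)
{
    struct entry *a = entry_get(7);     /* creates the entry */
    struct entry *b = entry_get(7);     /* reuses it, ref_count == 2 */

    entry_put(b);
    entry_put(a);                       /* last put destroys it */
    return 0;
}
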
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 161bcdc012f0..c11c9a635866 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1844,7 +1844,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
if (is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
+ NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
return -EINVAL;
}
@@ -1907,20 +1907,16 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = bridge_port->dev;
u16 vid;
- if (!is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
- return -EINVAL;
- }
- vid = vlan_dev_vlan_id(bridge_port->dev);
-
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return -EINVAL;
if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
- NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
+ NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
return -EINVAL;
}
@@ -1937,8 +1933,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
- u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+ struct net_device *dev = bridge_port->dev;
+ u16 vid;
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index f3c29bbf07e2..c87b0934a405 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -789,7 +789,7 @@ mlxsw_sx_port_get_link_ksettings(struct net_device *dev,
u32 supported, advertising, lp_advertising;
int err;
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
if (err) {
netdev_err(dev, "Failed to get proto");
@@ -879,7 +879,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
mlxsw_sx_to_ptys_advert_link(advertising) :
mlxsw_sx_to_ptys_speed(speed);
- mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
if (err) {
netdev_err(dev, "Failed to get proto");
@@ -897,7 +897,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
return 0;
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
- eth_proto_new);
+ eth_proto_new, true);
err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
if (err) {
netdev_err(dev, "Failed to set proto admin");
@@ -1029,7 +1029,7 @@ mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
- eth_proto_admin);
+ eth_proto_admin, true);
return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
}
diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig
index 36a09d94b368..71dca8bd51ac 100644
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -42,4 +42,14 @@ config ENCX24J600
To compile this driver as a module, choose M here. The module will be
called encx24j600.
+config LAN743X
+ tristate "LAN743x support"
+ depends on PCI
+ select PHYLIB
+ ---help---
+ Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip.
+
+ To compile this driver as a module, choose M here. The module will be
+ called lan743x.
+
endif # NET_VENDOR_MICROCHIP
diff --git a/drivers/net/ethernet/microchip/Makefile b/drivers/net/ethernet/microchip/Makefile
index ff78f621b59a..2e982cc249fb 100644
--- a/drivers/net/ethernet/microchip/Makefile
+++ b/drivers/net/ethernet/microchip/Makefile
@@ -4,3 +4,6 @@
obj-$(CONFIG_ENC28J60) += enc28j60.o
obj-$(CONFIG_ENCX24J600) += encx24j600.o encx24j600-regmap.o
+obj-$(CONFIG_LAN743X) += lan743x.o
+
+lan743x-objs := lan743x_main.o
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
new file mode 100644
index 000000000000..dd947e4dd3ce
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -0,0 +1,2771 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/crc32.h>
+#include <linux/microchipphy.h>
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/rtnetlink.h>
+#include <linux/iopoll.h>
+#include "lan743x_main.h"
+
+static void lan743x_pci_cleanup(struct lan743x_adapter *adapter)
+{
+ pci_release_selected_regions(adapter->pdev,
+ pci_select_bars(adapter->pdev,
+ IORESOURCE_MEM));
+ pci_disable_device(adapter->pdev);
+}
+
+static int lan743x_pci_init(struct lan743x_adapter *adapter,
+ struct pci_dev *pdev)
+{
+ unsigned long bars = 0;
+ int ret;
+
+ adapter->pdev = pdev;
+ ret = pci_enable_device_mem(pdev);
+ if (ret)
+ goto return_error;
+
+ netif_info(adapter, probe, adapter->netdev,
+ "PCI: Vendor ID = 0x%04X, Device ID = 0x%04X\n",
+ pdev->vendor, pdev->device);
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (!test_bit(0, &bars))
+ goto disable_device;
+
+ ret = pci_request_selected_regions(pdev, bars, DRIVER_NAME);
+ if (ret)
+ goto disable_device;
+
+ pci_set_master(pdev);
+ return 0;
+
+disable_device:
+ pci_disable_device(adapter->pdev);
+
+return_error:
+ return ret;
+}
+
+static u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+{
+ return ioread32(&adapter->csr.csr_address[offset]);
+}
+
+static void lan743x_csr_write(struct lan743x_adapter *adapter, int offset,
+ u32 data)
+{
+ iowrite32(data, &adapter->csr.csr_address[offset]);
+}
+
+#define LAN743X_CSR_READ_OP(offset) lan743x_csr_read(adapter, offset)
+
+static int lan743x_csr_light_reset(struct lan743x_adapter *adapter)
+{
+ u32 data;
+
+ data = lan743x_csr_read(adapter, HW_CFG);
+ data |= HW_CFG_LRST_;
+ lan743x_csr_write(adapter, HW_CFG, data);
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, HW_CFG, data,
+ !(data & HW_CFG_LRST_), 100000, 10000000);
+}
+
+static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter,
+ int offset, u32 bit_mask,
+ int target_value, int usleep_min,
+ int usleep_max, int count)
+{
+ u32 data;
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, offset, data,
+ target_value == ((data & bit_mask) ? 1 : 0),
+ usleep_max, usleep_min * count);
+}
+
+static int lan743x_csr_init(struct lan743x_adapter *adapter)
+{
+ struct lan743x_csr *csr = &adapter->csr;
+ resource_size_t bar_start, bar_length;
+ int result;
+
+ bar_start = pci_resource_start(adapter->pdev, 0);
+ bar_length = pci_resource_len(adapter->pdev, 0);
+ csr->csr_address = devm_ioremap(&adapter->pdev->dev,
+ bar_start, bar_length);
+ if (!csr->csr_address) {
+ result = -ENOMEM;
+ goto clean_up;
+ }
+
+ csr->id_rev = lan743x_csr_read(adapter, ID_REV);
+ csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV);
+ netif_info(adapter, probe, adapter->netdev,
+ "ID_REV = 0x%08X, FPGA_REV = %d.%d\n",
+ csr->id_rev, FPGA_REV_GET_MAJOR_(csr->fpga_rev),
+ FPGA_REV_GET_MINOR_(csr->fpga_rev));
+ if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) {
+ result = -ENODEV;
+ goto clean_up;
+ }
+
+ csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+ switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) {
+ case ID_REV_CHIP_REV_A0_:
+ csr->flags |= LAN743X_CSR_FLAG_IS_A0;
+ csr->flags &= ~LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+ break;
+ case ID_REV_CHIP_REV_B0_:
+ csr->flags |= LAN743X_CSR_FLAG_IS_B0;
+ break;
+ }
+
+ result = lan743x_csr_light_reset(adapter);
+ if (result)
+ goto clean_up;
+ return 0;
+clean_up:
+ return result;
+}
+
+static void lan743x_intr_software_isr(void *context)
+{
+ struct lan743x_adapter *adapter = context;
+ struct lan743x_intr *intr = &adapter->intr;
+ u32 int_sts;
+
+ int_sts = lan743x_csr_read(adapter, INT_STS);
+ if (int_sts & INT_BIT_SW_GP_) {
+ lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
+ intr->software_isr_flag = 1;
+ }
+}
+
+static void lan743x_tx_isr(void *context, u32 int_sts, u32 flags)
+{
+ struct lan743x_tx *tx = context;
+ struct lan743x_adapter *adapter = tx->adapter;
+ bool enable_flag = true;
+ u32 int_en = 0;
+
+ int_en = lan743x_csr_read(adapter, INT_EN_SET);
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ }
+
+ if (int_sts & INT_BIT_DMA_TX_(tx->channel_number)) {
+ u32 ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+ u32 dmac_int_sts;
+ u32 dmac_int_en;
+
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+ dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+ else
+ dmac_int_sts = ioc_bit;
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+ dmac_int_en = lan743x_csr_read(adapter,
+ DMAC_INT_EN_SET);
+ else
+ dmac_int_en = ioc_bit;
+
+ dmac_int_en &= ioc_bit;
+ dmac_int_sts &= dmac_int_en;
+ if (dmac_int_sts & ioc_bit) {
+ napi_schedule(&tx->napi);
+ enable_flag = false;/* poll func will enable later */
+ }
+ }
+
+ if (enable_flag)
+ /* enable isr */
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_TX_(tx->channel_number));
+}
+
+static void lan743x_rx_isr(void *context, u32 int_sts, u32 flags)
+{
+ struct lan743x_rx *rx = context;
+ struct lan743x_adapter *adapter = rx->adapter;
+ bool enable_flag = true;
+
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ }
+
+ if (int_sts & INT_BIT_DMA_RX_(rx->channel_number)) {
+ u32 rx_frame_bit = DMAC_INT_BIT_RXFRM_(rx->channel_number);
+ u32 dmac_int_sts;
+ u32 dmac_int_en;
+
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+ dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+ else
+ dmac_int_sts = rx_frame_bit;
+ if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+ dmac_int_en = lan743x_csr_read(adapter,
+ DMAC_INT_EN_SET);
+ else
+ dmac_int_en = rx_frame_bit;
+
+ dmac_int_en &= rx_frame_bit;
+ dmac_int_sts &= dmac_int_en;
+ if (dmac_int_sts & rx_frame_bit) {
+ napi_schedule(&rx->napi);
+ enable_flag = false;/* poll func will enable later */
+ }
+ }
+
+ if (enable_flag) {
+ /* enable isr */
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ }
+}
+
+static void lan743x_intr_shared_isr(void *context, u32 int_sts, u32 flags)
+{
+ struct lan743x_adapter *adapter = context;
+ unsigned int channel;
+
+ if (int_sts & INT_BIT_ALL_RX_) {
+ for (channel = 0; channel < LAN743X_USED_RX_CHANNELS;
+ channel++) {
+ u32 int_bit = INT_BIT_DMA_RX_(channel);
+
+ if (int_sts & int_bit) {
+ lan743x_rx_isr(&adapter->rx[channel],
+ int_bit, flags);
+ int_sts &= ~int_bit;
+ }
+ }
+ }
+ if (int_sts & INT_BIT_ALL_TX_) {
+ for (channel = 0; channel < LAN743X_USED_TX_CHANNELS;
+ channel++) {
+ u32 int_bit = INT_BIT_DMA_TX_(channel);
+
+ if (int_sts & int_bit) {
+ lan743x_tx_isr(&adapter->tx[channel],
+ int_bit, flags);
+ int_sts &= ~int_bit;
+ }
+ }
+ }
+ if (int_sts & INT_BIT_ALL_OTHER_) {
+ if (int_sts & INT_BIT_SW_GP_) {
+ lan743x_intr_software_isr(adapter);
+ int_sts &= ~INT_BIT_SW_GP_;
+ }
+ }
+ if (int_sts)
+ lan743x_csr_write(adapter, INT_EN_CLR, int_sts);
+}
+
+static irqreturn_t lan743x_intr_entry_isr(int irq, void *ptr)
+{
+ struct lan743x_vector *vector = ptr;
+ struct lan743x_adapter *adapter = vector->adapter;
+ irqreturn_t result = IRQ_NONE;
+ u32 int_enables;
+ u32 int_sts;
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ) {
+ int_sts = lan743x_csr_read(adapter, INT_STS);
+ } else if (vector->flags &
+ (LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)) {
+ int_sts = lan743x_csr_read(adapter, INT_STS_R2C);
+ } else {
+ /* use mask as implied status */
+ int_sts = vector->int_mask | INT_BIT_MAS_;
+ }
+
+ if (!(int_sts & INT_BIT_MAS_))
+ goto irq_done;
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR)
+ /* disable vector interrupt */
+ lan743x_csr_write(adapter,
+ INT_VEC_EN_CLR,
+ INT_VEC_EN_(vector->vector_index));
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR)
+ /* disable master interrupt */
+ lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK) {
+ int_enables = lan743x_csr_read(adapter, INT_EN_SET);
+ } else {
+ /* use vector mask as implied enable mask */
+ int_enables = vector->int_mask;
+ }
+
+ int_sts &= int_enables;
+ int_sts &= vector->int_mask;
+ if (int_sts) {
+ if (vector->handler) {
+ vector->handler(vector->context,
+ int_sts, vector->flags);
+ } else {
+ /* disable interrupts on this vector */
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ vector->int_mask);
+ }
+ result = IRQ_HANDLED;
+ }
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET)
+ /* enable master interrupt */
+ lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+
+ if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET)
+ /* enable vector interrupt */
+ lan743x_csr_write(adapter,
+ INT_VEC_EN_SET,
+ INT_VEC_EN_(vector->vector_index));
+irq_done:
+ return result;
+}
+
+static int lan743x_intr_test_isr(struct lan743x_adapter *adapter)
+{
+ struct lan743x_intr *intr = &adapter->intr;
+ int result = -ENODEV;
+ int timeout = 10;
+
+ intr->software_isr_flag = 0;
+
+ /* enable interrupt */
+ lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_SW_GP_);
+
+ /* activate interrupt here */
+ lan743x_csr_write(adapter, INT_SET, INT_BIT_SW_GP_);
+ while ((timeout > 0) && (!(intr->software_isr_flag))) {
+ usleep_range(1000, 20000);
+ timeout--;
+ }
+
+ if (intr->software_isr_flag)
+ result = 0;
+
+ /* disable interrupts */
+ lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_);
+ return result;
+}
+
+static int lan743x_intr_register_isr(struct lan743x_adapter *adapter,
+ int vector_index, u32 flags,
+ u32 int_mask,
+ lan743x_vector_handler handler,
+ void *context)
+{
+ struct lan743x_vector *vector = &adapter->intr.vector_list
+ [vector_index];
+ int ret;
+
+ vector->adapter = adapter;
+ vector->flags = flags;
+ vector->vector_index = vector_index;
+ vector->int_mask = int_mask;
+ vector->handler = handler;
+ vector->context = context;
+
+ ret = request_irq(vector->irq,
+ lan743x_intr_entry_isr,
+ (flags & LAN743X_VECTOR_FLAG_IRQ_SHARED) ?
+ IRQF_SHARED : 0, DRIVER_NAME, vector);
+ if (ret) {
+ vector->handler = NULL;
+ vector->context = NULL;
+ vector->int_mask = 0;
+ vector->flags = 0;
+ }
+ return ret;
+}
+
+static void lan743x_intr_unregister_isr(struct lan743x_adapter *adapter,
+ int vector_index)
+{
+ struct lan743x_vector *vector = &adapter->intr.vector_list
+ [vector_index];
+
+ free_irq(vector->irq, vector);
+ vector->handler = NULL;
+ vector->context = NULL;
+ vector->int_mask = 0;
+ vector->flags = 0;
+}
+
+static u32 lan743x_intr_get_vector_flags(struct lan743x_adapter *adapter,
+ u32 int_mask)
+{
+ int index;
+
+ for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+ if (adapter->intr.vector_list[index].int_mask & int_mask)
+ return adapter->intr.vector_list[index].flags;
+ }
+ return 0;
+}
+
+static void lan743x_intr_close(struct lan743x_adapter *adapter)
+{
+ struct lan743x_intr *intr = &adapter->intr;
+ int index = 0;
+
+ lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+ lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x000000FF);
+
+ for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+ if (intr->flags & INTR_FLAG_IRQ_REQUESTED(index)) {
+ lan743x_intr_unregister_isr(adapter, index);
+ intr->flags &= ~INTR_FLAG_IRQ_REQUESTED(index);
+ }
+ }
+
+ if (intr->flags & INTR_FLAG_MSI_ENABLED) {
+ pci_disable_msi(adapter->pdev);
+ intr->flags &= ~INTR_FLAG_MSI_ENABLED;
+ }
+
+ if (intr->flags & INTR_FLAG_MSIX_ENABLED) {
+ pci_disable_msix(adapter->pdev);
+ intr->flags &= ~INTR_FLAG_MSIX_ENABLED;
+ }
+}
+
+static int lan743x_intr_open(struct lan743x_adapter *adapter)
+{
+ struct msix_entry msix_entries[LAN743X_MAX_VECTOR_COUNT];
+ struct lan743x_intr *intr = &adapter->intr;
+ u32 int_vec_en_auto_clr = 0;
+ u32 int_vec_map0 = 0;
+ u32 int_vec_map1 = 0;
+ int ret = -ENODEV;
+ int index = 0;
+ u32 flags = 0;
+
+ intr->number_of_vectors = 0;
+
+ /* Try to set up MSIX interrupts */
+ memset(&msix_entries[0], 0,
+ sizeof(struct msix_entry) * LAN743X_MAX_VECTOR_COUNT);
+ for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++)
+ msix_entries[index].entry = index;
+ ret = pci_enable_msix_range(adapter->pdev,
+ msix_entries, 1,
+ 1 + LAN743X_USED_TX_CHANNELS +
+ LAN743X_USED_RX_CHANNELS);
+
+ if (ret > 0) {
+ intr->flags |= INTR_FLAG_MSIX_ENABLED;
+ intr->number_of_vectors = ret;
+ intr->using_vectors = true;
+ for (index = 0; index < intr->number_of_vectors; index++)
+ intr->vector_list[index].irq = msix_entries
+ [index].vector;
+ netif_info(adapter, ifup, adapter->netdev,
+ "using MSIX interrupts, number of vectors = %d\n",
+ intr->number_of_vectors);
+ }
+
+ /* If MSIX failed, try to set up MSI interrupts */
+ if (!intr->number_of_vectors) {
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ if (!pci_enable_msi(adapter->pdev)) {
+ intr->flags |= INTR_FLAG_MSI_ENABLED;
+ intr->number_of_vectors = 1;
+ intr->using_vectors = true;
+ intr->vector_list[0].irq =
+ adapter->pdev->irq;
+ netif_info(adapter, ifup, adapter->netdev,
+ "using MSI interrupts, number of vectors = %d\n",
+ intr->number_of_vectors);
+ }
+ }
+ }
+
+ /* If both MSIX and MSI failed, set up a legacy interrupt */
+ if (!intr->number_of_vectors) {
+ intr->number_of_vectors = 1;
+ intr->using_vectors = false;
+ intr->vector_list[0].irq = intr->irq;
+ netif_info(adapter, ifup, adapter->netdev,
+ "using legacy interrupts\n");
+ }
+
+ /* At this point we must have at least one irq */
+ lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0xFFFFFFFF);
+
+ /* map all interrupts to vector 0 */
+ lan743x_csr_write(adapter, INT_VEC_MAP0, 0x00000000);
+ lan743x_csr_write(adapter, INT_VEC_MAP1, 0x00000000);
+ lan743x_csr_write(adapter, INT_VEC_MAP2, 0x00000000);
+ flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+
+ if (intr->using_vectors) {
+ flags |= LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+ } else {
+ flags |= LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR |
+ LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET |
+ LAN743X_VECTOR_FLAG_IRQ_SHARED;
+ }
+
+ if (adapter->csr.flags & LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ;
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C;
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+ flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK;
+ flags |= LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C;
+ flags |= LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C;
+ }
+
+ ret = lan743x_intr_register_isr(adapter, 0, flags,
+ INT_BIT_ALL_RX_ | INT_BIT_ALL_TX_ |
+ INT_BIT_ALL_OTHER_,
+ lan743x_intr_shared_isr, adapter);
+ if (ret)
+ goto clean_up;
+ intr->flags |= INTR_FLAG_IRQ_REQUESTED(0);
+
+ if (intr->using_vectors)
+ lan743x_csr_write(adapter, INT_VEC_EN_SET,
+ INT_VEC_EN_(0));
+
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ lan743x_csr_write(adapter, INT_MOD_CFG0, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG1, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG2, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG3, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG4, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG5, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG6, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_CFG7, LAN743X_INT_MOD);
+ lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432);
+ lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001);
+ lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF);
+ }
+
+ /* enable interrupts */
+ lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+ ret = lan743x_intr_test_isr(adapter);
+ if (ret)
+ goto clean_up;
+
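+	/* if more than one vector was granted, dedicate a vector to each TX channel */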
+ if (intr->number_of_vectors > 1) {
+ int number_of_tx_vectors = intr->number_of_vectors - 1;
+
+ if (number_of_tx_vectors > LAN743X_USED_TX_CHANNELS)
+ number_of_tx_vectors = LAN743X_USED_TX_CHANNELS;
+ flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+ if (adapter->csr.flags &
+ LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+ flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+ }
+
+ for (index = 0; index < number_of_tx_vectors; index++) {
+ u32 int_bit = INT_BIT_DMA_TX_(index);
+ int vector = index + 1;
+
+ /* map TX interrupt to vector */
+ int_vec_map1 |= INT_VEC_MAP1_TX_VEC_(index, vector);
+ lan743x_csr_write(adapter, INT_VEC_MAP1, int_vec_map1);
+ if (flags &
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+ int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+ lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+ int_vec_en_auto_clr);
+ }
+
+ /* Remove TX interrupt from shared mask */
+ intr->vector_list[0].int_mask &= ~int_bit;
+ ret = lan743x_intr_register_isr(adapter, vector, flags,
+ int_bit, lan743x_tx_isr,
+ &adapter->tx[index]);
+ if (ret)
+ goto clean_up;
+ intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+ if (!(flags &
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET))
+ lan743x_csr_write(adapter, INT_VEC_EN_SET,
+ INT_VEC_EN_(vector));
+ }
+ }
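+	/* vectors left over after the TX channels are dedicated to RX channels */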
+ if ((intr->number_of_vectors - LAN743X_USED_TX_CHANNELS) > 1) {
+ int number_of_rx_vectors = intr->number_of_vectors -
+ LAN743X_USED_TX_CHANNELS - 1;
+
+ if (number_of_rx_vectors > LAN743X_USED_RX_CHANNELS)
+ number_of_rx_vectors = LAN743X_USED_RX_CHANNELS;
+
+ flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+ if (adapter->csr.flags &
+ LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+ flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+ LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+ LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+ }
+ for (index = 0; index < number_of_rx_vectors; index++) {
+ int vector = index + 1 + LAN743X_USED_TX_CHANNELS;
+ u32 int_bit = INT_BIT_DMA_RX_(index);
+
+ /* map RX interrupt to vector */
+ int_vec_map0 |= INT_VEC_MAP0_RX_VEC_(index, vector);
+ lan743x_csr_write(adapter, INT_VEC_MAP0, int_vec_map0);
+ if (flags &
+ LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+ int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+ lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+ int_vec_en_auto_clr);
+ }
+
+ /* Remove RX interrupt from shared mask */
+ intr->vector_list[0].int_mask &= ~int_bit;
+ ret = lan743x_intr_register_isr(adapter, vector, flags,
+ int_bit, lan743x_rx_isr,
+ &adapter->rx[index]);
+ if (ret)
+ goto clean_up;
+ intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+
+ lan743x_csr_write(adapter, INT_VEC_EN_SET,
+ INT_VEC_EN_(vector));
+ }
+ }
+ return 0;
+
+clean_up:
+ lan743x_intr_close(adapter);
+ return ret;
+}
+
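+/* write a buffer to the on-chip RAM selected by 'select' through the data port registers */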
+static int lan743x_dp_write(struct lan743x_adapter *adapter,
+ u32 select, u32 addr, u32 length, u32 *buf)
+{
+ int ret = -EIO;
+ u32 dp_sel;
+ int i;
+
+ mutex_lock(&adapter->dp_lock);
+ if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+ 1, 40, 100, 100))
+ goto unlock;
+ dp_sel = lan743x_csr_read(adapter, DP_SEL);
+ dp_sel &= ~DP_SEL_MASK_;
+ dp_sel |= select;
+ lan743x_csr_write(adapter, DP_SEL, dp_sel);
+
+ for (i = 0; i < length; i++) {
+ lan743x_csr_write(adapter, DP_ADDR, addr + i);
+ lan743x_csr_write(adapter, DP_DATA_0, buf[i]);
+ lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
+ if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+ 1, 40, 100, 100))
+ goto unlock;
+ }
+ ret = 0;
+
+unlock:
+ mutex_unlock(&adapter->dp_lock);
+ return ret;
+}
+
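+/* build the MAC_MII_ACC register value for an MII read or write of the given PHY register */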
+static u32 lan743x_mac_mii_access(u16 id, u16 index, int read)
+{
+ u32 ret;
+
+ ret = (id << MAC_MII_ACC_PHY_ADDR_SHIFT_) &
+ MAC_MII_ACC_PHY_ADDR_MASK_;
+ ret |= (index << MAC_MII_ACC_MIIRINDA_SHIFT_) &
+ MAC_MII_ACC_MIIRINDA_MASK_;
+
+ if (read)
+ ret |= MAC_MII_ACC_MII_READ_;
+ else
+ ret |= MAC_MII_ACC_MII_WRITE_;
+ ret |= MAC_MII_ACC_MII_BUSY_;
+
+ return ret;
+}
+
+static int lan743x_mac_mii_wait_till_not_busy(struct lan743x_adapter *adapter)
+{
+ u32 data;
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, MAC_MII_ACC, data,
+ !(data & MAC_MII_ACC_MII_BUSY_), 0, 1000000);
+}
+
+static int lan743x_mdiobus_read(struct mii_bus *bus, int phy_id, int index)
+{
+ struct lan743x_adapter *adapter = bus->priv;
+ u32 val, mii_access;
+ int ret;
+
+	/* confirm MII not busy */
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ return ret;
+
+ /* set the address, index & direction (read from PHY) */
+ mii_access = lan743x_mac_mii_access(phy_id, index, MAC_MII_READ);
+ lan743x_csr_write(adapter, MAC_MII_ACC, mii_access);
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ return ret;
+
+ val = lan743x_csr_read(adapter, MAC_MII_DATA);
+ return (int)(val & 0xFFFF);
+}
+
+static int lan743x_mdiobus_write(struct mii_bus *bus,
+ int phy_id, int index, u16 regval)
+{
+ struct lan743x_adapter *adapter = bus->priv;
+ u32 val, mii_access;
+ int ret;
+
+ /* confirm MII not busy */
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ if (ret < 0)
+ return ret;
+ val = (u32)regval;
+ lan743x_csr_write(adapter, MAC_MII_DATA, val);
+
+ /* set the address, index & direction (write to PHY) */
+ mii_access = lan743x_mac_mii_access(phy_id, index, MAC_MII_WRITE);
+ lan743x_csr_write(adapter, MAC_MII_ACC, mii_access);
+ ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+ return ret;
+}
+
+static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
+ u8 *addr)
+{
+ u32 addr_lo, addr_hi;
+
+ addr_lo = addr[0] |
+ addr[1] << 8 |
+ addr[2] << 16 |
+ addr[3] << 24;
+ addr_hi = addr[4] |
+ addr[5] << 8;
+ lan743x_csr_write(adapter, MAC_RX_ADDRL, addr_lo);
+ lan743x_csr_write(adapter, MAC_RX_ADDRH, addr_hi);
+
+ ether_addr_copy(adapter->mac_address, addr);
+ netif_info(adapter, drv, adapter->netdev,
+ "MAC address set to %pM\n", addr);
+}
+
+static int lan743x_mac_init(struct lan743x_adapter *adapter)
+{
+ bool mac_address_valid = true;
+ struct net_device *netdev;
+ u32 mac_addr_hi = 0;
+ u32 mac_addr_lo = 0;
+ u32 data;
+ int ret;
+
+ netdev = adapter->netdev;
+ lan743x_csr_write(adapter, MAC_CR, MAC_CR_RST_);
+ ret = lan743x_csr_wait_for_bit(adapter, MAC_CR, MAC_CR_RST_,
+ 0, 1000, 20000, 100);
+ if (ret)
+ return ret;
+
+	/* set up automatic duplex and speed detection */
+ data = lan743x_csr_read(adapter, MAC_CR);
+ data |= MAC_CR_ADD_ | MAC_CR_ASD_;
+ data |= MAC_CR_CNTR_RST_;
+ lan743x_csr_write(adapter, MAC_CR, data);
+
+ mac_addr_hi = lan743x_csr_read(adapter, MAC_RX_ADDRH);
+ mac_addr_lo = lan743x_csr_read(adapter, MAC_RX_ADDRL);
+ adapter->mac_address[0] = mac_addr_lo & 0xFF;
+ adapter->mac_address[1] = (mac_addr_lo >> 8) & 0xFF;
+ adapter->mac_address[2] = (mac_addr_lo >> 16) & 0xFF;
+ adapter->mac_address[3] = (mac_addr_lo >> 24) & 0xFF;
+ adapter->mac_address[4] = mac_addr_hi & 0xFF;
+ adapter->mac_address[5] = (mac_addr_hi >> 8) & 0xFF;
+
+ if (((mac_addr_hi & 0x0000FFFF) == 0x0000FFFF) &&
+ mac_addr_lo == 0xFFFFFFFF) {
+ mac_address_valid = false;
+ } else if (!is_valid_ether_addr(adapter->mac_address)) {
+ mac_address_valid = false;
+ }
+
+ if (!mac_address_valid)
+ random_ether_addr(adapter->mac_address);
+ lan743x_mac_set_address(adapter, adapter->mac_address);
+ ether_addr_copy(netdev->dev_addr, adapter->mac_address);
+ return 0;
+}
+
+static int lan743x_mac_open(struct lan743x_adapter *adapter)
+{
+ int ret = 0;
+ u32 temp;
+
+ temp = lan743x_csr_read(adapter, MAC_RX);
+ lan743x_csr_write(adapter, MAC_RX, temp | MAC_RX_RXEN_);
+ temp = lan743x_csr_read(adapter, MAC_TX);
+ lan743x_csr_write(adapter, MAC_TX, temp | MAC_TX_TXEN_);
+ return ret;
+}
+
+static void lan743x_mac_close(struct lan743x_adapter *adapter)
+{
+ u32 temp;
+
+ temp = lan743x_csr_read(adapter, MAC_TX);
+ temp &= ~MAC_TX_TXEN_;
+ lan743x_csr_write(adapter, MAC_TX, temp);
+ lan743x_csr_wait_for_bit(adapter, MAC_TX, MAC_TX_TXD_,
+ 1, 1000, 20000, 100);
+
+ temp = lan743x_csr_read(adapter, MAC_RX);
+ temp &= ~MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, temp);
+ lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+ 1, 1000, 20000, 100);
+}
+
+static void lan743x_mac_flow_ctrl_set_enables(struct lan743x_adapter *adapter,
+ bool tx_enable, bool rx_enable)
+{
+ u32 flow_setting = 0;
+
+	/* set the maximum pause time so that, when FIFO space frees up,
+	 * a zero-value pause frame is sent to release the pause
+	 */
+ flow_setting = MAC_FLOW_CR_FCPT_MASK_;
+ if (tx_enable)
+ flow_setting |= MAC_FLOW_CR_TX_FCEN_;
+ if (rx_enable)
+ flow_setting |= MAC_FLOW_CR_RX_FCEN_;
+ lan743x_csr_write(adapter, MAC_FLOW, flow_setting);
+}
+
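+/* update the maximum receive frame size; the receiver is briefly disabled while the size field changes */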
+static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu)
+{
+ int enabled = 0;
+ u32 mac_rx = 0;
+
+ mac_rx = lan743x_csr_read(adapter, MAC_RX);
+ if (mac_rx & MAC_RX_RXEN_) {
+ enabled = 1;
+ if (mac_rx & MAC_RX_RXD_) {
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ mac_rx &= ~MAC_RX_RXD_;
+ }
+ mac_rx &= ~MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+ 1, 1000, 20000, 100);
+ lan743x_csr_write(adapter, MAC_RX, mac_rx | MAC_RX_RXD_);
+ }
+
+ mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_);
+ mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) &
+ MAC_RX_MAX_SIZE_MASK_);
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+
+ if (enabled) {
+ mac_rx |= MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ }
+ return 0;
+}
+
+/* PHY */
+static int lan743x_phy_reset(struct lan743x_adapter *adapter)
+{
+ u32 data;
+
+	/* Only called from within probe, and before mdiobus_register */
+
+ data = lan743x_csr_read(adapter, PMT_CTL);
+ data |= PMT_CTL_ETH_PHY_RST_;
+ lan743x_csr_write(adapter, PMT_CTL, data);
+
+ return readx_poll_timeout(LAN743X_CSR_READ_OP, PMT_CTL, data,
+ (!(data & PMT_CTL_ETH_PHY_RST_) &&
+ (data & PMT_CTL_READY_)),
+ 50000, 1000000);
+}
+
+static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
+ u8 duplex, u16 local_adv,
+ u16 remote_adv)
+{
+ struct lan743x_phy *phy = &adapter->phy;
+ u8 cap;
+
+ if (phy->fc_autoneg)
+ cap = mii_resolve_flowctrl_fdx(local_adv, remote_adv);
+ else
+ cap = phy->fc_request_control;
+
+ lan743x_mac_flow_ctrl_set_enables(adapter,
+ cap & FLOW_CTRL_TX,
+ cap & FLOW_CTRL_RX);
+}
+
+static int lan743x_phy_init(struct lan743x_adapter *adapter)
+{
+ return lan743x_phy_reset(adapter);
+}
+
+static void lan743x_phy_link_status_change(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ struct phy_device *phydev = netdev->phydev;
+
+ phy_print_status(phydev);
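+	/* on link up, re-resolve flow control from the local and remote advertisements */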
+ if (phydev->state == PHY_RUNNING) {
+ struct ethtool_link_ksettings ksettings;
+ int remote_advertisement = 0;
+ int local_advertisement = 0;
+
+ memset(&ksettings, 0, sizeof(ksettings));
+ phy_ethtool_get_link_ksettings(netdev, &ksettings);
+ local_advertisement = phy_read(phydev, MII_ADVERTISE);
+ if (local_advertisement < 0)
+ return;
+
+ remote_advertisement = phy_read(phydev, MII_LPA);
+ if (remote_advertisement < 0)
+ return;
+
+ lan743x_phy_update_flowcontrol(adapter,
+ ksettings.base.duplex,
+ local_advertisement,
+ remote_advertisement);
+ }
+}
+
+static void lan743x_phy_close(struct lan743x_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ phy_stop(netdev->phydev);
+ phy_disconnect(netdev->phydev);
+ netdev->phydev = NULL;
+}
+
+static int lan743x_phy_open(struct lan743x_adapter *adapter)
+{
+ struct lan743x_phy *phy = &adapter->phy;
+ struct phy_device *phydev;
+ struct net_device *netdev;
+ int ret = -EIO;
+ u32 mii_adv;
+
+ netdev = adapter->netdev;
+ phydev = phy_find_first(adapter->mdiobus);
+ if (!phydev)
+ goto return_error;
+
+ ret = phy_connect_direct(netdev, phydev,
+ lan743x_phy_link_status_change,
+ PHY_INTERFACE_MODE_GMII);
+ if (ret)
+ goto return_error;
+
+ /* MAC doesn't support 1000T Half */
+ phydev->supported &= ~SUPPORTED_1000baseT_Half;
+
+ /* support both flow controls */
+ phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
+ phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+ mii_adv = (u32)mii_advertise_flowctrl(phy->fc_request_control);
+ phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
+ phy->fc_autoneg = phydev->autoneg;
+
+ phy_start(phydev);
+ phy_start_aneg(phydev);
+ return 0;
+
+return_error:
+ return ret;
+}
+
+static void lan743x_rfe_update_mac_address(struct lan743x_adapter *adapter)
+{
+ u8 *mac_addr;
+ u32 mac_addr_hi = 0;
+ u32 mac_addr_lo = 0;
+
+	/* Add the mac address to the Perfect Filter */
+ mac_addr = adapter->mac_address;
+ mac_addr_lo = ((((u32)(mac_addr[0])) << 0) |
+ (((u32)(mac_addr[1])) << 8) |
+ (((u32)(mac_addr[2])) << 16) |
+ (((u32)(mac_addr[3])) << 24));
+ mac_addr_hi = ((((u32)(mac_addr[4])) << 0) |
+ (((u32)(mac_addr[5])) << 8));
+
+ lan743x_csr_write(adapter, RFE_ADDR_FILT_LO(0), mac_addr_lo);
+ lan743x_csr_write(adapter, RFE_ADDR_FILT_HI(0),
+ mac_addr_hi | RFE_ADDR_FILT_HI_VALID_);
+}
+
+static void lan743x_rfe_set_multicast(struct lan743x_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ u32 hash_table[DP_SEL_VHF_HASH_LEN];
+ u32 rfctl;
+ u32 data;
+
+ rfctl = lan743x_csr_read(adapter, RFE_CTL);
+ rfctl &= ~(RFE_CTL_AU_ | RFE_CTL_AM_ |
+ RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_);
+ rfctl |= RFE_CTL_AB_;
+ if (netdev->flags & IFF_PROMISC) {
+ rfctl |= RFE_CTL_AM_ | RFE_CTL_AU_;
+ } else {
+ if (netdev->flags & IFF_ALLMULTI)
+ rfctl |= RFE_CTL_AM_;
+ }
+
+ memset(hash_table, 0, DP_SEL_VHF_HASH_LEN * sizeof(u32));
+ if (netdev_mc_count(netdev)) {
+ struct netdev_hw_addr *ha;
+ int i;
+
+ rfctl |= RFE_CTL_DA_PERFECT_;
+ i = 1;
+ netdev_for_each_mc_addr(ha, netdev) {
+ /* set first 32 into Perfect Filter */
+ if (i < 33) {
+ lan743x_csr_write(adapter,
+ RFE_ADDR_FILT_HI(i), 0);
+ data = ha->addr[3];
+ data = ha->addr[2] | (data << 8);
+ data = ha->addr[1] | (data << 8);
+ data = ha->addr[0] | (data << 8);
+ lan743x_csr_write(adapter,
+ RFE_ADDR_FILT_LO(i), data);
+ data = ha->addr[5];
+ data = ha->addr[4] | (data << 8);
+ data |= RFE_ADDR_FILT_HI_VALID_;
+ lan743x_csr_write(adapter,
+ RFE_ADDR_FILT_HI(i), data);
+ } else {
+ u32 bitnum = (ether_crc(ETH_ALEN, ha->addr) >>
+ 23) & 0x1FF;
+ hash_table[bitnum / 32] |= (1 << (bitnum % 32));
+ rfctl |= RFE_CTL_MCAST_HASH_;
+ }
+ i++;
+ }
+ }
+
+ lan743x_dp_write(adapter, DP_SEL_RFE_RAM,
+ DP_SEL_VHF_VLAN_LEN,
+ DP_SEL_VHF_HASH_LEN, hash_table);
+ lan743x_csr_write(adapter, RFE_CTL, rfctl);
+}
+
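+/* reset the DMA controller and configure descriptor spacing, arbitration and interrupt coalescing */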
+static int lan743x_dmac_init(struct lan743x_adapter *adapter)
+{
+ u32 data = 0;
+
+ lan743x_csr_write(adapter, DMAC_CMD, DMAC_CMD_SWR_);
+ lan743x_csr_wait_for_bit(adapter, DMAC_CMD, DMAC_CMD_SWR_,
+ 0, 1000, 20000, 100);
+ switch (DEFAULT_DMA_DESCRIPTOR_SPACING) {
+ case DMA_DESCRIPTOR_SPACING_16:
+ data = DMAC_CFG_MAX_DSPACE_16_;
+ break;
+ case DMA_DESCRIPTOR_SPACING_32:
+ data = DMAC_CFG_MAX_DSPACE_32_;
+ break;
+ case DMA_DESCRIPTOR_SPACING_64:
+ data = DMAC_CFG_MAX_DSPACE_64_;
+ break;
+ case DMA_DESCRIPTOR_SPACING_128:
+ data = DMAC_CFG_MAX_DSPACE_128_;
+ break;
+ default:
+ return -EPERM;
+ }
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+ data |= DMAC_CFG_COAL_EN_;
+ data |= DMAC_CFG_CH_ARB_SEL_RX_HIGH_;
+ data |= DMAC_CFG_MAX_READ_REQ_SET_(6);
+ lan743x_csr_write(adapter, DMAC_CFG, data);
+ data = DMAC_COAL_CFG_TIMER_LIMIT_SET_(1);
+ data |= DMAC_COAL_CFG_TIMER_TX_START_;
+ data |= DMAC_COAL_CFG_FLUSH_INTS_;
+ data |= DMAC_COAL_CFG_INT_EXIT_COAL_;
+ data |= DMAC_COAL_CFG_CSR_EXIT_COAL_;
+ data |= DMAC_COAL_CFG_TX_THRES_SET_(0x0A);
+ data |= DMAC_COAL_CFG_RX_THRES_SET_(0x0C);
+ lan743x_csr_write(adapter, DMAC_COAL_CFG, data);
+ data = DMAC_OBFF_TX_THRES_SET_(0x08);
+ data |= DMAC_OBFF_RX_THRES_SET_(0x0A);
+ lan743x_csr_write(adapter, DMAC_OBFF_CFG, data);
+ return 0;
+}
+
+static int lan743x_dmac_tx_get_state(struct lan743x_adapter *adapter,
+ int tx_channel)
+{
+ u32 dmac_cmd = 0;
+
+ dmac_cmd = lan743x_csr_read(adapter, DMAC_CMD);
+ return DMAC_CHANNEL_STATE_SET((dmac_cmd &
+ DMAC_CMD_START_T_(tx_channel)),
+ (dmac_cmd &
+ DMAC_CMD_STOP_T_(tx_channel)));
+}
+
+static int lan743x_dmac_tx_wait_till_stopped(struct lan743x_adapter *adapter,
+ int tx_channel)
+{
+ int timeout = 100;
+ int result = 0;
+
+ while (timeout &&
+ ((result = lan743x_dmac_tx_get_state(adapter, tx_channel)) ==
+ DMAC_CHANNEL_STATE_STOP_PENDING)) {
+ usleep_range(1000, 20000);
+ timeout--;
+ }
+ if (result == DMAC_CHANNEL_STATE_STOP_PENDING)
+ result = -ENODEV;
+ return result;
+}
+
+static int lan743x_dmac_rx_get_state(struct lan743x_adapter *adapter,
+ int rx_channel)
+{
+ u32 dmac_cmd = 0;
+
+ dmac_cmd = lan743x_csr_read(adapter, DMAC_CMD);
+ return DMAC_CHANNEL_STATE_SET((dmac_cmd &
+ DMAC_CMD_START_R_(rx_channel)),
+ (dmac_cmd &
+ DMAC_CMD_STOP_R_(rx_channel)));
+}
+
+static int lan743x_dmac_rx_wait_till_stopped(struct lan743x_adapter *adapter,
+ int rx_channel)
+{
+ int timeout = 100;
+ int result = 0;
+
+ while (timeout &&
+ ((result = lan743x_dmac_rx_get_state(adapter, rx_channel)) ==
+ DMAC_CHANNEL_STATE_STOP_PENDING)) {
+ usleep_range(1000, 20000);
+ timeout--;
+ }
+ if (result == DMAC_CHANNEL_STATE_STOP_PENDING)
+ result = -ENODEV;
+ return result;
+}
+
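+/* release the buffer attached to a TX descriptor (unmapping DMA, freeing the skb) and clear the descriptor */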
+static void lan743x_tx_release_desc(struct lan743x_tx *tx,
+ int descriptor_index, bool cleanup)
+{
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_tx_descriptor *descriptor = NULL;
+ u32 descriptor_type = 0;
+
+ descriptor = &tx->ring_cpu_ptr[descriptor_index];
+ buffer_info = &tx->buffer_info[descriptor_index];
+ if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_ACTIVE))
+ goto done;
+
+ descriptor_type = (descriptor->data0) &
+ TX_DESC_DATA0_DTYPE_MASK_;
+ if (descriptor_type == TX_DESC_DATA0_DTYPE_DATA_)
+ goto clean_up_data_descriptor;
+ else
+ goto clear_active;
+
+clean_up_data_descriptor:
+ if (buffer_info->dma_ptr) {
+ if (buffer_info->flags &
+ TX_BUFFER_INFO_FLAG_SKB_FRAGMENT) {
+ dma_unmap_page(&tx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_TO_DEVICE);
+ } else {
+ dma_unmap_single(&tx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_TO_DEVICE);
+ }
+ buffer_info->dma_ptr = 0;
+ buffer_info->buffer_length = 0;
+ }
+ if (buffer_info->skb) {
+ dev_kfree_skb(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+
+clear_active:
+ buffer_info->flags &= ~TX_BUFFER_INFO_FLAG_ACTIVE;
+
+done:
+ memset(buffer_info, 0, sizeof(*buffer_info));
+ memset(descriptor, 0, sizeof(*descriptor));
+}
+
+static int lan743x_tx_next_index(struct lan743x_tx *tx, int index)
+{
+ return ((++index) % tx->ring_size);
+}
+
+static void lan743x_tx_release_completed_descriptors(struct lan743x_tx *tx)
+{
+ while ((*tx->head_cpu_ptr) != (tx->last_head)) {
+ lan743x_tx_release_desc(tx, tx->last_head, false);
+ tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+ }
+}
+
+static void lan743x_tx_release_all_descriptors(struct lan743x_tx *tx)
+{
+ u32 original_head = 0;
+
+ original_head = tx->last_head;
+ do {
+ lan743x_tx_release_desc(tx, tx->last_head, true);
+ tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+ } while (tx->last_head != original_head);
+ memset(tx->ring_cpu_ptr, 0,
+ sizeof(*tx->ring_cpu_ptr) * (tx->ring_size));
+ memset(tx->buffer_info, 0,
+ sizeof(*tx->buffer_info) * (tx->ring_size));
+}
+
+static int lan743x_tx_get_desc_cnt(struct lan743x_tx *tx,
+ struct sk_buff *skb)
+{
+ int result = 1; /* 1 for the main skb buffer */
+ int nr_frags = 0;
+
+ if (skb_is_gso(skb))
+ result++; /* requires an extension descriptor */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ result += nr_frags; /* 1 for each fragment buffer */
+ return result;
+}
+
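+/* number of free TX descriptors; one slot is kept open so a full ring can be told apart from an empty one */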
+static int lan743x_tx_get_avail_desc(struct lan743x_tx *tx)
+{
+ int last_head = tx->last_head;
+ int last_tail = tx->last_tail;
+
+ if (last_tail >= last_head)
+ return tx->ring_size - last_tail + last_head - 1;
+ else
+ return last_head - last_tail - 1;
+}
+
+static int lan743x_tx_frame_start(struct lan743x_tx *tx,
+ unsigned char *first_buffer,
+ unsigned int first_buffer_length,
+ unsigned int frame_length,
+ bool check_sum)
+{
+ /* called only from within lan743x_tx_xmit_frame.
+ * assuming tx->ring_lock has already been acquired.
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_adapter *adapter = tx->adapter;
+ struct device *dev = &adapter->pdev->dev;
+ dma_addr_t dma_ptr;
+
+ tx->frame_flags |= TX_FRAME_FLAG_IN_PROGRESS;
+ tx->frame_first = tx->last_tail;
+ tx->frame_tail = tx->frame_first;
+
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+ dma_ptr = dma_map_single(dev, first_buffer, first_buffer_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_ptr))
+ return -ENOMEM;
+
+ tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr);
+ tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr);
+ tx_descriptor->data3 = (frame_length << 16) &
+ TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+ buffer_info->skb = NULL;
+ buffer_info->dma_ptr = dma_ptr;
+ buffer_info->buffer_length = first_buffer_length;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+ tx->frame_data0 = (first_buffer_length &
+ TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+ TX_DESC_DATA0_DTYPE_DATA_ |
+ TX_DESC_DATA0_FS_ |
+ TX_DESC_DATA0_FCS_;
+
+ if (check_sum)
+ tx->frame_data0 |= TX_DESC_DATA0_ICE_ |
+ TX_DESC_DATA0_IPE_ |
+ TX_DESC_DATA0_TPE_;
+
+	/* data0 will be programmed in one of the other frame assembler functions */
+ return 0;
+}
+
+static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
+ unsigned int frame_length)
+{
+ /* called only from within lan743x_tx_xmit_frame.
+ * assuming tx->ring_lock has already been acquired.
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+
+ /* wrap up previous descriptor */
+ tx->frame_data0 |= TX_DESC_DATA0_EXT_;
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ tx_descriptor->data0 = tx->frame_data0;
+
+ /* move to next descriptor */
+ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+
+ /* add extension descriptor */
+ tx_descriptor->data1 = 0;
+ tx_descriptor->data2 = 0;
+ tx_descriptor->data3 = 0;
+
+ buffer_info->skb = NULL;
+ buffer_info->dma_ptr = 0;
+ buffer_info->buffer_length = 0;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+ tx->frame_data0 = (frame_length & TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_) |
+ TX_DESC_DATA0_DTYPE_EXT_ |
+ TX_DESC_DATA0_EXT_LSO_;
+
+	/* data0 will be programmed in one of the other frame assembler functions */
+}
+
+static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx,
+ const struct skb_frag_struct *fragment,
+ unsigned int frame_length)
+{
+ /* called only from within lan743x_tx_xmit_frame
+ * assuming tx->ring_lock has already been acquired
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_adapter *adapter = tx->adapter;
+ struct device *dev = &adapter->pdev->dev;
+ unsigned int fragment_length = 0;
+ dma_addr_t dma_ptr;
+
+ fragment_length = skb_frag_size(fragment);
+ if (!fragment_length)
+ return 0;
+
+ /* wrap up previous descriptor */
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ tx_descriptor->data0 = tx->frame_data0;
+
+ /* move to next descriptor */
+ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+ dma_ptr = skb_frag_dma_map(dev, fragment,
+ 0, fragment_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_ptr)) {
+ int desc_index;
+
+ /* cleanup all previously setup descriptors */
+ desc_index = tx->frame_first;
+ while (desc_index != tx->frame_tail) {
+ lan743x_tx_release_desc(tx, desc_index, true);
+ desc_index = lan743x_tx_next_index(tx, desc_index);
+ }
+ dma_wmb();
+ tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+ tx->frame_first = 0;
+ tx->frame_data0 = 0;
+ tx->frame_tail = 0;
+ return -ENOMEM;
+ }
+
+ tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr);
+ tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr);
+ tx_descriptor->data3 = (frame_length << 16) &
+ TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+ buffer_info->skb = NULL;
+ buffer_info->dma_ptr = dma_ptr;
+ buffer_info->buffer_length = fragment_length;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_SKB_FRAGMENT;
+
+ tx->frame_data0 = (fragment_length & TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+ TX_DESC_DATA0_DTYPE_DATA_ |
+ TX_DESC_DATA0_FCS_;
+
+	/* data0 will be programmed in one of the other frame assembler functions */
+ return 0;
+}
+
+static void lan743x_tx_frame_end(struct lan743x_tx *tx,
+ struct sk_buff *skb,
+ bool ignore_sync)
+{
+ /* called only from within lan743x_tx_xmit_frame
+ * assuming tx->ring_lock has already been acquired
+ */
+ struct lan743x_tx_descriptor *tx_descriptor = NULL;
+ struct lan743x_tx_buffer_info *buffer_info = NULL;
+ struct lan743x_adapter *adapter = tx->adapter;
+ u32 tx_tail_flags = 0;
+
+ /* wrap up previous descriptor */
+ tx->frame_data0 |= TX_DESC_DATA0_LS_;
+ tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+
+ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+ buffer_info = &tx->buffer_info[tx->frame_tail];
+ buffer_info->skb = skb;
+ if (ignore_sync)
+ buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC;
+
+ tx_descriptor->data0 = tx->frame_data0;
+ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+ tx->last_tail = tx->frame_tail;
+
+ dma_wmb();
+
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+ tx_tail_flags |= TX_TAIL_SET_TOP_INT_VEC_EN_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET)
+ tx_tail_flags |= TX_TAIL_SET_DMAC_INT_EN_ |
+ TX_TAIL_SET_TOP_INT_EN_;
+
+ lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+ tx_tail_flags | tx->frame_tail);
+ tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+}
+
+static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
+ struct sk_buff *skb)
+{
+ int required_number_of_descriptors = 0;
+ unsigned int start_frame_length = 0;
+ unsigned int frame_length = 0;
+ unsigned int head_length = 0;
+ unsigned long irq_flags = 0;
+ bool ignore_sync = false;
+ int nr_frags = 0;
+ bool gso = false;
+ int j;
+
+ required_number_of_descriptors = lan743x_tx_get_desc_cnt(tx, skb);
+
+ spin_lock_irqsave(&tx->ring_lock, irq_flags);
+ if (required_number_of_descriptors >
+ lan743x_tx_get_avail_desc(tx)) {
+ if (required_number_of_descriptors > (tx->ring_size - 1)) {
+ dev_kfree_skb(skb);
+ } else {
+ /* save to overflow buffer */
+ tx->overflow_skb = skb;
+ netif_stop_queue(tx->adapter->netdev);
+ }
+ goto unlock;
+ }
+
+ /* space available, transmit skb */
+ head_length = skb_headlen(skb);
+ frame_length = skb_pagelen(skb);
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ start_frame_length = frame_length;
+ gso = skb_is_gso(skb);
+ if (gso) {
+ start_frame_length = max(skb_shinfo(skb)->gso_size,
+ (unsigned short)8);
+ }
+
+ if (lan743x_tx_frame_start(tx,
+ skb->data, head_length,
+ start_frame_length,
+ skb->ip_summed == CHECKSUM_PARTIAL)) {
+ dev_kfree_skb(skb);
+ goto unlock;
+ }
+
+ if (gso)
+ lan743x_tx_frame_add_lso(tx, frame_length);
+
+ if (nr_frags <= 0)
+ goto finish;
+
+ for (j = 0; j < nr_frags; j++) {
+ const struct skb_frag_struct *frag;
+
+ frag = &(skb_shinfo(skb)->frags[j]);
+ if (lan743x_tx_frame_add_fragment(tx, frag, frame_length)) {
+			/* on error there is no need to call
+			 * lan743x_tx_frame_end;
+			 * frame assembler cleanup was performed inside
+			 * lan743x_tx_frame_add_fragment
+			 */
+ dev_kfree_skb(skb);
+ goto unlock;
+ }
+ }
+
+finish:
+ lan743x_tx_frame_end(tx, skb, ignore_sync);
+
+unlock:
+ spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+ return NETDEV_TX_OK;
+}
+
+static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
+{
+ struct lan743x_tx *tx = container_of(napi, struct lan743x_tx, napi);
+ struct lan743x_adapter *adapter = tx->adapter;
+ bool start_transmitter = false;
+ unsigned long irq_flags = 0;
+ u32 ioc_bit = 0;
+ u32 int_sts = 0;
+
+ ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+ int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C)
+ lan743x_csr_write(adapter, DMAC_INT_STS, ioc_bit);
+ spin_lock_irqsave(&tx->ring_lock, irq_flags);
+
+ /* clean up tx ring */
+ lan743x_tx_release_completed_descriptors(tx);
+ if (netif_queue_stopped(adapter->netdev)) {
+ if (tx->overflow_skb) {
+ if (lan743x_tx_get_desc_cnt(tx, tx->overflow_skb) <=
+ lan743x_tx_get_avail_desc(tx))
+ start_transmitter = true;
+ } else {
+ netif_wake_queue(adapter->netdev);
+ }
+ }
+ spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+
+ if (start_transmitter) {
+ /* space is now available, transmit overflow skb */
+ lan743x_tx_xmit_frame(tx, tx->overflow_skb);
+ tx->overflow_skb = NULL;
+ netif_wake_queue(adapter->netdev);
+ }
+
+ if (!napi_complete_done(napi, weight))
+ goto done;
+
+ /* enable isr */
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ lan743x_csr_read(adapter, INT_STS);
+
+done:
+ return weight;
+}
+
+static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
+{
+ if (tx->head_cpu_ptr) {
+ pci_free_consistent(tx->adapter->pdev,
+ sizeof(*tx->head_cpu_ptr),
+ (void *)(tx->head_cpu_ptr),
+ tx->head_dma_ptr);
+ tx->head_cpu_ptr = NULL;
+ tx->head_dma_ptr = 0;
+ }
+ kfree(tx->buffer_info);
+ tx->buffer_info = NULL;
+
+ if (tx->ring_cpu_ptr) {
+ pci_free_consistent(tx->adapter->pdev,
+ tx->ring_allocation_size,
+ tx->ring_cpu_ptr,
+ tx->ring_dma_ptr);
+ tx->ring_allocation_size = 0;
+ tx->ring_cpu_ptr = NULL;
+ tx->ring_dma_ptr = 0;
+ }
+ tx->ring_size = 0;
+}
+
+static int lan743x_tx_ring_init(struct lan743x_tx *tx)
+{
+ size_t ring_allocation_size = 0;
+ void *cpu_ptr = NULL;
+ dma_addr_t dma_ptr;
+ int ret = -ENOMEM;
+
+ tx->ring_size = LAN743X_TX_RING_SIZE;
+ if (tx->ring_size & ~TX_CFG_B_TX_RING_LEN_MASK_) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ ring_allocation_size = ALIGN(tx->ring_size *
+ sizeof(struct lan743x_tx_descriptor),
+ PAGE_SIZE);
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(tx->adapter->pdev,
+ ring_allocation_size, &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ tx->ring_allocation_size = ring_allocation_size;
+ tx->ring_cpu_ptr = (struct lan743x_tx_descriptor *)cpu_ptr;
+ tx->ring_dma_ptr = dma_ptr;
+
+ cpu_ptr = kcalloc(tx->ring_size, sizeof(*tx->buffer_info), GFP_KERNEL);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ tx->buffer_info = (struct lan743x_tx_buffer_info *)cpu_ptr;
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(tx->adapter->pdev,
+ sizeof(*tx->head_cpu_ptr), &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ tx->head_cpu_ptr = cpu_ptr;
+ tx->head_dma_ptr = dma_ptr;
+ if (tx->head_dma_ptr & 0x3) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ lan743x_tx_ring_cleanup(tx);
+ return ret;
+}
+
+static void lan743x_tx_close(struct lan743x_tx *tx)
+{
+ struct lan743x_adapter *adapter = tx->adapter;
+
+ lan743x_csr_write(adapter,
+ DMAC_CMD,
+ DMAC_CMD_STOP_T_(tx->channel_number));
+ lan743x_dmac_tx_wait_till_stopped(adapter, tx->channel_number);
+
+ lan743x_csr_write(adapter,
+ DMAC_INT_EN_CLR,
+ DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ napi_disable(&tx->napi);
+ netif_napi_del(&tx->napi);
+
+ lan743x_csr_write(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_DIS_(tx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_EN_(tx->channel_number),
+ 0, 1000, 20000, 100);
+
+ lan743x_tx_release_all_descriptors(tx);
+
+ if (tx->overflow_skb) {
+ dev_kfree_skb(tx->overflow_skb);
+ tx->overflow_skb = NULL;
+ }
+
+ lan743x_tx_ring_cleanup(tx);
+}
+
+static int lan743x_tx_open(struct lan743x_tx *tx)
+{
+ struct lan743x_adapter *adapter = NULL;
+ u32 data = 0;
+ int ret;
+
+ adapter = tx->adapter;
+ ret = lan743x_tx_ring_init(tx);
+ if (ret)
+ return ret;
+
+ /* initialize fifo */
+ lan743x_csr_write(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_RESET_(tx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_RESET_(tx->channel_number),
+ 0, 1000, 20000, 100);
+
+ /* enable fifo */
+ lan743x_csr_write(adapter, FCT_TX_CTL,
+ FCT_TX_CTL_EN_(tx->channel_number));
+
+ /* reset tx channel */
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_TX_SWR_(tx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+ DMAC_CMD_TX_SWR_(tx->channel_number),
+ 0, 1000, 20000, 100);
+
+ /* Write TX_BASE_ADDR */
+ lan743x_csr_write(adapter,
+ TX_BASE_ADDRH(tx->channel_number),
+ DMA_ADDR_HIGH32(tx->ring_dma_ptr));
+ lan743x_csr_write(adapter,
+ TX_BASE_ADDRL(tx->channel_number),
+ DMA_ADDR_LOW32(tx->ring_dma_ptr));
+
+ /* Write TX_CFG_B */
+ data = lan743x_csr_read(adapter, TX_CFG_B(tx->channel_number));
+ data &= ~TX_CFG_B_TX_RING_LEN_MASK_;
+ data |= ((tx->ring_size) & TX_CFG_B_TX_RING_LEN_MASK_);
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+ data |= TX_CFG_B_TDMABL_512_;
+ lan743x_csr_write(adapter, TX_CFG_B(tx->channel_number), data);
+
+ /* Write TX_CFG_A */
+ data = TX_CFG_A_TX_TMR_HPWB_SEL_IOC_ | TX_CFG_A_TX_HP_WB_EN_;
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ data |= TX_CFG_A_TX_HP_WB_ON_INT_TMR_;
+ data |= TX_CFG_A_TX_PF_THRES_SET_(0x10);
+ data |= TX_CFG_A_TX_PF_PRI_THRES_SET_(0x04);
+ data |= TX_CFG_A_TX_HP_WB_THRES_SET_(0x07);
+ }
+ lan743x_csr_write(adapter, TX_CFG_A(tx->channel_number), data);
+
+ /* Write TX_HEAD_WRITEBACK_ADDR */
+ lan743x_csr_write(adapter,
+ TX_HEAD_WRITEBACK_ADDRH(tx->channel_number),
+ DMA_ADDR_HIGH32(tx->head_dma_ptr));
+ lan743x_csr_write(adapter,
+ TX_HEAD_WRITEBACK_ADDRL(tx->channel_number),
+ DMA_ADDR_LOW32(tx->head_dma_ptr));
+
+ /* set last head */
+ tx->last_head = lan743x_csr_read(adapter, TX_HEAD(tx->channel_number));
+
+ /* write TX_TAIL */
+ tx->last_tail = 0;
+ lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+ (u32)(tx->last_tail));
+ tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+ INT_BIT_DMA_TX_
+ (tx->channel_number));
+ netif_napi_add(adapter->netdev,
+ &tx->napi, lan743x_tx_napi_poll,
+ tx->ring_size - 1);
+ napi_enable(&tx->napi);
+
+ data = 0;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+ data |= TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+ data |= TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+ data |= TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_;
+ if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+ data |= TX_CFG_C_TX_INT_EN_R2C_;
+ lan743x_csr_write(adapter, TX_CFG_C(tx->channel_number), data);
+
+ if (!(tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET))
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_TX_(tx->channel_number));
+ lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+ DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+
+ /* start dmac channel */
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_START_T_(tx->channel_number));
+ return 0;
+}
+
+static int lan743x_rx_next_index(struct lan743x_rx *rx, int index)
+{
+ return ((++index) % rx->ring_size);
+}
+
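+/* allocate and DMA-map a fresh receive buffer for a ring slot, then hand the descriptor to the hardware */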
+static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index)
+{
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+ int length = 0;
+
+ length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING);
+ descriptor = &rx->ring_cpu_ptr[index];
+ buffer_info = &rx->buffer_info[index];
+ buffer_info->skb = __netdev_alloc_skb(rx->adapter->netdev,
+ length,
+ GFP_ATOMIC | GFP_DMA);
+ if (!(buffer_info->skb))
+ return -ENOMEM;
+ buffer_info->dma_ptr = dma_map_single(&rx->adapter->pdev->dev,
+ buffer_info->skb->data,
+ length,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&rx->adapter->pdev->dev,
+ buffer_info->dma_ptr)) {
+ buffer_info->dma_ptr = 0;
+ return -ENOMEM;
+ }
+
+ buffer_info->buffer_length = length;
+ descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr);
+ descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr);
+ descriptor->data3 = 0;
+ descriptor->data0 = (RX_DESC_DATA0_OWN_ |
+ (length & RX_DESC_DATA0_BUF_LENGTH_MASK_));
+ skb_reserve(buffer_info->skb, RX_HEAD_PADDING);
+
+ return 0;
+}
+
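+/* re-post the existing buffer for a ring slot to the hardware without reallocating it */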
+static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index)
+{
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+
+ descriptor = &rx->ring_cpu_ptr[index];
+ buffer_info = &rx->buffer_info[index];
+
+ descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr);
+ descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr);
+ descriptor->data3 = 0;
+ descriptor->data0 = (RX_DESC_DATA0_OWN_ |
+ ((buffer_info->buffer_length) &
+ RX_DESC_DATA0_BUF_LENGTH_MASK_));
+}
+
+static void lan743x_rx_release_ring_element(struct lan743x_rx *rx, int index)
+{
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+
+ descriptor = &rx->ring_cpu_ptr[index];
+ buffer_info = &rx->buffer_info[index];
+
+ memset(descriptor, 0, sizeof(*descriptor));
+
+ if (buffer_info->dma_ptr) {
+ dma_unmap_single(&rx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_FROM_DEVICE);
+ buffer_info->dma_ptr = 0;
+ }
+
+ if (buffer_info->skb) {
+ dev_kfree_skb(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+
+ memset(buffer_info, 0, sizeof(*buffer_info));
+}
+
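+/* check the ring for a completed frame and pass it to the stack; returns a RX_PROCESS_RESULT_* code */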
+static int lan743x_rx_process_packet(struct lan743x_rx *rx)
+{
+ struct skb_shared_hwtstamps *hwtstamps = NULL;
+ int result = RX_PROCESS_RESULT_NOTHING_TO_DO;
+ struct lan743x_rx_buffer_info *buffer_info;
+ struct lan743x_rx_descriptor *descriptor;
+ int current_head_index = -1;
+ int extension_index = -1;
+ int first_index = -1;
+ int last_index = -1;
+
+ current_head_index = *rx->head_cpu_ptr;
+ if (current_head_index < 0 || current_head_index >= rx->ring_size)
+ goto done;
+
+ if (rx->last_head < 0 || rx->last_head >= rx->ring_size)
+ goto done;
+
+ if (rx->last_head != current_head_index) {
+ descriptor = &rx->ring_cpu_ptr[rx->last_head];
+ if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+ goto done;
+
+ if (!(descriptor->data0 & RX_DESC_DATA0_FS_))
+ goto done;
+
+ first_index = rx->last_head;
+ if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+ last_index = rx->last_head;
+ } else {
+ int index;
+
+ index = lan743x_rx_next_index(rx, first_index);
+ while (index != current_head_index) {
+ descriptor = &rx->ring_cpu_ptr[index];
+ if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+ goto done;
+
+ if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+ last_index = index;
+ break;
+ }
+ index = lan743x_rx_next_index(rx, index);
+ }
+ }
+ if (last_index >= 0) {
+ descriptor = &rx->ring_cpu_ptr[last_index];
+ if (descriptor->data0 & RX_DESC_DATA0_EXT_) {
+ /* extension is expected to follow */
+ int index = lan743x_rx_next_index(rx,
+ last_index);
+ if (index != current_head_index) {
+ descriptor = &rx->ring_cpu_ptr[index];
+ if (descriptor->data0 &
+ RX_DESC_DATA0_OWN_) {
+ goto done;
+ }
+ if (descriptor->data0 &
+ RX_DESC_DATA0_EXT_) {
+ extension_index = index;
+ } else {
+ goto done;
+ }
+ } else {
+ /* extension is not yet available */
+ /* prevent processing of this packet */
+ first_index = -1;
+ last_index = -1;
+ }
+ }
+ }
+ }
+ if (first_index >= 0 && last_index >= 0) {
+ int real_last_index = last_index;
+ struct sk_buff *skb = NULL;
+ u32 ts_sec = 0;
+ u32 ts_nsec = 0;
+
+ /* packet is available */
+ if (first_index == last_index) {
+ /* single buffer packet */
+ int packet_length;
+
+ buffer_info = &rx->buffer_info[first_index];
+ skb = buffer_info->skb;
+ descriptor = &rx->ring_cpu_ptr[first_index];
+
+ /* unmap from dma */
+ if (buffer_info->dma_ptr) {
+ dma_unmap_single(&rx->adapter->pdev->dev,
+ buffer_info->dma_ptr,
+ buffer_info->buffer_length,
+ DMA_FROM_DEVICE);
+ buffer_info->dma_ptr = 0;
+ buffer_info->buffer_length = 0;
+ }
+ buffer_info->skb = NULL;
+ packet_length = RX_DESC_DATA0_FRAME_LENGTH_GET_
+ (descriptor->data0);
+ skb_put(skb, packet_length - 4);
+ skb->protocol = eth_type_trans(skb,
+ rx->adapter->netdev);
+ lan743x_rx_allocate_ring_element(rx, first_index);
+ } else {
+ int index = first_index;
+
+			/* multi-buffer packets are not supported;
+			 * this should not happen since
+			 * buffers are allocated to be at least jumbo size
+			 */
+
+ /* clean up buffers */
+ if (first_index <= last_index) {
+ while ((index >= first_index) &&
+ (index <= last_index)) {
+ lan743x_rx_release_ring_element(rx,
+ index);
+ lan743x_rx_allocate_ring_element(rx,
+ index);
+ index = lan743x_rx_next_index(rx,
+ index);
+ }
+ } else {
+ while ((index >= first_index) ||
+ (index <= last_index)) {
+ lan743x_rx_release_ring_element(rx,
+ index);
+ lan743x_rx_allocate_ring_element(rx,
+ index);
+ index = lan743x_rx_next_index(rx,
+ index);
+ }
+ }
+ }
+
+ if (extension_index >= 0) {
+ descriptor = &rx->ring_cpu_ptr[extension_index];
+ buffer_info = &rx->buffer_info[extension_index];
+
+ ts_sec = descriptor->data1;
+ ts_nsec = (descriptor->data2 &
+ RX_DESC_DATA2_TS_NS_MASK_);
+ lan743x_rx_reuse_ring_element(rx, extension_index);
+ real_last_index = extension_index;
+ }
+
+ if (!skb) {
+ result = RX_PROCESS_RESULT_PACKET_DROPPED;
+ goto move_forward;
+ }
+
+ if (extension_index < 0)
+ goto pass_packet_to_os;
+ hwtstamps = skb_hwtstamps(skb);
+ if (hwtstamps)
+ hwtstamps->hwtstamp = ktime_set(ts_sec, ts_nsec);
+
+pass_packet_to_os:
+ /* pass packet to OS */
+ napi_gro_receive(&rx->napi, skb);
+ result = RX_PROCESS_RESULT_PACKET_RECEIVED;
+
+move_forward:
+ /* push tail and head forward */
+ rx->last_tail = real_last_index;
+ rx->last_head = lan743x_rx_next_index(rx, real_last_index);
+ }
+done:
+ return result;
+}
+
+static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
+{
+ struct lan743x_rx *rx = container_of(napi, struct lan743x_rx, napi);
+ struct lan743x_adapter *adapter = rx->adapter;
+ u32 rx_tail_flags = 0;
+ int count;
+
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C) {
+ /* clear int status bit before reading packet */
+ lan743x_csr_write(adapter, DMAC_INT_STS,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ }
+ count = 0;
+ while (count < weight) {
+ int rx_process_result = -1;
+
+ rx_process_result = lan743x_rx_process_packet(rx);
+ if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) {
+ count++;
+ } else if (rx_process_result ==
+ RX_PROCESS_RESULT_NOTHING_TO_DO) {
+ break;
+ } else if (rx_process_result ==
+ RX_PROCESS_RESULT_PACKET_DROPPED) {
+ continue;
+ }
+ }
+ rx->frame_count += count;
+ if (count == weight)
+ goto done;
+
+ if (!napi_complete_done(napi, count))
+ goto done;
+
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+ rx_tail_flags |= RX_TAIL_SET_TOP_INT_VEC_EN_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET) {
+ rx_tail_flags |= RX_TAIL_SET_TOP_INT_EN_;
+ } else {
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ }
+
+ /* update RX_TAIL */
+ lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+ rx_tail_flags | rx->last_tail);
+done:
+ return count;
+}
+
+static void lan743x_rx_ring_cleanup(struct lan743x_rx *rx)
+{
+ if (rx->buffer_info && rx->ring_cpu_ptr) {
+ int index;
+
+ for (index = 0; index < rx->ring_size; index++)
+ lan743x_rx_release_ring_element(rx, index);
+ }
+
+ if (rx->head_cpu_ptr) {
+ pci_free_consistent(rx->adapter->pdev,
+ sizeof(*rx->head_cpu_ptr),
+ rx->head_cpu_ptr,
+ rx->head_dma_ptr);
+ rx->head_cpu_ptr = NULL;
+ rx->head_dma_ptr = 0;
+ }
+
+ kfree(rx->buffer_info);
+ rx->buffer_info = NULL;
+
+ if (rx->ring_cpu_ptr) {
+ pci_free_consistent(rx->adapter->pdev,
+ rx->ring_allocation_size,
+ rx->ring_cpu_ptr,
+ rx->ring_dma_ptr);
+ rx->ring_allocation_size = 0;
+ rx->ring_cpu_ptr = NULL;
+ rx->ring_dma_ptr = 0;
+ }
+
+ rx->ring_size = 0;
+ rx->last_head = 0;
+}
+
+static int lan743x_rx_ring_init(struct lan743x_rx *rx)
+{
+ size_t ring_allocation_size = 0;
+ dma_addr_t dma_ptr = 0;
+ void *cpu_ptr = NULL;
+ int ret = -ENOMEM;
+ int index = 0;
+
+ rx->ring_size = LAN743X_RX_RING_SIZE;
+ if (rx->ring_size <= 1) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ if (rx->ring_size & ~RX_CFG_B_RX_RING_LEN_MASK_) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ ring_allocation_size = ALIGN(rx->ring_size *
+ sizeof(struct lan743x_rx_descriptor),
+ PAGE_SIZE);
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(rx->adapter->pdev,
+ ring_allocation_size, &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ rx->ring_allocation_size = ring_allocation_size;
+ rx->ring_cpu_ptr = (struct lan743x_rx_descriptor *)cpu_ptr;
+ rx->ring_dma_ptr = dma_ptr;
+
+ cpu_ptr = kcalloc(rx->ring_size, sizeof(*rx->buffer_info),
+ GFP_KERNEL);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ rx->buffer_info = (struct lan743x_rx_buffer_info *)cpu_ptr;
+ dma_ptr = 0;
+ cpu_ptr = pci_zalloc_consistent(rx->adapter->pdev,
+ sizeof(*rx->head_cpu_ptr), &dma_ptr);
+ if (!cpu_ptr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ rx->head_cpu_ptr = cpu_ptr;
+ rx->head_dma_ptr = dma_ptr;
+ if (rx->head_dma_ptr & 0x3) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ rx->last_head = 0;
+ for (index = 0; index < rx->ring_size; index++) {
+ ret = lan743x_rx_allocate_ring_element(rx, index);
+ if (ret)
+ goto cleanup;
+ }
+ return 0;
+
+cleanup:
+ lan743x_rx_ring_cleanup(rx);
+ return ret;
+}
+
+static void lan743x_rx_close(struct lan743x_rx *rx)
+{
+ struct lan743x_adapter *adapter = rx->adapter;
+
+ lan743x_csr_write(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_DIS_(rx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_EN_(rx->channel_number),
+ 0, 1000, 20000, 100);
+
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_STOP_R_(rx->channel_number));
+ lan743x_dmac_rx_wait_till_stopped(adapter, rx->channel_number);
+
+ lan743x_csr_write(adapter, DMAC_INT_EN_CLR,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ lan743x_csr_write(adapter, INT_EN_CLR,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ napi_disable(&rx->napi);
+
+ netif_napi_del(&rx->napi);
+
+ lan743x_rx_ring_cleanup(rx);
+}
+
+static int lan743x_rx_open(struct lan743x_rx *rx)
+{
+ struct lan743x_adapter *adapter = rx->adapter;
+ u32 data = 0;
+ int ret;
+
+ rx->frame_count = 0;
+ ret = lan743x_rx_ring_init(rx);
+ if (ret)
+ goto return_error;
+
+ netif_napi_add(adapter->netdev,
+ &rx->napi, lan743x_rx_napi_poll,
+ rx->ring_size - 1);
+
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_RX_SWR_(rx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+ DMAC_CMD_RX_SWR_(rx->channel_number),
+ 0, 1000, 20000, 100);
+
+ /* set ring base address */
+ lan743x_csr_write(adapter,
+ RX_BASE_ADDRH(rx->channel_number),
+ DMA_ADDR_HIGH32(rx->ring_dma_ptr));
+ lan743x_csr_write(adapter,
+ RX_BASE_ADDRL(rx->channel_number),
+ DMA_ADDR_LOW32(rx->ring_dma_ptr));
+
+ /* set rx write back address */
+ lan743x_csr_write(adapter,
+ RX_HEAD_WRITEBACK_ADDRH(rx->channel_number),
+ DMA_ADDR_HIGH32(rx->head_dma_ptr));
+ lan743x_csr_write(adapter,
+ RX_HEAD_WRITEBACK_ADDRL(rx->channel_number),
+ DMA_ADDR_LOW32(rx->head_dma_ptr));
+ data = RX_CFG_A_RX_HP_WB_EN_;
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+ data |= (RX_CFG_A_RX_WB_ON_INT_TMR_ |
+ RX_CFG_A_RX_WB_THRES_SET_(0x7) |
+ RX_CFG_A_RX_PF_THRES_SET_(16) |
+ RX_CFG_A_RX_PF_PRI_THRES_SET_(4));
+ }
+
+ /* set RX_CFG_A */
+ lan743x_csr_write(adapter,
+ RX_CFG_A(rx->channel_number), data);
+
+ /* set RX_CFG_B */
+ data = lan743x_csr_read(adapter, RX_CFG_B(rx->channel_number));
+ data &= ~RX_CFG_B_RX_PAD_MASK_;
+ if (!RX_HEAD_PADDING)
+ data |= RX_CFG_B_RX_PAD_0_;
+ else
+ data |= RX_CFG_B_RX_PAD_2_;
+ data &= ~RX_CFG_B_RX_RING_LEN_MASK_;
+ data |= ((rx->ring_size) & RX_CFG_B_RX_RING_LEN_MASK_);
+ data |= RX_CFG_B_TS_ALL_RX_;
+ if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+ data |= RX_CFG_B_RDMABL_512_;
+
+ lan743x_csr_write(adapter, RX_CFG_B(rx->channel_number), data);
+ rx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+ INT_BIT_DMA_RX_
+ (rx->channel_number));
+
+ /* set RX_CFG_C */
+ data = 0;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+ data |= RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+ data |= RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+ data |= RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_;
+ if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+ data |= RX_CFG_C_RX_INT_EN_R2C_;
+ lan743x_csr_write(adapter, RX_CFG_C(rx->channel_number), data);
+
+ rx->last_tail = ((u32)(rx->ring_size - 1));
+ lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+ rx->last_tail);
+ rx->last_head = lan743x_csr_read(adapter, RX_HEAD(rx->channel_number));
+ if (rx->last_head) {
+ ret = -EIO;
+ goto napi_delete;
+ }
+
+ napi_enable(&rx->napi);
+
+ lan743x_csr_write(adapter, INT_EN_SET,
+ INT_BIT_DMA_RX_(rx->channel_number));
+ lan743x_csr_write(adapter, DMAC_INT_STS,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+ DMAC_INT_BIT_RXFRM_(rx->channel_number));
+ lan743x_csr_write(adapter, DMAC_CMD,
+ DMAC_CMD_START_R_(rx->channel_number));
+
+ /* initialize fifo */
+ lan743x_csr_write(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_RESET_(rx->channel_number));
+ lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_RESET_(rx->channel_number),
+ 0, 1000, 20000, 100);
+ lan743x_csr_write(adapter, FCT_FLOW(rx->channel_number),
+ FCT_FLOW_CTL_REQ_EN_ |
+ FCT_FLOW_CTL_ON_THRESHOLD_SET_(0x2A) |
+ FCT_FLOW_CTL_OFF_THRESHOLD_SET_(0xA));
+
+ /* enable fifo */
+ lan743x_csr_write(adapter, FCT_RX_CTL,
+ FCT_RX_CTL_EN_(rx->channel_number));
+ return 0;
+
+napi_delete:
+ netif_napi_del(&rx->napi);
+ lan743x_rx_ring_cleanup(rx);
+
+return_error:
+ return ret;
+}
+
+static int lan743x_netdev_close(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ int index;
+
+ lan743x_tx_close(&adapter->tx[0]);
+
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++)
+ lan743x_rx_close(&adapter->rx[index]);
+
+ lan743x_phy_close(adapter);
+
+ lan743x_mac_close(adapter);
+
+ lan743x_intr_close(adapter);
+
+ return 0;
+}
+
+static int lan743x_netdev_open(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ int index;
+ int ret;
+
+ ret = lan743x_intr_open(adapter);
+ if (ret)
+ goto return_error;
+
+ ret = lan743x_mac_open(adapter);
+ if (ret)
+ goto close_intr;
+
+ ret = lan743x_phy_open(adapter);
+ if (ret)
+ goto close_mac;
+
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+ ret = lan743x_rx_open(&adapter->rx[index]);
+ if (ret)
+ goto close_rx;
+ }
+
+ ret = lan743x_tx_open(&adapter->tx[0]);
+ if (ret)
+ goto close_rx;
+
+ return 0;
+
+close_rx:
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+ if (adapter->rx[index].ring_cpu_ptr)
+ lan743x_rx_close(&adapter->rx[index]);
+ }
+ lan743x_phy_close(adapter);
+
+close_mac:
+ lan743x_mac_close(adapter);
+
+close_intr:
+ lan743x_intr_close(adapter);
+
+return_error:
+ netif_warn(adapter, ifup, adapter->netdev,
+ "Error opening LAN743x\n");
+ return ret;
+}
+
+static netdev_tx_t lan743x_netdev_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ return lan743x_tx_xmit_frame(&adapter->tx[0], skb);
+}
+
+static int lan743x_netdev_ioctl(struct net_device *netdev,
+ struct ifreq *ifr, int cmd)
+{
+ if (!netif_running(netdev))
+ return -EINVAL;
+ return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+}
+
+static void lan743x_netdev_set_multicast(struct net_device *netdev)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ lan743x_rfe_set_multicast(adapter);
+}
+
+static int lan743x_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ int ret = 0;
+
+ ret = lan743x_mac_set_mtu(adapter, new_mtu);
+ if (!ret)
+ netdev->mtu = new_mtu;
+ return ret;
+}
+
+static void lan743x_netdev_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ stats->rx_packets = lan743x_csr_read(adapter, STAT_RX_TOTAL_FRAMES);
+ stats->tx_packets = lan743x_csr_read(adapter, STAT_TX_TOTAL_FRAMES);
+ stats->rx_bytes = lan743x_csr_read(adapter,
+ STAT_RX_UNICAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_RX_BROADCAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_RX_MULTICAST_BYTE_COUNT);
+ stats->tx_bytes = lan743x_csr_read(adapter,
+ STAT_TX_UNICAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_TX_BROADCAST_BYTE_COUNT) +
+ lan743x_csr_read(adapter,
+ STAT_TX_MULTICAST_BYTE_COUNT);
+ stats->rx_errors = lan743x_csr_read(adapter, STAT_RX_FCS_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_RX_ALIGNMENT_ERRORS) +
+ lan743x_csr_read(adapter, STAT_RX_JABBER_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_RX_UNDERSIZE_FRAME_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_RX_OVERSIZE_FRAME_ERRORS);
+ stats->tx_errors = lan743x_csr_read(adapter, STAT_TX_FCS_ERRORS) +
+ lan743x_csr_read(adapter,
+ STAT_TX_EXCESS_DEFERRAL_ERRORS) +
+ lan743x_csr_read(adapter, STAT_TX_CARRIER_ERRORS);
+ stats->rx_dropped = lan743x_csr_read(adapter,
+ STAT_RX_DROPPED_FRAMES);
+ stats->tx_dropped = lan743x_csr_read(adapter,
+ STAT_TX_EXCESSIVE_COLLISION);
+ stats->multicast = lan743x_csr_read(adapter,
+ STAT_RX_MULTICAST_FRAMES) +
+ lan743x_csr_read(adapter,
+ STAT_TX_MULTICAST_FRAMES);
+ stats->collisions = lan743x_csr_read(adapter,
+ STAT_TX_SINGLE_COLLISIONS) +
+ lan743x_csr_read(adapter,
+ STAT_TX_MULTIPLE_COLLISIONS) +
+ lan743x_csr_read(adapter,
+ STAT_TX_LATE_COLLISIONS);
+}
+
+static int lan743x_netdev_set_mac_address(struct net_device *netdev,
+ void *addr)
+{
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+ struct sockaddr *sock_addr = addr;
+ int ret;
+
+ ret = eth_prepare_mac_addr_change(netdev, sock_addr);
+ if (ret)
+ return ret;
+ ether_addr_copy(netdev->dev_addr, sock_addr->sa_data);
+ lan743x_mac_set_address(adapter, sock_addr->sa_data);
+ lan743x_rfe_update_mac_address(adapter);
+ return 0;
+}
+
+static const struct net_device_ops lan743x_netdev_ops = {
+ .ndo_open = lan743x_netdev_open,
+ .ndo_stop = lan743x_netdev_close,
+ .ndo_start_xmit = lan743x_netdev_xmit_frame,
+ .ndo_do_ioctl = lan743x_netdev_ioctl,
+ .ndo_set_rx_mode = lan743x_netdev_set_multicast,
+ .ndo_change_mtu = lan743x_netdev_change_mtu,
+ .ndo_get_stats64 = lan743x_netdev_get_stats64,
+ .ndo_set_mac_address = lan743x_netdev_set_mac_address,
+};
+
+static void lan743x_hardware_cleanup(struct lan743x_adapter *adapter)
+{
+ lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+}
+
+static void lan743x_mdiobus_cleanup(struct lan743x_adapter *adapter)
+{
+ mdiobus_unregister(adapter->mdiobus);
+}
+
+static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
+{
+ unregister_netdev(adapter->netdev);
+
+ lan743x_mdiobus_cleanup(adapter);
+ lan743x_hardware_cleanup(adapter);
+ lan743x_pci_cleanup(adapter);
+}
+
+static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+ struct pci_dev *pdev)
+{
+ struct lan743x_tx *tx;
+ int index;
+ int ret;
+
+ adapter->intr.irq = adapter->pdev->irq;
+ lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+ mutex_init(&adapter->dp_lock);
+ ret = lan743x_mac_init(adapter);
+ if (ret)
+ return ret;
+
+ ret = lan743x_phy_init(adapter);
+ if (ret)
+ return ret;
+
+ lan743x_rfe_update_mac_address(adapter);
+
+ ret = lan743x_dmac_init(adapter);
+ if (ret)
+ return ret;
+
+ for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+ adapter->rx[index].adapter = adapter;
+ adapter->rx[index].channel_number = index;
+ }
+
+ tx = &adapter->tx[0];
+ tx->adapter = adapter;
+ tx->channel_number = 0;
+ spin_lock_init(&tx->ring_lock);
+ return 0;
+}
+
+static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+{
+ int ret;
+
+ adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev);
+ if (!(adapter->mdiobus)) {
+ ret = -ENOMEM;
+ goto return_error;
+ }
+
+ adapter->mdiobus->priv = (void *)adapter;
+ adapter->mdiobus->read = lan743x_mdiobus_read;
+ adapter->mdiobus->write = lan743x_mdiobus_write;
+ adapter->mdiobus->name = "lan743x-mdiobus";
+ snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE,
+ "pci-%s", pci_name(adapter->pdev));
+
+ /* restrict the bus scan to the internal PHY at address 1 */
+ adapter->mdiobus->phy_mask = ~(u32)BIT(1);
+
+ /* register mdiobus */
+ ret = mdiobus_register(adapter->mdiobus);
+ if (ret < 0)
+ goto return_error;
+ return 0;
+
+return_error:
+ return ret;
+}
+
+/**
+ * lan743x_pcidev_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @id: entry in lan743x_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * Initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuration of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int lan743x_pcidev_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct lan743x_adapter *adapter = NULL;
+ struct net_device *netdev = NULL;
+ int ret = -ENODEV;
+
+ netdev = devm_alloc_etherdev(&pdev->dev,
+ sizeof(struct lan743x_adapter));
+ if (!netdev)
+ goto return_error;
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+ pci_set_drvdata(pdev, netdev);
+ adapter = netdev_priv(netdev);
+ adapter->netdev = netdev;
+ adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE |
+ NETIF_MSG_LINK | NETIF_MSG_IFUP |
+ NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED;
+ netdev->max_mtu = LAN743X_MAX_FRAME_SIZE;
+
+ ret = lan743x_pci_init(adapter, pdev);
+ if (ret)
+ goto return_error;
+
+ ret = lan743x_csr_init(adapter);
+ if (ret)
+ goto cleanup_pci;
+
+ ret = lan743x_hardware_init(adapter, pdev);
+ if (ret)
+ goto cleanup_pci;
+
+ ret = lan743x_mdiobus_init(adapter);
+ if (ret)
+ goto cleanup_hardware;
+
+ adapter->netdev->netdev_ops = &lan743x_netdev_ops;
+ adapter->netdev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
+ adapter->netdev->hw_features = adapter->netdev->features;
+
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+
+ ret = register_netdev(adapter->netdev);
+ if (ret < 0)
+ goto cleanup_mdiobus;
+ return 0;
+
+cleanup_mdiobus:
+ lan743x_mdiobus_cleanup(adapter);
+
+cleanup_hardware:
+ lan743x_hardware_cleanup(adapter);
+
+cleanup_pci:
+ lan743x_pci_cleanup(adapter);
+
+return_error:
+ pr_warn("Initialization failed\n");
+ return ret;
+}
+
+/**
+ * lan743x_pcidev_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * Called by the PCI subsystem to alert the driver that it should release
+ * a PCI device. This could be caused by a hot-plug event or because the
+ * driver is about to be removed from memory.
+ **/
+static void lan743x_pcidev_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ lan743x_full_cleanup(adapter);
+}
+
+static void lan743x_pcidev_shutdown(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+ rtnl_lock();
+ netif_device_detach(netdev);
+
+ /* Close the netdev if it is running.
+ * For instance, netif_running() is true when the system suspends via
+ * pm-suspend, but false when it suspends from the GUI menu.
+ */
+ if (netif_running(netdev))
+ lan743x_netdev_close(netdev);
+ rtnl_unlock();
+
+ /* clean up lan743x portion */
+ lan743x_hardware_cleanup(adapter);
+}
+
+static const struct pci_device_id lan743x_pcidev_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+ { 0, }
+};
+
+static struct pci_driver lan743x_pcidev_driver = {
+ .name = DRIVER_NAME,
+ .id_table = lan743x_pcidev_tbl,
+ .probe = lan743x_pcidev_probe,
+ .remove = lan743x_pcidev_remove,
+ .shutdown = lan743x_pcidev_shutdown,
+};
+
+module_pci_driver(lan743x_pcidev_driver);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
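
A note on the statistics path above: lan743x_netdev_get_stats64() samples each 32-bit MAC counter with lan743x_csr_read() on every call. As a sketch only (the helper below is hypothetical and not part of this patch; lan743x_csr_read() and the STAT_* offsets are the ones this driver defines), the repeated sums could be folded into one routine:

static u64 lan743x_stats_sum(struct lan743x_adapter *adapter,
			     const u32 *regs, int count)
{
	u64 total = 0;
	int i;

	/* each counter is a 32-bit CSR; accumulate into a 64-bit total */
	for (i = 0; i < count; i++)
		total += lan743x_csr_read(adapter, regs[i]);
	return total;
}

Usage sketch for rx_bytes: pass an array of { STAT_RX_UNICAST_BYTE_COUNT, STAT_RX_BROADCAST_BYTE_COUNT, STAT_RX_MULTICAST_BYTE_COUNT } together with its ARRAY_SIZE().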
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
new file mode 100644
index 000000000000..73b463a9df61
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -0,0 +1,597 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#ifndef _LAN743X_H
+#define _LAN743X_H
+
+#define DRIVER_AUTHOR "Bryan Whitehead <Bryan.Whitehead@microchip.com>"
+#define DRIVER_DESC "LAN743x PCIe Gigabit Ethernet Driver"
+#define DRIVER_NAME "lan743x"
+
+/* Register Definitions */
+#define ID_REV (0x00)
+#define ID_REV_IS_VALID_CHIP_ID_(id_rev) \
+ (((id_rev) & 0xFFF00000) == 0x74300000)
+#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
+#define ID_REV_CHIP_REV_A0_ (0x00000000)
+#define ID_REV_CHIP_REV_B0_ (0x00000010)
+
+#define FPGA_REV (0x04)
+#define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
+#define FPGA_REV_GET_MAJOR_(fpga_rev) ((fpga_rev) & 0x000000FF)
+
+#define HW_CFG (0x010)
+#define HW_CFG_LRST_ BIT(1)
+
+#define PMT_CTL (0x014)
+#define PMT_CTL_READY_ BIT(7)
+#define PMT_CTL_ETH_PHY_RST_ BIT(4)
+
+#define DP_SEL (0x024)
+#define DP_SEL_DPRDY_ BIT(31)
+#define DP_SEL_MASK_ (0x0000001F)
+#define DP_SEL_RFE_RAM (0x00000001)
+
+#define DP_SEL_VHF_HASH_LEN (16)
+#define DP_SEL_VHF_VLAN_LEN (128)
+
+#define DP_CMD (0x028)
+#define DP_CMD_WRITE_ (0x00000001)
+
+#define DP_ADDR (0x02C)
+
+#define DP_DATA_0 (0x030)
+
+#define FCT_RX_CTL (0xAC)
+#define FCT_RX_CTL_EN_(channel) BIT(28 + (channel))
+#define FCT_RX_CTL_DIS_(channel) BIT(24 + (channel))
+#define FCT_RX_CTL_RESET_(channel) BIT(20 + (channel))
+
+#define FCT_TX_CTL (0xC4)
+#define FCT_TX_CTL_EN_(channel) BIT(28 + (channel))
+#define FCT_TX_CTL_DIS_(channel) BIT(24 + (channel))
+#define FCT_TX_CTL_RESET_(channel) BIT(20 + (channel))
+
+#define FCT_FLOW(rx_channel) (0xE0 + ((rx_channel) << 2))
+#define FCT_FLOW_CTL_OFF_THRESHOLD_ (0x00007F00)
+#define FCT_FLOW_CTL_OFF_THRESHOLD_SET_(value) \
+ ((value << 8) & FCT_FLOW_CTL_OFF_THRESHOLD_)
+#define FCT_FLOW_CTL_REQ_EN_ BIT(7)
+#define FCT_FLOW_CTL_ON_THRESHOLD_ (0x0000007F)
+#define FCT_FLOW_CTL_ON_THRESHOLD_SET_(value) \
+ ((value << 0) & FCT_FLOW_CTL_ON_THRESHOLD_)
+
+#define MAC_CR (0x100)
+#define MAC_CR_ADD_ BIT(12)
+#define MAC_CR_ASD_ BIT(11)
+#define MAC_CR_CNTR_RST_ BIT(5)
+#define MAC_CR_RST_ BIT(0)
+
+#define MAC_RX (0x104)
+#define MAC_RX_MAX_SIZE_SHIFT_ (16)
+#define MAC_RX_MAX_SIZE_MASK_ (0x3FFF0000)
+#define MAC_RX_RXD_ BIT(1)
+#define MAC_RX_RXEN_ BIT(0)
+
+#define MAC_TX (0x108)
+#define MAC_TX_TXD_ BIT(1)
+#define MAC_TX_TXEN_ BIT(0)
+
+#define MAC_FLOW (0x10C)
+#define MAC_FLOW_CR_TX_FCEN_ BIT(30)
+#define MAC_FLOW_CR_RX_FCEN_ BIT(29)
+#define MAC_FLOW_CR_FCPT_MASK_ (0x0000FFFF)
+
+#define MAC_RX_ADDRH (0x118)
+
+#define MAC_RX_ADDRL (0x11C)
+
+#define MAC_MII_ACC (0x120)
+#define MAC_MII_ACC_PHY_ADDR_SHIFT_ (11)
+#define MAC_MII_ACC_PHY_ADDR_MASK_ (0x0000F800)
+#define MAC_MII_ACC_MIIRINDA_SHIFT_ (6)
+#define MAC_MII_ACC_MIIRINDA_MASK_ (0x000007C0)
+#define MAC_MII_ACC_MII_READ_ (0x00000000)
+#define MAC_MII_ACC_MII_WRITE_ (0x00000002)
+#define MAC_MII_ACC_MII_BUSY_ BIT(0)
+
+#define MAC_MII_DATA (0x124)
+
+/* offset 0x400 - 0x500, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_HI(x) (0x400 + (8 * (x)))
+#define RFE_ADDR_FILT_HI_VALID_ BIT(31)
+
+/* offset 0x404 - 0x504, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_LO(x) (0x404 + (8 * (x)))
+
+#define RFE_CTL (0x508)
+#define RFE_CTL_AB_ BIT(10)
+#define RFE_CTL_AM_ BIT(9)
+#define RFE_CTL_AU_ BIT(8)
+#define RFE_CTL_MCAST_HASH_ BIT(3)
+#define RFE_CTL_DA_PERFECT_ BIT(1)
+
+#define INT_STS (0x780)
+#define INT_BIT_DMA_RX_(channel) BIT(24 + (channel))
+#define INT_BIT_ALL_RX_ (0x0F000000)
+#define INT_BIT_DMA_TX_(channel) BIT(16 + (channel))
+#define INT_BIT_ALL_TX_ (0x000F0000)
+#define INT_BIT_SW_GP_ BIT(9)
+#define INT_BIT_ALL_OTHER_ (0x00000280)
+#define INT_BIT_MAS_ BIT(0)
+
+#define INT_SET (0x784)
+
+#define INT_EN_SET (0x788)
+
+#define INT_EN_CLR (0x78C)
+
+#define INT_STS_R2C (0x790)
+
+#define INT_VEC_EN_SET (0x794)
+#define INT_VEC_EN_CLR (0x798)
+#define INT_VEC_EN_AUTO_CLR (0x79C)
+#define INT_VEC_EN_(vector_index) BIT(0 + vector_index)
+
+#define INT_VEC_MAP0 (0x7A0)
+#define INT_VEC_MAP0_RX_VEC_(channel, vector) \
+ (((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP1 (0x7A4)
+#define INT_VEC_MAP1_TX_VEC_(channel, vector) \
+ (((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP2 (0x7A8)
+
+#define INT_MOD_MAP0 (0x7B0)
+
+#define INT_MOD_MAP1 (0x7B4)
+
+#define INT_MOD_MAP2 (0x7B8)
+
+#define INT_MOD_CFG0 (0x7C0)
+#define INT_MOD_CFG1 (0x7C4)
+#define INT_MOD_CFG2 (0x7C8)
+#define INT_MOD_CFG3 (0x7CC)
+#define INT_MOD_CFG4 (0x7D0)
+#define INT_MOD_CFG5 (0x7D4)
+#define INT_MOD_CFG6 (0x7D8)
+#define INT_MOD_CFG7 (0x7DC)
+
+#define DMAC_CFG (0xC00)
+#define DMAC_CFG_COAL_EN_ BIT(16)
+#define DMAC_CFG_CH_ARB_SEL_RX_HIGH_ (0x00000000)
+#define DMAC_CFG_MAX_READ_REQ_MASK_ (0x00000070)
+#define DMAC_CFG_MAX_READ_REQ_SET_(val) \
+ ((((u32)(val)) << 4) & DMAC_CFG_MAX_READ_REQ_MASK_)
+#define DMAC_CFG_MAX_DSPACE_16_ (0x00000000)
+#define DMAC_CFG_MAX_DSPACE_32_ (0x00000001)
+#define DMAC_CFG_MAX_DSPACE_64_ BIT(1)
+#define DMAC_CFG_MAX_DSPACE_128_ (0x00000003)
+
+#define DMAC_COAL_CFG (0xC04)
+#define DMAC_COAL_CFG_TIMER_LIMIT_MASK_ (0xFFF00000)
+#define DMAC_COAL_CFG_TIMER_LIMIT_SET_(val) \
+ ((((u32)(val)) << 20) & DMAC_COAL_CFG_TIMER_LIMIT_MASK_)
+#define DMAC_COAL_CFG_TIMER_TX_START_ BIT(19)
+#define DMAC_COAL_CFG_FLUSH_INTS_ BIT(18)
+#define DMAC_COAL_CFG_INT_EXIT_COAL_ BIT(17)
+#define DMAC_COAL_CFG_CSR_EXIT_COAL_ BIT(16)
+#define DMAC_COAL_CFG_TX_THRES_MASK_ (0x0000FF00)
+#define DMAC_COAL_CFG_TX_THRES_SET_(val) \
+ ((((u32)(val)) << 8) & DMAC_COAL_CFG_TX_THRES_MASK_)
+#define DMAC_COAL_CFG_RX_THRES_MASK_ (0x000000FF)
+#define DMAC_COAL_CFG_RX_THRES_SET_(val) \
+ (((u32)(val)) & DMAC_COAL_CFG_RX_THRES_MASK_)
+
+#define DMAC_OBFF_CFG (0xC08)
+#define DMAC_OBFF_TX_THRES_MASK_ (0x0000FF00)
+#define DMAC_OBFF_TX_THRES_SET_(val) \
+ ((((u32)(val)) << 8) & DMAC_OBFF_TX_THRES_MASK_)
+#define DMAC_OBFF_RX_THRES_MASK_ (0x000000FF)
+#define DMAC_OBFF_RX_THRES_SET_(val) \
+ (((u32)(val)) & DMAC_OBFF_RX_THRES_MASK_)
+
+#define DMAC_CMD (0xC0C)
+#define DMAC_CMD_SWR_ BIT(31)
+#define DMAC_CMD_TX_SWR_(channel) BIT(24 + (channel))
+#define DMAC_CMD_START_T_(channel) BIT(20 + (channel))
+#define DMAC_CMD_STOP_T_(channel) BIT(16 + (channel))
+#define DMAC_CMD_RX_SWR_(channel) BIT(8 + (channel))
+#define DMAC_CMD_START_R_(channel) BIT(4 + (channel))
+#define DMAC_CMD_STOP_R_(channel) BIT(0 + (channel))
+
+#define DMAC_INT_STS (0xC10)
+#define DMAC_INT_EN_SET (0xC14)
+#define DMAC_INT_EN_CLR (0xC18)
+#define DMAC_INT_BIT_RXFRM_(channel) BIT(16 + (channel))
+#define DMAC_INT_BIT_TX_IOC_(channel) BIT(0 + (channel))
+
+#define RX_CFG_A(channel) (0xC40 + ((channel) << 6))
+#define RX_CFG_A_RX_WB_ON_INT_TMR_ BIT(30)
+#define RX_CFG_A_RX_WB_THRES_MASK_ (0x1F000000)
+#define RX_CFG_A_RX_WB_THRES_SET_(val) \
+ ((((u32)(val)) << 24) & RX_CFG_A_RX_WB_THRES_MASK_)
+#define RX_CFG_A_RX_PF_THRES_MASK_ (0x001F0000)
+#define RX_CFG_A_RX_PF_THRES_SET_(val) \
+ ((((u32)(val)) << 16) & RX_CFG_A_RX_PF_THRES_MASK_)
+#define RX_CFG_A_RX_PF_PRI_THRES_MASK_ (0x00001F00)
+#define RX_CFG_A_RX_PF_PRI_THRES_SET_(val) \
+ ((((u32)(val)) << 8) & RX_CFG_A_RX_PF_PRI_THRES_MASK_)
+#define RX_CFG_A_RX_HP_WB_EN_ BIT(5)
+
+#define RX_CFG_B(channel) (0xC44 + ((channel) << 6))
+#define RX_CFG_B_TS_ALL_RX_ BIT(29)
+#define RX_CFG_B_RX_PAD_MASK_ (0x03000000)
+#define RX_CFG_B_RX_PAD_0_ (0x00000000)
+#define RX_CFG_B_RX_PAD_2_ (0x02000000)
+#define RX_CFG_B_RDMABL_512_ (0x00040000)
+#define RX_CFG_B_RX_RING_LEN_MASK_ (0x0000FFFF)
+
+#define RX_BASE_ADDRH(channel) (0xC48 + ((channel) << 6))
+
+#define RX_BASE_ADDRL(channel) (0xC4C + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRH(channel) (0xC50 + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRL(channel) (0xC54 + ((channel) << 6))
+
+#define RX_HEAD(channel) (0xC58 + ((channel) << 6))
+
+#define RX_TAIL(channel) (0xC5C + ((channel) << 6))
+#define RX_TAIL_SET_TOP_INT_EN_ BIT(30)
+#define RX_TAIL_SET_TOP_INT_VEC_EN_ BIT(29)
+
+#define RX_CFG_C(channel) (0xC64 + ((channel) << 6))
+#define RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_ BIT(6)
+#define RX_CFG_C_RX_INT_EN_R2C_ BIT(4)
+#define RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_ BIT(3)
+#define RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_ (0x00000007)
+
+#define TX_CFG_A(channel) (0xD40 + ((channel) << 6))
+#define TX_CFG_A_TX_HP_WB_ON_INT_TMR_ BIT(30)
+#define TX_CFG_A_TX_TMR_HPWB_SEL_IOC_ (0x10000000)
+#define TX_CFG_A_TX_PF_THRES_MASK_ (0x001F0000)
+#define TX_CFG_A_TX_PF_THRES_SET_(value) \
+ ((((u32)(value)) << 16) & TX_CFG_A_TX_PF_THRES_MASK_)
+#define TX_CFG_A_TX_PF_PRI_THRES_MASK_ (0x00001F00)
+#define TX_CFG_A_TX_PF_PRI_THRES_SET_(value) \
+ ((((u32)(value)) << 8) & TX_CFG_A_TX_PF_PRI_THRES_MASK_)
+#define TX_CFG_A_TX_HP_WB_EN_ BIT(5)
+#define TX_CFG_A_TX_HP_WB_THRES_MASK_ (0x0000000F)
+#define TX_CFG_A_TX_HP_WB_THRES_SET_(value) \
+ (((u32)(value)) & TX_CFG_A_TX_HP_WB_THRES_MASK_)
+
+#define TX_CFG_B(channel) (0xD44 + ((channel) << 6))
+#define TX_CFG_B_TDMABL_512_ (0x00040000)
+#define TX_CFG_B_TX_RING_LEN_MASK_ (0x0000FFFF)
+
+#define TX_BASE_ADDRH(channel) (0xD48 + ((channel) << 6))
+
+#define TX_BASE_ADDRL(channel) (0xD4C + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRH(channel) (0xD50 + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRL(channel) (0xD54 + ((channel) << 6))
+
+#define TX_HEAD(channel) (0xD58 + ((channel) << 6))
+
+#define TX_TAIL(channel) (0xD5C + ((channel) << 6))
+#define TX_TAIL_SET_DMAC_INT_EN_ BIT(31)
+#define TX_TAIL_SET_TOP_INT_EN_ BIT(30)
+#define TX_TAIL_SET_TOP_INT_VEC_EN_ BIT(29)
+
+#define TX_CFG_C(channel) (0xD64 + ((channel) << 6))
+#define TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_ BIT(6)
+#define TX_CFG_C_TX_DMA_INT_EN_AUTO_CLR_ BIT(5)
+#define TX_CFG_C_TX_INT_EN_R2C_ BIT(4)
+#define TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_ BIT(3)
+#define TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_ (0x00000007)
+
+/* MAC statistics registers */
+#define STAT_RX_FCS_ERRORS (0x1200)
+#define STAT_RX_ALIGNMENT_ERRORS (0x1204)
+#define STAT_RX_JABBER_ERRORS (0x120C)
+#define STAT_RX_UNDERSIZE_FRAME_ERRORS (0x1210)
+#define STAT_RX_OVERSIZE_FRAME_ERRORS (0x1214)
+#define STAT_RX_DROPPED_FRAMES (0x1218)
+#define STAT_RX_UNICAST_BYTE_COUNT (0x121C)
+#define STAT_RX_BROADCAST_BYTE_COUNT (0x1220)
+#define STAT_RX_MULTICAST_BYTE_COUNT (0x1224)
+#define STAT_RX_MULTICAST_FRAMES (0x1230)
+#define STAT_RX_TOTAL_FRAMES (0x1254)
+
+#define STAT_TX_FCS_ERRORS (0x1280)
+#define STAT_TX_EXCESS_DEFERRAL_ERRORS (0x1284)
+#define STAT_TX_CARRIER_ERRORS (0x1288)
+#define STAT_TX_SINGLE_COLLISIONS (0x1290)
+#define STAT_TX_MULTIPLE_COLLISIONS (0x1294)
+#define STAT_TX_EXCESSIVE_COLLISION (0x1298)
+#define STAT_TX_LATE_COLLISIONS (0x129C)
+#define STAT_TX_UNICAST_BYTE_COUNT (0x12A0)
+#define STAT_TX_BROADCAST_BYTE_COUNT (0x12A4)
+#define STAT_TX_MULTICAST_BYTE_COUNT (0x12A8)
+#define STAT_TX_MULTICAST_FRAMES (0x12B4)
+#define STAT_TX_TOTAL_FRAMES (0x12D8)
+
+/* End of Register definitions */
+
+#define LAN743X_MAX_RX_CHANNELS (4)
+#define LAN743X_MAX_TX_CHANNELS (1)
+struct lan743x_adapter;
+
+#define LAN743X_USED_RX_CHANNELS (4)
+#define LAN743X_USED_TX_CHANNELS (1)
+#define LAN743X_INT_MOD (400)
+
+#if (LAN743X_USED_RX_CHANNELS > LAN743X_MAX_RX_CHANNELS)
+#error Invalid LAN743X_USED_RX_CHANNELS
+#endif
+#if (LAN743X_USED_TX_CHANNELS > LAN743X_MAX_TX_CHANNELS)
+#error Invalid LAN743X_USED_TX_CHANNELS
+#endif
+
+/* PCI */
+/* SMSC acquired EFAR in the late 1990s; MCHP acquired SMSC in 2012 */
+#define PCI_VENDOR_ID_SMSC PCI_VENDOR_ID_EFAR
+#define PCI_DEVICE_ID_SMSC_LAN7430 (0x7430)
+
+#define PCI_CONFIG_LENGTH (0x1000)
+
+/* CSR */
+#define CSR_LENGTH (0x2000)
+
+#define LAN743X_CSR_FLAG_IS_A0 BIT(0)
+#define LAN743X_CSR_FLAG_IS_B0 BIT(1)
+#define LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR BIT(8)
+
+struct lan743x_csr {
+ u32 flags;
+ u8 __iomem *csr_address;
+ u32 id_rev;
+ u32 fpga_rev;
+};
+
+/* INTERRUPTS */
+typedef void(*lan743x_vector_handler)(void *context, u32 int_sts, u32 flags);
+
+#define LAN743X_VECTOR_FLAG_IRQ_SHARED BIT(0)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ BIT(1)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C BIT(2)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C BIT(3)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK BIT(4)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR BIT(5)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C BIT(6)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR BIT(7)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET BIT(8)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR BIT(9)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET BIT(10)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR BIT(11)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET BIT(12)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR BIT(13)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET BIT(14)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR BIT(15)
+
+struct lan743x_vector {
+ int irq;
+ u32 flags;
+ struct lan743x_adapter *adapter;
+ int vector_index;
+ u32 int_mask;
+ lan743x_vector_handler handler;
+ void *context;
+};
+
+#define LAN743X_MAX_VECTOR_COUNT (8)
+
+struct lan743x_intr {
+ int flags;
+
+ unsigned int irq;
+
+ struct lan743x_vector vector_list[LAN743X_MAX_VECTOR_COUNT];
+ int number_of_vectors;
+ bool using_vectors;
+
+ int software_isr_flag;
+};
+
+#define LAN743X_MAX_FRAME_SIZE (9 * 1024)
+
+/* PHY */
+struct lan743x_phy {
+ bool fc_autoneg;
+ u8 fc_request_control;
+};
+
+/* TX */
+struct lan743x_tx_descriptor;
+struct lan743x_tx_buffer_info;
+
+#define GPIO_QUEUE_STARTED (0)
+#define GPIO_TX_FUNCTION (1)
+#define GPIO_TX_COMPLETION (2)
+#define GPIO_TX_FRAGMENT (3)
+
+#define TX_FRAME_FLAG_IN_PROGRESS BIT(0)
+
+struct lan743x_tx {
+ struct lan743x_adapter *adapter;
+ u32 vector_flags;
+ int channel_number;
+
+ int ring_size;
+ size_t ring_allocation_size;
+ struct lan743x_tx_descriptor *ring_cpu_ptr;
+ dma_addr_t ring_dma_ptr;
+ /* ring_lock: used to prevent concurrent access to tx ring */
+ spinlock_t ring_lock;
+ u32 frame_flags;
+ u32 frame_first;
+ u32 frame_data0;
+ u32 frame_tail;
+
+ struct lan743x_tx_buffer_info *buffer_info;
+
+ u32 *head_cpu_ptr;
+ dma_addr_t head_dma_ptr;
+ int last_head;
+ int last_tail;
+
+ struct napi_struct napi;
+
+ struct sk_buff *overflow_skb;
+};
+
+/* RX */
+struct lan743x_rx_descriptor;
+struct lan743x_rx_buffer_info;
+
+struct lan743x_rx {
+ struct lan743x_adapter *adapter;
+ u32 vector_flags;
+ int channel_number;
+
+ int ring_size;
+ size_t ring_allocation_size;
+ struct lan743x_rx_descriptor *ring_cpu_ptr;
+ dma_addr_t ring_dma_ptr;
+
+ struct lan743x_rx_buffer_info *buffer_info;
+
+ u32 *head_cpu_ptr;
+ dma_addr_t head_dma_ptr;
+ u32 last_head;
+ u32 last_tail;
+
+ struct napi_struct napi;
+
+ u32 frame_count;
+};
+
+struct lan743x_adapter {
+ struct net_device *netdev;
+ struct mii_bus *mdiobus;
+ int msg_enable;
+ struct pci_dev *pdev;
+ struct lan743x_csr csr;
+ struct lan743x_intr intr;
+
+ /* dp_lock: used to prevent concurrent access to the data port */
+ struct mutex dp_lock;
+
+ u8 mac_address[ETH_ALEN];
+
+ struct lan743x_phy phy;
+ struct lan743x_tx tx[LAN743X_MAX_TX_CHANNELS];
+ struct lan743x_rx rx[LAN743X_MAX_RX_CHANNELS];
+};
+
+#define LAN743X_COMPONENT_FLAG_RX(channel) BIT(20 + (channel))
+
+#define INTR_FLAG_IRQ_REQUESTED(vector_index) BIT(0 + vector_index)
+#define INTR_FLAG_MSI_ENABLED BIT(8)
+#define INTR_FLAG_MSIX_ENABLED BIT(9)
+
+#define MAC_MII_READ 1
+#define MAC_MII_WRITE 0
+
+#define PHY_FLAG_OPENED BIT(0)
+#define PHY_FLAG_ATTACHED BIT(1)
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#define DMA_ADDR_HIGH32(dma_addr) ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF))
+#else
+#define DMA_ADDR_HIGH32(dma_addr) ((u32)(0))
+#endif
+#define DMA_ADDR_LOW32(dma_addr) ((u32)((dma_addr) & 0xFFFFFFFF))
+#define DMA_DESCRIPTOR_SPACING_16 (16)
+#define DMA_DESCRIPTOR_SPACING_32 (32)
+#define DMA_DESCRIPTOR_SPACING_64 (64)
+#define DMA_DESCRIPTOR_SPACING_128 (128)
+#define DEFAULT_DMA_DESCRIPTOR_SPACING (L1_CACHE_BYTES)
+
+#define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \
+ (((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0))
+#define DMAC_CHANNEL_STATE_INITIAL DMAC_CHANNEL_STATE_SET(0, 0)
+#define DMAC_CHANNEL_STATE_STARTED DMAC_CHANNEL_STATE_SET(1, 0)
+#define DMAC_CHANNEL_STATE_STOP_PENDING DMAC_CHANNEL_STATE_SET(1, 1)
+#define DMAC_CHANNEL_STATE_STOPPED DMAC_CHANNEL_STATE_SET(0, 1)
+
+/* TX Descriptor bits */
+#define TX_DESC_DATA0_DTYPE_MASK_ (0xC0000000)
+#define TX_DESC_DATA0_DTYPE_DATA_ (0x00000000)
+#define TX_DESC_DATA0_DTYPE_EXT_ (0x40000000)
+#define TX_DESC_DATA0_FS_ (0x20000000)
+#define TX_DESC_DATA0_LS_ (0x10000000)
+#define TX_DESC_DATA0_EXT_ (0x08000000)
+#define TX_DESC_DATA0_IOC_ (0x04000000)
+#define TX_DESC_DATA0_ICE_ (0x00400000)
+#define TX_DESC_DATA0_IPE_ (0x00200000)
+#define TX_DESC_DATA0_TPE_ (0x00100000)
+#define TX_DESC_DATA0_FCS_ (0x00020000)
+#define TX_DESC_DATA0_BUF_LENGTH_MASK_ (0x0000FFFF)
+#define TX_DESC_DATA0_EXT_LSO_ (0x00200000)
+#define TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_ (0x000FFFFF)
+#define TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_ (0x3FFF0000)
+
+struct lan743x_tx_descriptor {
+ u32 data0;
+ u32 data1;
+ u32 data2;
+ u32 data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING);
+
+#define TX_BUFFER_INFO_FLAG_ACTIVE BIT(0)
+#define TX_BUFFER_INFO_FLAG_IGNORE_SYNC BIT(2)
+#define TX_BUFFER_INFO_FLAG_SKB_FRAGMENT BIT(3)
+struct lan743x_tx_buffer_info {
+ int flags;
+ struct sk_buff *skb;
+ dma_addr_t dma_ptr;
+ unsigned int buffer_length;
+};
+
+#define LAN743X_TX_RING_SIZE (50)
+
+/* RX_DESC_DATA0_OWN_ set: descriptor owned by the RX DMAC;
+ * clear: descriptor owned by the host.
+ */
+#define RX_DESC_DATA0_OWN_ (0x00008000)
+#define RX_DESC_DATA0_FS_ (0x80000000)
+#define RX_DESC_DATA0_LS_ (0x40000000)
+#define RX_DESC_DATA0_FRAME_LENGTH_MASK_ (0x3FFF0000)
+#define RX_DESC_DATA0_FRAME_LENGTH_GET_(data0) \
+ (((data0) & RX_DESC_DATA0_FRAME_LENGTH_MASK_) >> 16)
+#define RX_DESC_DATA0_EXT_ (0x00004000)
+#define RX_DESC_DATA0_BUF_LENGTH_MASK_ (0x00003FFF)
+#define RX_DESC_DATA2_TS_NS_MASK_ (0x3FFFFFFF)
+
+#if ((NET_IP_ALIGN != 0) && (NET_IP_ALIGN != 2))
+#error NET_IP_ALIGN must be 0 or 2
+#endif
+
+#define RX_HEAD_PADDING NET_IP_ALIGN
+
+struct lan743x_rx_descriptor {
+ u32 data0;
+ u32 data1;
+ u32 data2;
+ u32 data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING);
+
+#define RX_BUFFER_INFO_FLAG_ACTIVE BIT(0)
+struct lan743x_rx_buffer_info {
+ int flags;
+ struct sk_buff *skb;
+
+ dma_addr_t dma_ptr;
+ unsigned int buffer_length;
+};
+
+#define LAN743X_RX_RING_SIZE (65)
+
+#define RX_PROCESS_RESULT_NOTHING_TO_DO (0)
+#define RX_PROCESS_RESULT_PACKET_RECEIVED (1)
+#define RX_PROCESS_RESULT_PACKET_DROPPED (2)
+
+#endif /* _LAN743X_H */
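
The DMA_ADDR_HIGH32()/DMA_ADDR_LOW32() macros split a dma_addr_t across the paired *_ADDRH/*_ADDRL registers defined above. A minimal sketch, assuming lan743x_csr_write() from lan743x_main.c (the function name below is illustrative, not part of the patch):

static void lan743x_tx_set_ring_base(struct lan743x_tx *tx)
{
	struct lan743x_adapter *adapter = tx->adapter;
	int ch = tx->channel_number;

	/* program the TX ring base address as a high/low register pair */
	lan743x_csr_write(adapter, TX_BASE_ADDRH(ch),
			  DMA_ADDR_HIGH32(tx->ring_dma_ptr));
	lan743x_csr_write(adapter, TX_BASE_ADDRL(ch),
			  DMA_ADDR_LOW32(tx->ring_dma_ptr));
}

On builds without CONFIG_ARCH_DMA_ADDR_T_64BIT, DMA_ADDR_HIGH32() evaluates to 0, so the same sequence works unchanged.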
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index d5b28884e21e..51fa82b429a3 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -60,14 +60,6 @@ do { \
*((volatile unsigned int *)dev->base_addr+(reg)) = (val); \
} while (0)
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer depended.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
static int sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed;
unsigned int silicon_revision;
unsigned int val;
struct sonic_local *lp = netdev_priv(dev);
@@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev)
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while (known_revisions[i] != 0xffff &&
known_revisions[i] != silicon_revision)
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
goto out;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
- printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
- dev_name(lp->device), dev->base_addr);
-
/*
* Put the sonic into software reset, then
* retrieve and print the ethernet address.
@@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev)
err = sonic_probe1(dev);
if (err)
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out1;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out1:
@@ -262,8 +248,6 @@ out:
}
MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
MODULE_ALIAS("platform:jazzsonic");
#include "sonic.c"
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index b922ab5cedea..0937fc2a928e 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -60,8 +60,6 @@
#include <asm/macints.h>
#include <asm/mac_via.h>
-static char mac_sonic_string[] = "macsonic";
-
#include "sonic.h"
/* These should basically be bus-size and endian independent (since
@@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic";
#define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
+ lp->reg_offset))
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
/* For onboard SONIC */
#define ONBOARD_SONIC_REGISTERS 0x50F0A000
#define ONBOARD_SONIC_PROM_BASE 0x50f08000
@@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
int sr;
bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
/* Bogus probing, on the models which may or may not have
Ethernet (BTW, the Ethernet *is* always at the same
address, and nothing else lives there, at least if Apple's
@@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
if (!card_present) {
- printk("none.\n");
+ pr_info("Onboard/comm-slot SONIC not found\n");
return -ENODEV;
}
}
- printk("yes\n");
-
/* Danger! My arms are flailing wildly! You *must* set lp->reg_offset
* and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
else
dev->irq = IRQ_NUBUS_9;
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
- dev_name(lp->device), dev->base_addr);
-
/* The PowerBook's SONIC is 16 bit always. */
if (macintosh_config->ident == MAC_MODEL_PB520) {
lp->reg_offset = 0;
lp->dma_bitmode = SONIC_BITMODE16;
- sr = SONIC_READ(SONIC_SR);
} else if (commslot) {
/* Some of the comm-slot cards are 16 bit. But some
of them are not. The 32-bit cards use offset 2 and
@@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
else {
lp->dma_bitmode = SONIC_BITMODE16;
lp->reg_offset = 0;
- sr = SONIC_READ(SONIC_SR);
}
} else {
/* All onboard cards are at offset 2 with 32 bit DMA. */
lp->reg_offset = 2;
lp->dma_bitmode = SONIC_BITMODE32;
- sr = SONIC_READ(SONIC_SR);
}
- printk(KERN_INFO
- "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+ SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+ lp->reg_offset);
+
+ /* This is sometimes useful to find out how MacOS configured the card */
+ pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
/* Now look for the MAC address. */
mac_onboard_sonic_ethernet_addr(dev);
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
unsigned long prom_addr, int id)
{
int i;
@@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
return -1;
}
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+ struct net_device *dev)
{
- static int slots;
- struct nubus_rsrc *ndev = NULL;
struct sonic_local* lp = netdev_priv(dev);
unsigned long base_addr, prom_addr;
u16 sonic_dcr;
- int id = -1;
int reg_offset, dma_bitmode;
- /* Find the first SONIC that hasn't been initialized already */
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
-
- /* Have we seen it already? */
- if (slots & (1<<ndev->board->slot))
- continue;
- slots |= 1<<ndev->board->slot;
-
- /* Is it one of ours? */
- if ((id = macsonic_ident(ndev)) != -1)
- break;
- }
-
- if (ndev == NULL)
- return -ENODEV;
-
switch (id) {
case MACSONIC_DUODOCK:
- base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
SONIC_DCR_TFT0;
reg_offset = 2;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE16:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNALINK:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNA:
- base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+ base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
sonic_dcr = SONIC_DCR_BMS |
SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
reg_offset = 0;
@@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
dev->base_addr = base_addr;
lp->reg_offset = reg_offset;
lp->dma_bitmode = dma_bitmode;
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: %s in slot %X\n",
- dev_name(lp->device), ndev->board->name, ndev->board->slot);
- printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+ dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+ board->name, SONIC_READ(SONIC_SR),
+ lp->dma_bitmode ? 32 : 16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ /* This is sometimes useful to find out how MacOS configured the card */
+ dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
SONIC_WRITE(SONIC_ISR, 0x7fff);
/* Now look for the MAC address. */
- if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+ if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
return -ENODEV;
+ dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
{
struct net_device *dev;
struct sonic_local *lp;
@@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- /* This will catch fatal stuff like -ENOMEM as well as success */
err = mac_onboard_sonic_probe(dev);
- if (err == 0)
- goto found;
- if (err != -ENODEV)
- goto out;
- err = mac_nubus_sonic_probe(dev);
if (err)
goto out;
-found:
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out:
@@ -604,13 +551,11 @@ out:
}
MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
MODULE_ALIAS("platform:macsonic");
#include "sonic.c"
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
return 0;
}
-static struct platform_driver mac_sonic_driver = {
- .probe = mac_sonic_probe,
- .remove = mac_sonic_device_remove,
- .driver = {
- .name = mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+ .probe = mac_sonic_platform_probe,
+ .remove = mac_sonic_platform_remove,
+ .driver = {
+ .name = "macsonic",
+ },
+};
+
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+ struct net_device *ndev;
+ struct sonic_local *lp;
+ struct nubus_rsrc *fres;
+ int id = -1;
+ int err;
+
+ /* The platform driver will handle a PDS or Comm Slot card (even if
+ * it has a pseudoslot declaration ROM).
+ */
+ if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ return -ENODEV;
+
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
+ continue;
+
+ id = macsonic_ident(fres);
+ if (id != -1)
+ break;
+ }
+ if (!fres)
+ return -ENODEV;
+
+ ndev = alloc_etherdev(sizeof(struct sonic_local));
+ if (!ndev)
+ return -ENOMEM;
+
+ lp = netdev_priv(ndev);
+ lp->device = &board->dev;
+ SET_NETDEV_DEV(ndev, &board->dev);
+
+ err = mac_sonic_nubus_probe_board(board, id, ndev);
+ if (err)
+ goto out;
+
+ sonic_msg_init(ndev);
+
+ err = register_netdev(ndev);
+ if (err)
+ goto out;
+
+ nubus_set_drvdata(board, ndev);
+
+ return 0;
+
+out:
+ free_netdev(ndev);
+ return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+ struct net_device *ndev = nubus_get_drvdata(board);
+ struct sonic_local *lp = netdev_priv(ndev);
+
+ unregister_netdev(ndev);
+ dma_free_coherent(lp->device,
+ SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+ lp->descriptors, lp->descriptors_laddr);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+ .probe = mac_sonic_nubus_probe,
+ .remove = mac_sonic_nubus_remove,
+ .driver = {
+ .name = "macsonic-nubus",
+ .owner = THIS_MODULE,
},
};
-module_platform_driver(mac_sonic_driver);
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+ perr = platform_driver_register(&mac_sonic_platform_driver);
+ nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+ return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+ if (!perr)
+ platform_driver_unregister(&mac_sonic_platform_driver);
+ if (!nerr)
+ nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 612c7a44b26c..7ed08486ae23 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -33,7 +33,21 @@
* the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
*/
+static unsigned int version_printed;
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+ struct sonic_local *lp = netdev_priv(dev);
+
+ lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+ if (version_printed++ == 0)
+ netif_dbg(lp, drv, dev, "%s", version);
+}
/*
* Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_open: initializing sonic driver.\n");
+ netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_open: Initialization done.\n");
+ netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
return 0;
}
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_close\n");
+ netif_dbg(lp, ifdown, dev, "%s\n", __func__);
netif_stop_queue(dev);
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
int length;
int entry = lp->next_tx;
- if (sonic_debug > 2)
- printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+ netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
length = skb->len;
if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
if (lp->tx_skb[lp->next_tx] != NULL) {
/* The ring is full, the ISR has yet to process the next TD. */
- if (sonic_debug > 3)
- printk("%s: stopping queue\n", dev->name);
+ netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
netif_stop_queue(dev);
/* after this packet, wait for ISR to free up some TDAs */
} else netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_send_packet: issuing Tx command\n");
+ netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
do {
if (status & SONIC_INT_PKTRX) {
- if (sonic_debug > 2)
- printk("%s: packet rx\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
sonic_rx(dev); /* got packet(s) */
SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
}
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
* still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
*/
- if (sonic_debug > 2)
- printk("%s: tx done\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
while (lp->tx_skb[entry] != NULL) {
if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
* check error conditions
*/
if (status & SONIC_INT_RFO) {
- if (sonic_debug > 1)
- printk("%s: rx fifo overrun\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+ __func__);
lp->stats.rx_fifo_errors++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
}
if (status & SONIC_INT_RDE) {
- if (sonic_debug > 1)
- printk("%s: rx descriptors exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
}
if (status & SONIC_INT_RBAE) {
- if (sonic_debug > 1)
- printk("%s: rx buffer area exceeded\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
}
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
/* transmit error */
if (status & SONIC_INT_TXER) {
- if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
- printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+ if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+ netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
}
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
- if (sonic_debug > 2)
- printk("%s: rx buffer exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
}
} else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
(netdev_mc_count(dev) > 15)) {
rcr |= SONIC_RCR_AMC;
} else {
- if (sonic_debug > 2)
- printk("sonic_multicast_list: mc_count %d\n",
- netdev_mc_count(dev));
+ netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+ netdev_mc_count(dev));
sonic_set_cam_enable(dev, 1); /* always enable our own address */
i = 1;
netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
}
}
- if (sonic_debug > 2)
- printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+ netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
SONIC_WRITE(SONIC_RCR, rcr);
}
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
/*
* initialize the receive resource area
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive resource area\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+ __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
/* load the resource pointers */
- if (sonic_debug > 3)
- printk("sonic_init: issuing RRRA command\n");
+ netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
break;
}
- if (sonic_debug > 2)
- printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+ netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), i);
/*
* Initialize the receive descriptors so that they
* become a circular linked list, ie. let the last
* descriptor point to the first again.
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+ __func__);
+
for (i=0; i<SONIC_NUM_RDS; i++) {
sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
/*
* initialize transmit descriptors
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize transmit descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+ __func__);
+
for (i = 0; i < SONIC_NUM_TDS; i++) {
sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
break;
}
- if (sonic_debug > 2) {
- printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
- SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
- }
+ netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
/*
* enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
- if (sonic_debug > 2)
- printk("sonic_init: new status=%x\n",
- SONIC_READ(SONIC_CMD));
+ netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+ SONIC_READ(SONIC_CMD));
return 0;
}
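
The sonic changes replace the old compile-time sonic_debug level with the standard netif_msg_* mask kept in the new msg_enable field and initialized by sonic_msg_init(). Drivers using this scheme usually also expose the mask through ethtool; a sketch of the customary hooks (not added by this patch) would be:

static u32 sonic_get_msglevel(struct net_device *dev)
{
	struct sonic_local *lp = netdev_priv(dev);

	return lp->msg_enable;
}

static void sonic_set_msglevel(struct net_device *dev, u32 value)
{
	struct sonic_local *lp = netdev_priv(dev);

	lp->msg_enable = value;
}

These would be wired up via the .get_msglevel and .set_msglevel members of struct ethtool_ops.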
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h
index 421b1a283fed..2b27f7049acb 100644
--- a/drivers/net/ethernet/natsemi/sonic.h
+++ b/drivers/net/ethernet/natsemi/sonic.h
@@ -319,6 +319,7 @@ struct sonic_local {
unsigned int eol_rx;
unsigned int eol_tx; /* last unacked transmit packet */
unsigned int next_tx; /* next free TD */
+ int msg_enable;
struct device *device; /* generic device */
struct net_device_stats stats;
};
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
static void sonic_multicast_list(struct net_device *dev);
static int sonic_init(struct net_device *dev);
static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
/* Internal inlines for reading/writing DMA buffers. Note that bus
size and endianness matter here, whereas they don't for registers,
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 1817deea98a4..e1b886e87a76 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
#define SONIC_WRITE(reg,val) \
*((volatile unsigned int *)dev->base_addr+reg) = val
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer depended.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
static int __init sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed = 0;
unsigned int silicon_revision;
struct sonic_local *lp = netdev_priv(dev);
unsigned int base_addr = dev->base_addr;
@@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev)
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while ((known_revisions[i] != 0xffff) &&
(known_revisions[i] != silicon_revision))
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
return -ENODEV;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
/*
* Put the sonic into software reset, then retrieve ethernet address.
* Note: we are assuming that the boot-loader has initialized the cam.
@@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev)
if ((err = sonic_probe1(dev)))
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
if ((err = register_netdev(dev)))
goto out1;
- printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
- dev->base_addr, dev->dev_addr, dev->irq);
-
return 0;
out1:
@@ -290,8 +278,6 @@ out:
}
MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
#include "sonic.c"
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/Makefile b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/Makefile b/drivers/net/ethernet/netronome/nfp/flower/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index adfe474c2cf0..28c1cd5b823b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -61,6 +61,13 @@
#define NFP_FLOWER_MASK_MPLS_BOS BIT(8)
#define NFP_FLOWER_MASK_MPLS_Q BIT(0)
+/* Compressed HW representation of TCP Flags */
+#define NFP_FL_TCP_FLAG_URG BIT(4)
+#define NFP_FL_TCP_FLAG_PSH BIT(3)
+#define NFP_FL_TCP_FLAG_RST BIT(2)
+#define NFP_FL_TCP_FLAG_SYN BIT(1)
+#define NFP_FL_TCP_FLAG_FIN BIT(0)
+
#define NFP_FL_SC_ACT_DROP 0x80000000
#define NFP_FL_SC_ACT_USER 0x7D000000
#define NFP_FL_SC_ACT_POPV 0x6A000000
@@ -257,7 +264,7 @@ struct nfp_flower_tp_ports {
* 3 2 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | DSCP |ECN| protocol | reserved |
+ * | DSCP |ECN| protocol | ttl | flags |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_src |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -268,7 +275,7 @@ struct nfp_flower_ipv4 {
u8 tos;
u8 proto;
u8 ttl;
- u8 reserved;
+ u8 flags;
__be32 ipv4_src;
__be32 ipv4_dst;
};
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 332ff0fdc038..c5cebf6fb1d3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -41,6 +41,7 @@
#include <linux/time64.h>
#include <linux/types.h>
#include <net/pkt_cls.h>
+#include <net/tcp.h>
#include <linux/workqueue.h>
struct net_device;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 37c2ecae2a7a..b3bc8279d4fb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -181,6 +181,26 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
frame->tos = flow_ip->tos;
frame->ttl = flow_ip->ttl;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *tcp;
+ u32 tcp_flags;
+
+ tcp = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_TCP, target);
+ tcp_flags = be16_to_cpu(tcp->flags);
+
+ if (tcp_flags & TCPHDR_FIN)
+ frame->flags |= NFP_FL_TCP_FLAG_FIN;
+ if (tcp_flags & TCPHDR_SYN)
+ frame->flags |= NFP_FL_TCP_FLAG_SYN;
+ if (tcp_flags & TCPHDR_RST)
+ frame->flags |= NFP_FL_TCP_FLAG_RST;
+ if (tcp_flags & TCPHDR_PSH)
+ frame->flags |= NFP_FL_TCP_FLAG_PSH;
+ if (tcp_flags & TCPHDR_URG)
+ frame->flags |= NFP_FL_TCP_FLAG_URG;
+ }
}
static void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index eb5c13dea8f5..f3586c519805 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -44,11 +44,16 @@
#include "../nfp_net.h"
#include "../nfp_port.h"
+#define NFP_FLOWER_SUPPORTED_TCPFLAGS \
+ (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+ TCPHDR_PSH | TCPHDR_URG)
+
#define NFP_FLOWER_WHITELIST_DISSECTOR \
(BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_BASIC) | \
BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_TCP) | \
BIT(FLOW_DISSECTOR_KEY_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_VLAN) | \
@@ -288,6 +293,35 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
}
}
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *tcp;
+ u32 tcp_flags;
+
+ tcp = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ flow->key);
+ tcp_flags = be16_to_cpu(tcp->flags);
+
+ if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
+ return -EOPNOTSUPP;
+
+ /* We only support PSH and URG flags when either
+ * FIN, SYN or RST is present as well.
+ */
+ if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
+ !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
+ return -EOPNOTSUPP;
+
+ /* We need to store TCP flags in the IPv4 key space, thus
+ * we need to ensure we include a IPv4 key layer if we have
+ * not done so already.
+ */
+ if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
+ key_layer |= NFP_FLOWER_LAYER_IPV4;
+ key_size += sizeof(struct nfp_flower_ipv4);
+ }
+ }
+
ret_key_ls->key_layer = key_layer;
ret_key_ls->key_layer_two = key_layer_two;
ret_key_ls->key_size = key_size;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index ab301d56430b..c4b1f344b4da 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -645,6 +645,7 @@ MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_4x10_1x40.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_8x10.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw");
MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw");
+MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw");
MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 4499a7333078..bb63c115537d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ * Copyright (C) 2015-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -51,12 +51,12 @@
* The configuration BAR is 8K in size, but due to
* THB-350, 32k needs to be reserved.
*/
-#define NFP_NET_CFG_BAR_SZ (32 * 1024)
+#define NFP_NET_CFG_BAR_SZ (32 * 1024)
/**
* Offset in Freelist buffer where packet starts on RX
*/
-#define NFP_NET_RX_OFFSET 32
+#define NFP_NET_RX_OFFSET 32
/**
* LSO parameters
@@ -75,65 +75,65 @@
#define NFP_NET_META_PORTID 5
#define NFP_NET_META_CSUM 6 /* checksum complete type */
-#define NFP_META_PORT_ID_CTRL ~0U
+#define NFP_META_PORT_ID_CTRL ~0U
/**
* Hash type pre-pended when a RSS hash was computed
*/
-#define NFP_NET_RSS_NONE 0
-#define NFP_NET_RSS_IPV4 1
-#define NFP_NET_RSS_IPV6 2
-#define NFP_NET_RSS_IPV6_EX 3
-#define NFP_NET_RSS_IPV4_TCP 4
-#define NFP_NET_RSS_IPV6_TCP 5
-#define NFP_NET_RSS_IPV6_EX_TCP 6
-#define NFP_NET_RSS_IPV4_UDP 7
-#define NFP_NET_RSS_IPV6_UDP 8
-#define NFP_NET_RSS_IPV6_EX_UDP 9
+#define NFP_NET_RSS_NONE 0
+#define NFP_NET_RSS_IPV4 1
+#define NFP_NET_RSS_IPV6 2
+#define NFP_NET_RSS_IPV6_EX 3
+#define NFP_NET_RSS_IPV4_TCP 4
+#define NFP_NET_RSS_IPV6_TCP 5
+#define NFP_NET_RSS_IPV6_EX_TCP 6
+#define NFP_NET_RSS_IPV4_UDP 7
+#define NFP_NET_RSS_IPV6_UDP 8
+#define NFP_NET_RSS_IPV6_EX_UDP 9
/**
* Ring counts
- * %NFP_NET_TXR_MAX: Maximum number of TX rings
- * %NFP_NET_RXR_MAX: Maximum number of RX rings
+ * %NFP_NET_TXR_MAX: Maximum number of TX rings
+ * %NFP_NET_RXR_MAX: Maximum number of RX rings
*/
-#define NFP_NET_TXR_MAX 64
-#define NFP_NET_RXR_MAX 64
+#define NFP_NET_TXR_MAX 64
+#define NFP_NET_RXR_MAX 64
/**
* Read/Write config words (0x0000 - 0x002c)
- * %NFP_NET_CFG_CTRL: Global control
+ * %NFP_NET_CFG_CTRL: Global control
* %NFP_NET_CFG_UPDATE: Indicate which fields are updated
* %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
* %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
- * %NFP_NET_CFG_MTU: Set MTU size
+ * %NFP_NET_CFG_MTU: Set MTU size
* %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU)
- * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
- * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
+ * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions
+ * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes
* %NFP_NET_CFG_MACADDR: MAC address
*
* TODO:
* - define Error details in UPDATE
*/
-#define NFP_NET_CFG_CTRL 0x0000
-#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
-#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
-#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
-#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
-#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
-#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
-#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
-#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
-#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
-#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
-#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
+#define NFP_NET_CFG_CTRL 0x0000
+#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */
+#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */
+#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */
+#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */
+#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */
+#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */
+#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */
+#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */
+#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */
+#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
+#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
#define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */
-#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
+#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */
-#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
-#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
-#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
-#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
-#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
+#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
+#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */
+#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */
+#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/
+#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */
#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
@@ -152,35 +152,35 @@
#define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \
NFP_NET_CFG_CTRL_CSUM_COMPLETE)
-#define NFP_NET_CFG_UPDATE 0x0004
-#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
-#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
-#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
-#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
-#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
-#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
-#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
-#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
-#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
+#define NFP_NET_CFG_UPDATE 0x0004
+#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
+#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
+#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */
+#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */
+#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */
+#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */
+#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */
+#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
+#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */
#define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */
#define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */
-#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
-#define NFP_NET_CFG_TXRS_ENABLE 0x0008
-#define NFP_NET_CFG_RXRS_ENABLE 0x0010
-#define NFP_NET_CFG_MTU 0x0018
-#define NFP_NET_CFG_FLBUFSZ 0x001c
-#define NFP_NET_CFG_EXN 0x001f
-#define NFP_NET_CFG_LSC 0x0020
-#define NFP_NET_CFG_MACADDR 0x0024
+#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */
+#define NFP_NET_CFG_TXRS_ENABLE 0x0008
+#define NFP_NET_CFG_RXRS_ENABLE 0x0010
+#define NFP_NET_CFG_MTU 0x0018
+#define NFP_NET_CFG_FLBUFSZ 0x001c
+#define NFP_NET_CFG_EXN 0x001f
+#define NFP_NET_CFG_LSC 0x0020
+#define NFP_NET_CFG_MACADDR 0x0024
/**
* Read-only words (0x0030 - 0x0050):
* %NFP_NET_CFG_VERSION: Firmware version number
- * %NFP_NET_CFG_STS: Status
- * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
+ * %NFP_NET_CFG_STS: Status
+ * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL)
* %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
* %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
* %NFP_NET_CFG_MAX_MTU: Maximum support MTU
@@ -190,37 +190,37 @@
* TODO:
* - define more STS bits
*/
-#define NFP_NET_CFG_VERSION 0x0030
+#define NFP_NET_CFG_VERSION 0x0030
#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24)
#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16)
-#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
+#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0
#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8)
-#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
+#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8)
#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0)
-#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
-#define NFP_NET_CFG_STS 0x0034
-#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
+#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0)
+#define NFP_NET_CFG_STS 0x0034
+#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */
/* Link rate */
#define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1
#define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF
-#define NFP_NET_CFG_STS_LINK_RATE \
+#define NFP_NET_CFG_STS_LINK_RATE \
(NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT)
#define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0
-#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
-#define NFP_NET_CFG_STS_LINK_RATE_1G 2
-#define NFP_NET_CFG_STS_LINK_RATE_10G 3
-#define NFP_NET_CFG_STS_LINK_RATE_25G 4
-#define NFP_NET_CFG_STS_LINK_RATE_40G 5
-#define NFP_NET_CFG_STS_LINK_RATE_50G 6
-#define NFP_NET_CFG_STS_LINK_RATE_100G 7
-#define NFP_NET_CFG_CAP 0x0038
-#define NFP_NET_CFG_MAX_TXRINGS 0x003c
-#define NFP_NET_CFG_MAX_RXRINGS 0x0040
-#define NFP_NET_CFG_MAX_MTU 0x0044
+#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1
+#define NFP_NET_CFG_STS_LINK_RATE_1G 2
+#define NFP_NET_CFG_STS_LINK_RATE_10G 3
+#define NFP_NET_CFG_STS_LINK_RATE_25G 4
+#define NFP_NET_CFG_STS_LINK_RATE_40G 5
+#define NFP_NET_CFG_STS_LINK_RATE_50G 6
+#define NFP_NET_CFG_STS_LINK_RATE_100G 7
+#define NFP_NET_CFG_CAP 0x0038
+#define NFP_NET_CFG_MAX_TXRINGS 0x003c
+#define NFP_NET_CFG_MAX_RXRINGS 0x0040
+#define NFP_NET_CFG_MAX_MTU 0x0044
/* Next two words are being used by VFs for solving THB350 issue */
-#define NFP_NET_CFG_START_TXQ 0x0048
-#define NFP_NET_CFG_START_RXQ 0x004c
+#define NFP_NET_CFG_START_TXQ 0x0048
+#define NFP_NET_CFG_START_RXQ 0x004c
/**
* Prepend configuration
@@ -280,8 +280,8 @@
/**
* 40B reserved for future use (0x0098 - 0x00c0)
*/
-#define NFP_NET_CFG_RESERVED 0x0098
-#define NFP_NET_CFG_RESERVED_SZ 0x0028
+#define NFP_NET_CFG_RESERVED 0x0098
+#define NFP_NET_CFG_RESERVED_SZ 0x0028
/**
* RSS configuration (0x0100 - 0x01ac):
@@ -290,26 +290,26 @@
* %NFP_NET_CFG_RSS_KEY: RSS "secret" key
* %NFP_NET_CFG_RSS_ITBL: RSS indirection table
*/
-#define NFP_NET_CFG_RSS_BASE 0x0100
-#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
-#define NFP_NET_CFG_RSS_MASK (0x7f)
-#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
-#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
-#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
-#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
-#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
-#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
-#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
+#define NFP_NET_CFG_RSS_BASE 0x0100
+#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE
+#define NFP_NET_CFG_RSS_MASK (0x7f)
+#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f)
+#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */
+#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */
+#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */
+#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */
+#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */
+#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */
#define NFP_NET_CFG_RSS_HFUNC 0xff000000
-#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
+#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */
#define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */
#define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */
#define NFP_NET_CFG_RSS_HFUNCS 3
-#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
-#define NFP_NET_CFG_RSS_KEY_SZ 0x28
-#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
+#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4)
+#define NFP_NET_CFG_RSS_KEY_SZ 0x28
+#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \
NFP_NET_CFG_RSS_KEY_SZ)
-#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
+#define NFP_NET_CFG_RSS_ITBL_SZ 0x80
/**
* TX ring configuration (0x200 - 0x800)
@@ -321,13 +321,13 @@
* %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries)
* %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
*/
-#define NFP_NET_CFG_TXR_BASE 0x0200
-#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
+#define NFP_NET_CFG_TXR_BASE 0x0200
+#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \
((_x) * 0x8))
-#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
-#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
-#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
+#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
+#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
+#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \
((_x) * 0x4))
@@ -340,11 +340,11 @@
* %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries)
* %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
*/
-#define NFP_NET_CFG_RXR_BASE 0x0800
-#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
-#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
-#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
+#define NFP_NET_CFG_RXR_BASE 0x0800
+#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
+#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
+#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \
((_x) * 0x4))
@@ -358,36 +358,36 @@
* the MSI-X entry and the host driver must clear the register to
* re-enable the interrupt.
*/
-#define NFP_NET_CFG_ICR_BASE 0x0c00
-#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
-#define NFP_NET_CFG_ICR_UNMASKED 0x0
-#define NFP_NET_CFG_ICR_RXTX 0x1
-#define NFP_NET_CFG_ICR_LSC 0x2
+#define NFP_NET_CFG_ICR_BASE 0x0c00
+#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x))
+#define NFP_NET_CFG_ICR_UNMASKED 0x0
+#define NFP_NET_CFG_ICR_RXTX 0x1
+#define NFP_NET_CFG_ICR_LSC 0x2
/**
* General device stats (0x0d00 - 0x0d90)
* all counters are 64bit.
*/
-#define NFP_NET_CFG_STATS_BASE 0x0d00
-#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
-#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
-#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
-#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
-#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
-#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
-#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
-#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
-#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
-
-#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
-#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
-#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
-#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
-#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
-#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
-#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
-#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
-#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
+#define NFP_NET_CFG_STATS_BASE 0x0d00
+#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00)
+#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08)
+#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10)
+#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18)
+#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20)
+#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28)
+#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30)
+#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38)
+#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40)
+
+#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48)
+#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50)
+#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58)
+#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60)
+#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68)
+#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70)
+#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78)
+#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
+#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
@@ -404,11 +404,11 @@
* %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count)
* %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count)
*/
-#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
-#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
+#define NFP_NET_CFG_TXR_STATS_BASE 0x1000
+#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \
((_x) * 0x10))
-#define NFP_NET_CFG_RXR_STATS_BASE 0x1400
-#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
+#define NFP_NET_CFG_RXR_STATS_BASE 0x1400
+#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
((_x) * 0x10))
/**
@@ -444,7 +444,7 @@
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
* %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
- * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
+ * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
* %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV
*
* List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
@@ -457,12 +457,12 @@
* Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
*/
#define NFP_NET_CFG_TLV_TYPE 0x00
-#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
+#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
#define NFP_NET_CFG_TLV_LENGTH 0x02
#define NFP_NET_CFG_TLV_LENGTH_INC 4
#define NFP_NET_CFG_TLV_VALUE 0x04
-#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
+#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
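
Note: the three *_TLV_HEADER_* masks pack a "required" bit, a 15-bit type and a 16-bit byte length into the single 32-bit word that starts each TLV. A minimal sketch of decoding such a word with these masks, assuming FIELD_GET() from <linux/bitfield.h>; the struct and helper below are illustrative, not the driver's actual parser:

#include <linux/bitfield.h>

/* Illustrative decode of one 4-byte TLV header word. */
struct tlv_hdr {
        bool required;          /* driver must understand this TLV        */
        unsigned int type;      /* 15-bit type field                       */
        unsigned int length;    /* payload bytes, a multiple of 4          */
};

static void tlv_hdr_decode(u32 hdr, struct tlv_hdr *out)
{
        out->required = hdr & NFP_NET_CFG_TLV_HEADER_REQUIRED;
        out->type = FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr);
        out->length = FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr);
}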
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nic/Makefile b/drivers/net/ethernet/netronome/nfp/nic/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nic/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index d5d02be72947..69051e98aff9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1799,7 +1799,7 @@ enum qed_iwarp_mpa_pkt_type {
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 9c236298fe21..5803cd6db406 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -705,7 +705,6 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
tskb = skb_copy_expand(skb, QCAFRM_HEADER_LEN,
QCAFRM_FOOTER_LEN + pad_len, GFP_ATOMIC);
if (!tskb) {
- netdev_dbg(qca->net_dev, "could not allocate tx_buff\n");
qca->stats.out_of_mem++;
return NETDEV_TX_BUSY;
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index c4949183eef3..38d9356ebcc4 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -43,6 +43,11 @@
/* Local Definitions and Declarations */
+static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = {
+ [IFLA_RMNET_MUX_ID] = { .type = NLA_U16 },
+ [IFLA_RMNET_FLAGS] = { .len = sizeof(struct ifla_rmnet_flags) },
+};
+
static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
{
return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler;
@@ -131,7 +136,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- u32 data_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
+ u32 data_format = RMNET_FLAGS_INGRESS_DEAGGREGATION;
struct net_device *real_dev;
int mode = RMNET_EPMODE_VND;
struct rmnet_endpoint *ep;
@@ -143,14 +148,14 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (!real_dev || !dev)
return -ENODEV;
- if (!data[IFLA_VLAN_ID])
+ if (!data[IFLA_RMNET_MUX_ID])
return -EINVAL;
ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
if (!ep)
return -ENOMEM;
- mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
err = rmnet_register_real_device(real_dev);
if (err)
@@ -165,10 +170,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
- if (data[IFLA_VLAN_FLAGS]) {
- struct ifla_vlan_flags *flags;
+ if (data[IFLA_RMNET_FLAGS]) {
+ struct ifla_rmnet_flags *flags;
- flags = nla_data(data[IFLA_VLAN_FLAGS]);
+ flags = nla_data(data[IFLA_RMNET_FLAGS]);
data_format = flags->flags & flags->mask;
}
@@ -276,10 +281,10 @@ static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
{
u16 mux_id;
- if (!data || !data[IFLA_VLAN_ID])
+ if (!data || !data[IFLA_RMNET_MUX_ID])
return -EINVAL;
- mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
if (mux_id > (RMNET_MAX_LOGICAL_EP - 1))
return -ERANGE;
@@ -304,8 +309,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
port = rmnet_get_port_rtnl(real_dev);
- if (data[IFLA_VLAN_ID]) {
- mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ if (data[IFLA_RMNET_MUX_ID]) {
+ mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
ep = rmnet_get_endpoint(port, priv->mux_id);
hlist_del_init_rcu(&ep->hlnode);
@@ -315,10 +320,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
priv->mux_id = mux_id;
}
- if (data[IFLA_VLAN_FLAGS]) {
- struct ifla_vlan_flags *flags;
+ if (data[IFLA_RMNET_FLAGS]) {
+ struct ifla_rmnet_flags *flags;
- flags = nla_data(data[IFLA_VLAN_FLAGS]);
+ flags = nla_data(data[IFLA_RMNET_FLAGS]);
port->data_format = flags->flags & flags->mask;
}
@@ -327,13 +332,45 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
static size_t rmnet_get_size(const struct net_device *dev)
{
- return nla_total_size(2) /* IFLA_VLAN_ID */ +
- nla_total_size(sizeof(struct ifla_vlan_flags)); /* IFLA_VLAN_FLAGS */
+ return
+ /* IFLA_RMNET_MUX_ID */
+ nla_total_size(2) +
+ /* IFLA_RMNET_FLAGS */
+ nla_total_size(sizeof(struct ifla_rmnet_flags));
+}
+
+static int rmnet_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct rmnet_priv *priv = netdev_priv(dev);
+ struct net_device *real_dev;
+ struct ifla_rmnet_flags f;
+ struct rmnet_port *port;
+
+ real_dev = priv->real_dev;
+
+ if (!rmnet_is_real_dev_registered(real_dev))
+ return -ENODEV;
+
+ if (nla_put_u16(skb, IFLA_RMNET_MUX_ID, priv->mux_id))
+ goto nla_put_failure;
+
+ port = rmnet_get_port_rtnl(real_dev);
+
+ f.flags = port->data_format;
+ f.mask = ~0;
+
+ if (nla_put(skb, IFLA_RMNET_FLAGS, sizeof(f), &f))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
}
struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.kind = "rmnet",
- .maxtype = __IFLA_VLAN_MAX,
+ .maxtype = __IFLA_RMNET_MAX,
.priv_size = sizeof(struct rmnet_priv),
.setup = rmnet_vnd_setup,
.validate = rmnet_rtnl_validate,
@@ -341,6 +378,8 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.dellink = rmnet_dellink,
.get_size = rmnet_get_size,
.changelink = rmnet_changelink,
+ .policy = rmnet_policy,
+ .fill_info = rmnet_fill_info,
};
/* Needs either rcu_read_lock() or rtnl lock */
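
Note: with rmnet now defining its own IFLA_RMNET_MUX_ID and IFLA_RMNET_FLAGS attributes instead of borrowing the VLAN ones, the diff above wires three pieces into rtnl_link_ops: an nla_policy so the rtnetlink core validates attribute type and size, get_size/fill_info so the attributes are reported back in link dumps, and maxtype bumped to __IFLA_RMNET_MAX. A condensed sketch of the same pattern for a hypothetical "foo" link type (every name below is illustrative):

/* Per-type attributes, a validation policy and a fill_info handler so
 * the attributes round-trip through netlink dumps.
 */
enum {
        IFLA_FOO_UNSPEC,
        IFLA_FOO_ID,                            /* u16 instance id */
        __IFLA_FOO_MAX,
};
#define IFLA_FOO_MAX (__IFLA_FOO_MAX - 1)

static const struct nla_policy foo_policy[IFLA_FOO_MAX + 1] = {
        [IFLA_FOO_ID] = { .type = NLA_U16 },
};

static size_t foo_get_size(const struct net_device *dev)
{
        return nla_total_size(2);               /* IFLA_FOO_ID */
}

static int foo_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        const struct foo_priv *priv = netdev_priv(dev);

        if (nla_put_u16(skb, IFLA_FOO_ID, priv->id))
                return -EMSGSIZE;
        return 0;
}

Userspace would then create and inspect such a link with iproute2, roughly `ip link add link wwan0 name rmnet0 type rmnet mux_id 1` followed by `ip -d link show rmnet0`; the exact syntax depends on the iproute2 version carrying rmnet support.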
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 00e4634100d3..0b5b5da80198 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 601edec28c5f..6fcd586e9804 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -70,7 +70,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
u8 mux_id;
if (RMNET_MAP_GET_CD_BIT(skb)) {
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
return rmnet_map_command(skb, port);
goto free_skb;
@@ -93,7 +93,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
skb_pull(skb, sizeof(struct rmnet_map_header));
rmnet_set_skb_proto(skb);
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -121,7 +121,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb,
skb_push(skb, ETH_HLEN);
}
- if (port->data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
+ if (port->data_format & RMNET_FLAGS_INGRESS_DEAGGREGATION) {
while ((skbn = rmnet_map_deaggregate(skb, port)) != NULL)
__rmnet_map_ingress_handler(skbn, port);
@@ -141,7 +141,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
additional_header_len = 0;
required_headroom = sizeof(struct rmnet_map_header);
- if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4) {
+ if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4) {
additional_header_len = sizeof(struct rmnet_map_ul_csum_header);
required_headroom += additional_header_len;
}
@@ -151,7 +151,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
goto fail;
}
- if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4)
+ if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4)
rmnet_map_checksum_uplink_packet(skb, orig_dev);
map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 6ce31e29136d..884f1f52dcc2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -23,8 +23,8 @@ struct rmnet_map_control_command {
struct {
u16 ip_family:2;
u16 reserved:14;
- u16 flow_control_seq_num;
- u32 qos_id;
+ __be16 flow_control_seq_num;
+ __be32 qos_id;
} flow_control;
u8 data[0];
};
@@ -44,7 +44,7 @@ struct rmnet_map_header {
u8 reserved_bit:1;
u8 cd_bit:1;
u8 mux_id;
- u16 pkt_len;
+ __be16 pkt_len;
} __aligned(1);
struct rmnet_map_dl_csum_trailer {
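
Note: switching pkt_len, flow_control_seq_num and qos_id to __be16/__be32 does not change the struct layout; it documents that these MAP header fields are in network byte order and lets sparse (make C=1) warn when a byteswap is missing. The deaggregation path further down already performs the conversion; a minimal sketch of the access pattern (helper name illustrative):

/* Sketch: a wire-order field must be converted before arithmetic.
 * With __be16 typing, sparse flags a direct use of maph->pkt_len.
 */
static unsigned int map_frame_len(const struct rmnet_map_header *maph)
{
        return ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
}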
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index b0dbca070c00..78fdad0c6f76 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -69,7 +69,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
struct rmnet_map_control_command *cmd;
int xmit_status;
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
if (skb->len < sizeof(struct rmnet_map_header) +
RMNET_MAP_GET_LENGTH(skb) +
sizeof(struct rmnet_map_dl_csum_trailer)) {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index c74a6c56d315..a6ea09416f8d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -309,7 +309,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
maph = (struct rmnet_map_header *)skb->data;
packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
- if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4)
+ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
if (((int)skb->len - (int)packet_len) < 0)
@@ -323,7 +323,6 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
if (!skbn)
return NULL;
- skbn->dev = skb->dev;
skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
skb_put(skbn, packet_len);
memcpy(skbn->data, skb->data, packet_len);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index de0143eaa05a..b9cc4f85f229 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,12 +18,6 @@
#define RMNET_NEEDED_HEADROOM 16
#define RMNET_TX_QUEUE_LEN 1000
-/* Constants */
-#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(0)
-#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(1)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(2)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(3)
-
/* Replace skb->dev to a virtual rmnet device and pass up the stack */
#define RMNET_EPMODE_VND (1)
/* Pass the frame directly to another device with dev_queue_xmit() */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 346d310914df..2ea16a088de8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0bf7d1759250..630409e0337f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -99,12 +99,12 @@ static const int multicast_filter_limit = 32;
#define RTL8169_PHY_TIMEOUT (10*HZ)
/* write/read MMIO register */
-#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg))
-#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg))
-#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg))
-#define RTL_R8(reg) readb (ioaddr + (reg))
-#define RTL_R16(reg) readw (ioaddr + (reg))
-#define RTL_R32(reg) readl (ioaddr + (reg))
+#define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg))
+#define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg))
+#define RTL_W32(tp, reg, val32) writel((val32), tp->mmio_addr + (reg))
+#define RTL_R8(tp, reg) readb(tp->mmio_addr + (reg))
+#define RTL_R16(tp, reg) readw(tp->mmio_addr + (reg))
+#define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg))
enum mac_version {
RTL_GIGA_MAC_VER_01 = 0,
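
Note: the reworked RTL_W*/RTL_R* macros above take the private struct and dereference tp->mmio_addr themselves, which is what allows the rest of this patch to delete the `void __iomem *ioaddr = tp->mmio_addr;` local from dozens of helpers. The calling convention changes as in this condensed illustration (not a hunk from the patch):

/* Old style: each helper had to materialise ioaddr for the macro. */
static u16 rtl_get_events_old(struct rtl8169_private *tp)
{
        void __iomem *ioaddr = tp->mmio_addr;

        return RTL_R16(IntrStatus);
}

/* New style: pass tp, the macro finds the MMIO base on its own. */
static u16 rtl_get_events_new(struct rtl8169_private *tp)
{
        return RTL_R16(tp, IntrStatus);
}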
@@ -735,12 +735,6 @@ struct ring_info {
u8 __pad[sizeof(void *) - sizeof(u32)];
};
-enum features {
- RTL_FEATURE_WOL = (1 << 0),
- RTL_FEATURE_MSI = (1 << 1),
- RTL_FEATURE_GMII = (1 << 2),
-};
-
struct rtl8169_counters {
__le64 tx_packets;
__le64 rx_packets;
@@ -829,7 +823,7 @@ struct rtl8169_private {
void (*phy_reset_enable)(struct rtl8169_private *tp);
void (*hw_start)(struct net_device *);
unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
- unsigned int (*link_ok)(void __iomem *);
+ unsigned int (*link_ok)(struct rtl8169_private *tp);
int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
@@ -893,6 +887,11 @@ MODULE_FIRMWARE(FIRMWARE_8168H_2);
MODULE_FIRMWARE(FIRMWARE_8107E_1);
MODULE_FIRMWARE(FIRMWARE_8107E_2);
+static inline struct device *tp_to_dev(struct rtl8169_private *tp)
+{
+ return &tp->pci_dev->dev;
+}
+
static void rtl_lock_work(struct rtl8169_private *tp)
{
mutex_lock(&tp->wk.mutex);
@@ -903,9 +902,9 @@ static void rtl_unlock_work(struct rtl8169_private *tp)
mutex_unlock(&tp->wk.mutex);
}
-static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
+static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
{
- pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+ pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_READRQ, force);
}
@@ -984,56 +983,46 @@ static bool rtl_ocp_reg_failure(struct rtl8169_private *tp, u32 reg)
DECLARE_RTL_COND(rtl_ocp_gphy_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(GPHY_OCP) & OCPAR_FLAG;
+ return RTL_R32(tp, GPHY_OCP) & OCPAR_FLAG;
}
static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return;
- RTL_W32(GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
+ RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
}
static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return 0;
- RTL_W32(GPHY_OCP, reg << 15);
+ RTL_W32(tp, GPHY_OCP, reg << 15);
return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
- (RTL_R32(GPHY_OCP) & 0xffff) : ~0;
+ (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
}
static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return;
- RTL_W32(OCPDR, OCPAR_FLAG | (reg << 15) | data);
+ RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
}
static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_ocp_reg_failure(tp, reg))
return 0;
- RTL_W32(OCPDR, reg << 15);
+ RTL_W32(tp, OCPDR, reg << 15);
- return RTL_R32(OCPDR);
+ return RTL_R32(tp, OCPDR);
}
#define OCP_STD_PHY_BASE 0xa400
@@ -1076,16 +1065,12 @@ static int mac_mcu_read(struct rtl8169_private *tp, int reg)
DECLARE_RTL_COND(rtl_phyar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(PHYAR) & 0x80000000;
+ return RTL_R32(tp, PHYAR) & 0x80000000;
}
static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
+ RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
/*
@@ -1097,13 +1082,12 @@ static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
int value;
- RTL_W32(PHYAR, 0x0 | (reg & 0x1f) << 16);
+ RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
- RTL_R32(PHYAR) & 0xffff : ~0;
+ RTL_R32(tp, PHYAR) & 0xffff : ~0;
/*
* According to hardware specs a 20us delay is required after read
@@ -1116,18 +1100,14 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
DECLARE_RTL_COND(rtl_ocpar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(OCPAR) & OCPAR_FLAG;
+ return RTL_R32(tp, OCPAR) & OCPAR_FLAG;
}
static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
- RTL_W32(OCPAR, OCPAR_GPHY_WRITE_CMD);
- RTL_W32(EPHY_RXER_NUM, 0);
+ RTL_W32(tp, OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
+ RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
+ RTL_W32(tp, EPHY_RXER_NUM, 0);
rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
}
@@ -1140,51 +1120,46 @@ static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg, int value)
static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168dp_1_mdio_access(tp, reg, OCPDR_READ_CMD);
mdelay(1);
- RTL_W32(OCPAR, OCPAR_GPHY_READ_CMD);
- RTL_W32(EPHY_RXER_NUM, 0);
+ RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
+ RTL_W32(tp, EPHY_RXER_NUM, 0);
return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
- RTL_R32(OCPDR) & OCPDR_DATA_MASK : ~0;
+ RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
}
#define R8168DP_1_MDIO_ACCESS_BIT 0x00020000
-static void r8168dp_2_mdio_start(void __iomem *ioaddr)
+static void r8168dp_2_mdio_start(struct rtl8169_private *tp)
{
- RTL_W32(0xd0, RTL_R32(0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
+ RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
}
-static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
+static void r8168dp_2_mdio_stop(struct rtl8169_private *tp)
{
- RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
+ RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
}
static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- r8168dp_2_mdio_start(ioaddr);
+ r8168dp_2_mdio_start(tp);
r8169_mdio_write(tp, reg, value);
- r8168dp_2_mdio_stop(ioaddr);
+ r8168dp_2_mdio_stop(tp);
}
static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
int value;
- r8168dp_2_mdio_start(ioaddr);
+ r8168dp_2_mdio_start(tp);
value = r8169_mdio_read(tp, reg);
- r8168dp_2_mdio_stop(ioaddr);
+ r8168dp_2_mdio_stop(tp);
return value;
}
@@ -1229,16 +1204,12 @@ static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
DECLARE_RTL_COND(rtl_ephyar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(EPHYAR) & EPHYAR_FLAG;
+ return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
}
static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
+ RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
(reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
@@ -1248,41 +1219,33 @@ static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
+ RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
- RTL_R32(EPHYAR) & EPHYAR_DATA_MASK : ~0;
+ RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
}
DECLARE_RTL_COND(rtl_eriar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(ERIAR) & ERIAR_FLAG;
+ return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
}
static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
u32 val, int type)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
BUG_ON((addr & 3) || (mask == 0));
- RTL_W32(ERIDR, val);
- RTL_W32(ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
+ RTL_W32(tp, ERIDR, val);
+ RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
}
static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
+ RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
- RTL_R32(ERIDR) : ~0;
+ RTL_R32(tp, ERIDR) : ~0;
}
static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
@@ -1296,11 +1259,9 @@ static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+ RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
- RTL_R32(OCPDR) : ~0;
+ RTL_R32(tp, OCPDR) : ~0;
}
static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1328,10 +1289,8 @@ static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
u32 data)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(OCPDR, data);
- RTL_W32(OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+ RTL_W32(tp, OCPDR, data);
+ RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
}
@@ -1393,19 +1352,15 @@ DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
DECLARE_RTL_COND(rtl_ocp_tx_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(IBISR0) & 0x20;
+ return RTL_R8(tp, IBISR0) & 0x20;
}
static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+ RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
- RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
- RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+ RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
+ RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
@@ -1473,19 +1428,19 @@ static void rtl8168_driver_stop(struct rtl8169_private *tp)
}
}
-static int r8168dp_check_dash(struct rtl8169_private *tp)
+static bool r8168dp_check_dash(struct rtl8169_private *tp)
{
u16 reg = rtl8168_get_ocp_reg(tp);
- return (ocp_read(tp, 0x0f, reg) & 0x00008000) ? 1 : 0;
+ return !!(ocp_read(tp, 0x0f, reg) & 0x00008000);
}
-static int r8168ep_check_dash(struct rtl8169_private *tp)
+static bool r8168ep_check_dash(struct rtl8169_private *tp)
{
- return (ocp_read(tp, 0x0f, 0x128) & 0x00000001) ? 1 : 0;
+ return !!(ocp_read(tp, 0x0f, 0x128) & 0x00000001);
}
-static int r8168_check_dash(struct rtl8169_private *tp)
+static bool r8168_check_dash(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_27:
@@ -1497,7 +1452,7 @@ static int r8168_check_dash(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_51:
return r8168ep_check_dash(tp);
default:
- return 0;
+ return false;
}
}
@@ -1518,49 +1473,37 @@ static void rtl_write_exgmac_batch(struct rtl8169_private *tp,
DECLARE_RTL_COND(rtl_efusear_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(EFUSEAR) & EFUSEAR_FLAG;
+ return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
}
static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
+ RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
- RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
+ RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
}
static u16 rtl_get_events(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R16(IntrStatus);
+ return RTL_R16(tp, IntrStatus);
}
static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrStatus, bits);
+ RTL_W16(tp, IntrStatus, bits);
mmiowb();
}
static void rtl_irq_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrMask, 0);
+ RTL_W16(tp, IntrMask, 0);
mmiowb();
}
static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W16(IntrMask, bits);
+ RTL_W16(tp, IntrMask, bits);
}
#define RTL_EVENT_NAPI_RX (RxOK | RxErr)
@@ -1574,18 +1517,14 @@ static void rtl_irq_enable_all(struct rtl8169_private *tp)
static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_irq_disable(tp);
rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
- RTL_R8(ChipCmd);
+ RTL_R8(tp, ChipCmd);
}
static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(TBICSR) & TBIReset;
+ return RTL_R32(tp, TBICSR) & TBIReset;
}
static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
@@ -1593,21 +1532,19 @@ static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
}
-static unsigned int rtl8169_tbi_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
{
- return RTL_R32(TBICSR) & TBILinkOk;
+ return RTL_R32(tp, TBICSR) & TBILinkOk;
}
-static unsigned int rtl8169_xmii_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
{
- return RTL_R8(PHYstatus) & LinkStatus;
+ return RTL_R8(tp, PHYstatus) & LinkStatus;
}
static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(TBICSR, RTL_R32(TBICSR) | TBIReset);
+ RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
}
static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
@@ -1620,7 +1557,6 @@ static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
static void rtl_link_chg_patch(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
struct net_device *dev = tp->dev;
if (!netif_running(dev))
@@ -1628,12 +1564,12 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
tp->mac_version == RTL_GIGA_MAC_VER_38) {
- if (RTL_R8(PHYstatus) & _1000bpsF) {
+ if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
ERIAR_EXGMAC);
- } else if (RTL_R8(PHYstatus) & _100bps) {
+ } else if (RTL_R8(tp, PHYstatus) & _100bps) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1651,7 +1587,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
ERIAR_EXGMAC);
} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
tp->mac_version == RTL_GIGA_MAC_VER_36) {
- if (RTL_R8(PHYstatus) & _1000bpsF) {
+ if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1663,7 +1599,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
ERIAR_EXGMAC);
}
} else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
- if (RTL_R8(PHYstatus) & _10bps) {
+ if (RTL_R8(tp, PHYstatus) & _10bps) {
rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
ERIAR_EXGMAC);
rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
@@ -1676,20 +1612,21 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
}
static void rtl8169_check_link_status(struct net_device *dev,
- struct rtl8169_private *tp,
- void __iomem *ioaddr)
+ struct rtl8169_private *tp)
{
- if (tp->link_ok(ioaddr)) {
+ struct device *d = tp_to_dev(tp);
+
+ if (tp->link_ok(tp)) {
rtl_link_chg_patch(tp);
/* This is to cancel a scheduled suspend if there's one. */
- pm_request_resume(&tp->pci_dev->dev);
+ pm_request_resume(d);
netif_carrier_on(dev);
if (net_ratelimit())
netif_info(tp, ifup, dev, "link up\n");
} else {
netif_carrier_off(dev);
netif_info(tp, ifdown, dev, "link down\n");
- pm_runtime_idle(&tp->pci_dev->dev);
+ pm_runtime_idle(d);
}
}
@@ -1697,15 +1634,14 @@ static void rtl8169_check_link_status(struct net_device *dev,
static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u8 options;
u32 wolopts = 0;
- options = RTL_R8(Config1);
+ options = RTL_R8(tp, Config1);
if (!(options & PMEnable))
return 0;
- options = RTL_R8(Config3);
+ options = RTL_R8(tp, Config3);
if (options & LinkUp)
wolopts |= WAKE_PHY;
switch (tp->mac_version) {
@@ -1735,7 +1671,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
break;
}
- options = RTL_R8(Config5);
+ options = RTL_R8(tp, Config5);
if (options & UWF)
wolopts |= WAKE_UCAST;
if (options & BWF)
@@ -1749,7 +1685,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
pm_runtime_get_noresume(d);
@@ -1768,7 +1704,6 @@ static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
{
- void __iomem *ioaddr = tp->mmio_addr;
unsigned int i, tmp;
static const struct {
u32 opt;
@@ -1784,7 +1719,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
};
u8 options;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_34:
@@ -1826,43 +1761,39 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
}
for (i = 0; i < tmp; i++) {
- options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+ options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
- RTL_W8(cfg[i].reg, options);
+ RTL_W8(tp, cfg[i].reg, options);
}
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
- options = RTL_R8(Config1) & ~PMEnable;
+ options = RTL_R8(tp, Config1) & ~PMEnable;
if (wolopts)
options |= PMEnable;
- RTL_W8(Config1, options);
+ RTL_W8(tp, Config1, options);
break;
default:
- options = RTL_R8(Config2) & ~PME_SIGNAL;
+ options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
if (wolopts)
options |= PME_SIGNAL;
- RTL_W8(Config2, options);
+ RTL_W8(tp, Config2, options);
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
pm_runtime_get_noresume(d);
rtl_lock_work(tp);
- if (wol->wolopts)
- tp->features |= RTL_FEATURE_WOL;
- else
- tp->features &= ~RTL_FEATURE_WOL;
if (pm_runtime_active(d))
__rtl8169_set_wol(tp, wol->wolopts);
else
@@ -1870,7 +1801,7 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
rtl_unlock_work(tp);
- device_set_wakeup_enable(&tp->pci_dev->dev, wol->wolopts);
+ device_set_wakeup_enable(d, wol->wolopts);
pm_runtime_put_noidle(d);
@@ -1906,16 +1837,15 @@ static int rtl8169_set_speed_tbi(struct net_device *dev,
u8 autoneg, u16 speed, u8 duplex, u32 ignored)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
int ret = 0;
u32 reg;
- reg = RTL_R32(TBICSR);
+ reg = RTL_R32(tp, TBICSR);
if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
(duplex == DUPLEX_FULL)) {
- RTL_W32(TBICSR, reg & ~(TBINwEnable | TBINwRestart));
+ RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
} else if (autoneg == AUTONEG_ENABLE)
- RTL_W32(TBICSR, reg | TBINwEnable | TBINwRestart);
+ RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
else {
netif_warn(tp, link, dev,
"incorrect speed setting refused in TBI mode\n");
@@ -2040,16 +1970,15 @@ static void __rtl8169_set_features(struct net_device *dev,
netdev_features_t features)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 rx_config;
- rx_config = RTL_R32(RxConfig);
+ rx_config = RTL_R32(tp, RxConfig);
if (features & NETIF_F_RXALL)
rx_config |= (AcceptErr | AcceptRunt);
else
rx_config &= ~(AcceptErr | AcceptRunt);
- RTL_W32(RxConfig, rx_config);
+ RTL_W32(tp, RxConfig, rx_config);
if (features & NETIF_F_RXCSUM)
tp->cp_cmd |= RxChkSum;
@@ -2061,10 +1990,10 @@ static void __rtl8169_set_features(struct net_device *dev,
else
tp->cp_cmd &= ~RxVlan;
- tp->cp_cmd |= RTL_R16(CPlusCmd) & ~(RxVlan | RxChkSum);
+ tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
- RTL_W16(CPlusCmd, tp->cp_cmd);
- RTL_R16(CPlusCmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ RTL_R16(tp, CPlusCmd);
}
static int rtl8169_set_features(struct net_device *dev,
@@ -2101,7 +2030,6 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 status;
u32 supported, advertising;
@@ -2109,7 +2037,7 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
cmd->base.port = PORT_FIBRE;
- status = RTL_R32(TBICSR);
+ status = RTL_R32(tp, TBICSR);
advertising = (status & TBINwEnable) ? ADVERTISED_Autoneg : 0;
cmd->base.autoneg = !!(status & TBINwEnable);
@@ -2224,23 +2152,20 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset)
DECLARE_RTL_COND(rtl_counters_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
+ return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
}
static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd;
- RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
- RTL_R32(CounterAddrHigh);
+ RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+ RTL_R32(tp, CounterAddrHigh);
cmd = (u64)paddr & DMA_BIT_MASK(32);
- RTL_W32(CounterAddrLow, cmd);
- RTL_W32(CounterAddrLow, cmd | counter_cmd);
+ RTL_W32(tp, CounterAddrLow, cmd);
+ RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
}
@@ -2262,13 +2187,12 @@ static bool rtl8169_reset_counters(struct net_device *dev)
static bool rtl8169_update_counters(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
/*
* Some chips are unable to dump tally counters when the receiver
* is disabled.
*/
- if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
+ if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
return true;
return rtl8169_do_counters(dev, CounterDump);
@@ -2317,7 +2241,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
struct rtl8169_counters *counters = tp->counters;
ASSERT_RTNL();
@@ -2448,7 +2372,6 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
const struct rtl_coalesce_info *ci;
const struct rtl_coalesce_scale *scale;
struct {
@@ -2468,10 +2391,10 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
if (IS_ERR(ci))
return PTR_ERR(ci);
- scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+ scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
/* read IntrMitigate and adjust according to scale */
- for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+ for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
w >>= RTL_COALESCE_SHIFT;
*p->usecs = w & RTL_COALESCE_MASK;
@@ -2518,7 +2441,6 @@ static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
const struct rtl_coalesce_scale *scale;
struct {
u32 frames;
@@ -2566,11 +2488,11 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
rtl_lock_work(tp);
- RTL_W16(IntrMitigate, swab16(w));
+ RTL_W16(tp, IntrMitigate, swab16(w));
tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
- RTL_W16(CPlusCmd, tp->cp_cmd);
- RTL_R16(CPlusCmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+ RTL_R16(tp, CPlusCmd);
rtl_unlock_work(tp);
@@ -2600,17 +2522,16 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
static void rtl8169_get_mac_version(struct rtl8169_private *tp,
struct net_device *dev, u8 default_version)
{
- void __iomem *ioaddr = tp->mmio_addr;
/*
* The driver currently handles the 8168Bf and the 8168Be identically
* but they can be identified more specifically through the test below
* if needed:
*
- * (RTL_R32(TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
+ * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
*
* Same thing for the 8101Eb and the 8101Ec:
*
- * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
+ * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
*/
static const struct rtl_mac_info {
u32 mask;
@@ -2708,7 +2629,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
const struct rtl_mac_info *p = mac_info;
u32 reg;
- reg = RTL_R32(TxConfig);
+ reg = RTL_R32(tp, TxConfig);
while ((reg & p->mask) != p->val)
p++;
tp->mac_version = p->mac_version;
@@ -3805,8 +3726,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
rtl_writephy(tp, 0x1f, 0x0000);
- /* soft-reset phy */
- rtl_writephy(tp, MII_BMCR, BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
@@ -4591,7 +4510,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
static void rtl_phy_work(struct rtl8169_private *tp)
{
struct timer_list *timer = &tp->timer;
- void __iomem *ioaddr = tp->mmio_addr;
unsigned long timeout = RTL8169_PHY_TIMEOUT;
assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
@@ -4605,7 +4523,7 @@ static void rtl_phy_work(struct rtl8169_private *tp)
goto out_mod_timer;
}
- if (tp->link_ok(ioaddr))
+ if (tp->link_ok(tp))
return;
netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
@@ -4643,21 +4561,17 @@ static void rtl8169_phy_reset(struct net_device *dev,
static bool rtl_tbi_enabled(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
- (RTL_R8(PHYstatus) & TBI_Enable);
+ (RTL_R8(tp, PHYstatus) & TBI_Enable);
}
static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_hw_phy_config(dev);
if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(0x82, 0x01);
+ RTL_W8(tp, 0x82, 0x01);
}
pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
@@ -4667,7 +4581,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(0x82, 0x01);
+ RTL_W8(tp, 0x82, 0x01);
dprintk("Set PHY Reg 0x0bh = 0x00h\n");
rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
}
@@ -4687,22 +4601,20 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_lock_work(tp);
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W32(MAC4, addr[4] | addr[5] << 8);
- RTL_R32(MAC4);
+ RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
+ RTL_R32(tp, MAC4);
- RTL_W32(MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
- RTL_R32(MAC0);
+ RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
+ RTL_R32(tp, MAC0);
if (tp->mac_version == RTL_GIGA_MAC_VER_34)
rtl_rar_exgmac_set(tp, addr);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
rtl_unlock_work(tp);
}
@@ -4710,13 +4622,12 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
static int rtl_set_mac_address(struct net_device *dev, void *p)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct device *d = &tp->pci_dev->dev;
- struct sockaddr *addr = p;
-
- if (!is_valid_ether_addr(addr->sa_data))
- return -EADDRNOTAVAIL;
+ struct device *d = tp_to_dev(tp);
+ int ret;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ ret = eth_mac_addr(dev, p);
+ if (ret)
+ return ret;
pm_runtime_get_noresume(d);
@@ -4822,8 +4733,6 @@ static void rtl_speed_down(struct rtl8169_private *tp)
static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
@@ -4847,7 +4756,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W32(RxConfig, RTL_R32(RxConfig) |
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
break;
default:
@@ -4880,8 +4789,6 @@ static void r810x_phy_power_up(struct rtl8169_private *tp)
static void r810x_pll_power_down(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
if (rtl_wol_pll_power_down(tp))
return;
@@ -4896,15 +4803,13 @@ static void r810x_pll_power_down(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_16:
break;
default:
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
}
}
static void r810x_pll_power_up(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r810x_phy_power_up(tp);
switch (tp->mac_version) {
@@ -4917,10 +4822,10 @@ static void r810x_pll_power_up(struct rtl8169_private *tp)
break;
case RTL_GIGA_MAC_VER_47:
case RTL_GIGA_MAC_VER_48:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
default:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
}
}
@@ -4987,21 +4892,12 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
static void r8168_pll_power_down(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
return;
- }
if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
tp->mac_version == RTL_GIGA_MAC_VER_24) &&
- (RTL_R16(CPlusCmd) & ASF)) {
+ (RTL_R16(tp, CPlusCmd) & ASF)) {
return;
}
@@ -5027,22 +4923,20 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
0xfc000000, ERIAR_EXGMAC);
- RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
}
}
static void r8168_pll_power_up(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
@@ -5051,19 +4945,19 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_31:
case RTL_GIGA_MAC_VER_32:
case RTL_GIGA_MAC_VER_33:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
case RTL_GIGA_MAC_VER_44:
case RTL_GIGA_MAC_VER_45:
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
- RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+ RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
0x00000000, ERIAR_EXGMAC);
break;
@@ -5153,8 +5047,6 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
static void rtl_init_rxcfg(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_01:
case RTL_GIGA_MAC_VER_02:
@@ -5170,7 +5062,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_15:
case RTL_GIGA_MAC_VER_16:
case RTL_GIGA_MAC_VER_17:
- RTL_W32(RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_18:
case RTL_GIGA_MAC_VER_19:
@@ -5181,7 +5073,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_24:
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_35:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
@@ -5195,10 +5087,10 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
break;
default:
- RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
+ RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
break;
}
}
@@ -5210,102 +5102,82 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
rtl_generic_op(tp, tp->jumbo_ops.enable);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
rtl_generic_op(tp, tp->jumbo_ops.disable);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
}
static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
- rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
}
static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
- rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
}
static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
}
static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
}
static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(MaxTxPacketSize, 0x3f);
- RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) | 0x01);
- rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+ RTL_W8(tp, MaxTxPacketSize, 0x3f);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
}
static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(MaxTxPacketSize, 0x0c);
- RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
- RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
- rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ RTL_W8(tp, MaxTxPacketSize, 0x0c);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
}
static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
{
- rtl_tx_performance_tweak(tp->pci_dev,
+ rtl_tx_performance_tweak(tp,
PCI_EXP_DEVCTL_READRQ_512B | PCI_EXP_DEVCTL_NOSNOOP_EN);
}
static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
{
- rtl_tx_performance_tweak(tp->pci_dev,
+ rtl_tx_performance_tweak(tp,
(0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
}
static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168b_0_hw_jumbo_enable(tp);
- RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
}
static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
r8168b_0_hw_jumbo_disable(tp);
- RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
}
static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
@@ -5372,16 +5244,12 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
DECLARE_RTL_COND(rtl_chipcmd_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(ChipCmd) & CmdReset;
+ return RTL_R8(tp, ChipCmd) & CmdReset;
}
static void rtl_hw_reset(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(ChipCmd, CmdReset);
+ RTL_W8(tp, ChipCmd, CmdReset);
rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
}
@@ -5400,7 +5268,7 @@ static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
if (!rtl_fw)
goto err_warn;
- rc = request_firmware(&rtl_fw->fw, name, &tp->pci_dev->dev);
+ rc = request_firmware(&rtl_fw->fw, name, tp_to_dev(tp));
if (rc < 0)
goto err_free;
@@ -5432,29 +5300,21 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
static void rtl_rx_close(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(RxConfig, RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
+ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
}
DECLARE_RTL_COND(rtl_npq_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(TxPoll) & NPQ;
+ return RTL_R8(tp, TxPoll) & NPQ;
}
DECLARE_RTL_COND(rtl_txcfg_empty_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(TxConfig) & TXCFG_EMPTY;
+ return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
}
static void rtl8169_hw_reset(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Disable interrupts */
rtl8169_irq_mask_and_ack(tp);
@@ -5481,10 +5341,10 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
tp->mac_version == RTL_GIGA_MAC_VER_49 ||
tp->mac_version == RTL_GIGA_MAC_VER_50 ||
tp->mac_version == RTL_GIGA_MAC_VER_51) {
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
} else {
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
udelay(100);
}
@@ -5493,10 +5353,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Set DMA burst size and Interframe Gap Time */
- RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) |
+ RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
(InterFrameGap << TxInterFrameGapShift));
}
@@ -5509,36 +5367,35 @@ static void rtl_hw_start(struct net_device *dev)
rtl_irq_enable_all(tp);
}
-static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp,
- void __iomem *ioaddr)
+static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
{
/*
* Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
* register to be written before TxDescAddrLow to work.
* Switching from MMIO to I/O access fixes the issue as well.
*/
- RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
- RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
- RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
- RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+ RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
+ RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
+ RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
+ RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
}
-static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
+static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
{
u16 cmd;
- cmd = RTL_R16(CPlusCmd);
- RTL_W16(CPlusCmd, cmd);
+ cmd = RTL_R16(tp, CPlusCmd);
+ RTL_W16(tp, CPlusCmd, cmd);
return cmd;
}
-static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
{
/* Low hurts. Let's disable the filtering. */
- RTL_W16(RxMaxSize, rx_buf_sz + 1);
+ RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
}
-static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
{
static const struct rtl_cfg2_info {
u32 mac_version;
@@ -5554,10 +5411,10 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
unsigned int i;
u32 clk;
- clk = RTL_R8(Config2) & PCI_Clock_66MHz;
+ clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
if ((p->mac_version == mac_version) && (p->clk == clk)) {
- RTL_W32(0x7c, p->val);
+ RTL_W32(tp, 0x7c, p->val);
break;
}
}
@@ -5566,7 +5423,6 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
static void rtl_set_rx_mode(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
u32 mc_filter[2]; /* Multicast hash filter */
int rx_mode;
u32 tmp = 0;
@@ -5598,7 +5454,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
if (dev->features & NETIF_F_RXALL)
rx_mode |= (AcceptErr | AcceptRunt);
- tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
+ tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
u32 data = mc_filter[0];
@@ -5610,35 +5466,34 @@ static void rtl_set_rx_mode(struct net_device *dev)
if (tp->mac_version == RTL_GIGA_MAC_VER_35)
mc_filter[1] = mc_filter[0] = 0xffffffff;
- RTL_W32(MAR0 + 4, mc_filter[1]);
- RTL_W32(MAR0 + 0, mc_filter[0]);
+ RTL_W32(tp, MAR0 + 4, mc_filter[1]);
+ RTL_W32(tp, MAR0 + 0, mc_filter[0]);
- RTL_W32(RxConfig, tmp);
+ RTL_W32(tp, RxConfig, tmp);
}
static void rtl_hw_start_8169(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
}
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03 ||
tp->mac_version == RTL_GIGA_MAC_VER_04)
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_init_rxcfg(tp);
- RTL_W8(EarlyTxThres, NoEarlyTx);
+ RTL_W8(tp, EarlyTxThres, NoEarlyTx);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
tp->mac_version == RTL_GIGA_MAC_VER_02 ||
@@ -5646,7 +5501,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
tp->mac_version == RTL_GIGA_MAC_VER_04)
rtl_set_rx_tx_config_registers(tp);
- tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
+ tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5655,37 +5510,37 @@ static void rtl_hw_start_8169(struct net_device *dev)
tp->cp_cmd |= (1 << 14);
}
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl8169_set_magic_reg(ioaddr, tp->mac_version);
+ rtl8169_set_magic_reg(tp, tp->mac_version);
/*
* Undocumented corner. Supposedly:
* (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
*/
- RTL_W16(IntrMitigate, 0x0000);
+ RTL_W16(tp, IntrMitigate, 0x0000);
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
tp->mac_version != RTL_GIGA_MAC_VER_02 &&
tp->mac_version != RTL_GIGA_MAC_VER_03 &&
tp->mac_version != RTL_GIGA_MAC_VER_04) {
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_tx_config_registers(tp);
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
- RTL_W32(RxMissed, 0);
+ RTL_W32(tp, RxMissed, 0);
rtl_set_rx_mode(dev);
/* no early-rx interrupts */
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
@@ -5719,17 +5574,13 @@ static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
DECLARE_RTL_COND(rtl_csiar_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CSIAR) & CSIAR_FLAG;
+ return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
}
static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
@@ -5737,21 +5588,17 @@ static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
CSIAR_FUNC_NIC);
@@ -5760,21 +5607,17 @@ static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIDR, value);
- RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+ RTL_W32(tp, CSIDR, value);
+ RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
CSIAR_FUNC_NIC2);
@@ -5783,13 +5626,11 @@ static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(CSIDR) : ~0;
+ RTL_R32(tp, CSIDR) : ~0;
}
static void rtl_init_csi_ops(struct rtl8169_private *tp)
@@ -5851,31 +5692,30 @@ static void rtl_ephy_init(struct rtl8169_private *tp, const struct ephy_info *e,
}
}
-static void rtl_disable_clock_request(struct pci_dev *pdev)
+static void rtl_disable_clock_request(struct rtl8169_private *tp)
{
- pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL,
+ pcie_capability_clear_word(tp->pci_dev, PCI_EXP_LNKCTL,
PCI_EXP_LNKCTL_CLKREQ_EN);
}
-static void rtl_enable_clock_request(struct pci_dev *pdev)
+static void rtl_enable_clock_request(struct rtl8169_private *tp)
{
- pcie_capability_set_word(pdev, PCI_EXP_LNKCTL,
+ pcie_capability_set_word(tp->pci_dev, PCI_EXP_LNKCTL,
PCI_EXP_LNKCTL_CLKREQ_EN);
}
static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
{
- void __iomem *ioaddr = tp->mmio_addr;
u8 data;
- data = RTL_R8(Config3);
+ data = RTL_R8(tp, Config3);
if (enable)
data |= Rdy_to_L23;
else
data &= ~Rdy_to_L23;
- RTL_W8(Config3, data);
+ RTL_W8(tp, Config3, data);
}
#define R8168_CPCMD_QUIRK_MASK (\
@@ -5891,45 +5731,37 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
-
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
if (tp->dev->mtu <= ETH_DATA_LEN) {
- rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
+ rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) |
PCI_EXP_DEVCTL_NOSNOOP_EN);
}
}
static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
rtl_hw_start_8168bb(tp);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+ RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
}
static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
+ RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
- RTL_W8(Config1, RTL_R8(Config1) | Speed_down);
-
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5951,42 +5783,35 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
/* Magic. */
- RTL_W8(DBG_REG, 0x20);
+ RTL_W8(tp, DBG_REG, 0x20);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168c_1[] = {
{ 0x02, 0x0800, 0x1000 },
{ 0x03, 0, 0x0002 },
@@ -5995,7 +5820,7 @@ static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
rtl_csi_access_enable_2(tp);
- RTL_W8(DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
+ RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
@@ -6030,40 +5855,32 @@ static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
static void rtl_hw_start_8168d(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_1(tp);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
}
static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168d_4[] = {
{ 0x0b, 0x0000, 0x0048 },
{ 0x19, 0x0020, 0x0050 },
@@ -6072,19 +5889,17 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
- rtl_enable_clock_request(pdev);
+ rtl_enable_clock_request(tp);
}
static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168e_1[] = {
{ 0x00, 0x0200, 0x0100 },
{ 0x00, 0x0000, 0x0004 },
@@ -6106,23 +5921,21 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
/* Reset tx FIFO pointer */
- RTL_W32(MISC, RTL_R32(MISC) | TXPLA_RST);
- RTL_W32(MISC, RTL_R32(MISC) & ~TXPLA_RST);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8168e_2[] = {
{ 0x09, 0x0000, 0x0080 },
{ 0x19, 0x0000, 0x0224 }
@@ -6133,7 +5946,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6144,29 +5957,26 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
- RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168f(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6179,20 +5989,19 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
- rtl_disable_clock_request(pdev);
+ rtl_disable_clock_request(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
- RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
- RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
}
static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168f_1[] = {
{ 0x06, 0x00c0, 0x0020 },
{ 0x08, 0x0001, 0x0002 },
@@ -6207,7 +6016,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
}
static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -6229,10 +6038,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
static void rtl_hw_start_8168g(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6241,20 +6047,20 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6264,7 +6070,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168g_1[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x37d0, 0x0820 },
@@ -6275,14 +6080,13 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
}
static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168g_2[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
@@ -6293,14 +6097,13 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
}
static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8411_2[] = {
{ 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
@@ -6312,15 +6115,13 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
rtl_hw_start_8168g(tp);
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
}
static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
int rg_saw_cnt;
u32 data;
static const struct ephy_info e_info_8168h_1[] = {
@@ -6333,11 +6134,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6346,7 +6147,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6357,19 +6158,19 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6417,12 +6218,9 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl8168ep_stop_cmac(tp);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
@@ -6431,7 +6229,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6440,25 +6238,24 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
- RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
- RTL_W8(MaxTxPacketSize, EarlySize);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ RTL_W8(tp, MaxTxPacketSize, EarlySize);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
/* Adjust EEE LED frequency */
- RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
rtl_pcie_state_l2l3_enable(tp, false);
}
static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168ep_1[] = {
{ 0x00, 0xffff, 0x10ab },
{ 0x06, 0xffff, 0xf030 },
@@ -6468,8 +6265,8 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
rtl_hw_start_8168ep(tp);
@@ -6477,7 +6274,6 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8168ep_2[] = {
{ 0x00, 0xffff, 0x10a3 },
{ 0x19, 0xffff, 0xfc00 },
@@ -6485,19 +6281,18 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
rtl_hw_start_8168ep(tp);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
}
static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u32 data;
static const struct ephy_info e_info_8168ep_3[] = {
{ 0x00, 0xffff, 0x10a3 },
@@ -6507,14 +6302,14 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
};
/* disable aspm and clock request before access ephy */
- RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
- RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+ RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
rtl_hw_start_8168ep(tp);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
- RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
data = r8168_mac_ocp_read(tp, 0xd3e2);
data &= 0xf000;
@@ -6533,19 +6328,18 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
static void rtl_hw_start_8168(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
- tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
+ tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- RTL_W16(IntrMitigate, 0x5151);
+ RTL_W16(tp, IntrMitigate, 0x5151);
/* Work around for RxFIFO overflow. */
if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
@@ -6553,11 +6347,11 @@ static void rtl_hw_start_8168(struct net_device *dev)
tp->event_slow &= ~RxOverflow;
}
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
rtl_set_rx_tx_config_registers(tp);
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
@@ -6663,13 +6457,13 @@ static void rtl_hw_start_8168(struct net_device *dev)
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_mode(dev);
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
#define R810X_CPCMD_QUIRK_MASK (\
@@ -6685,8 +6479,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
static const struct ephy_info e_info_8102e_1[] = {
{ 0x01, 0, 0x6e65 },
{ 0x02, 0, 0x091f },
@@ -6701,32 +6493,29 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
rtl_csi_access_enable_2(tp);
- RTL_W8(DBG_REG, FIX_NAK_1);
+ RTL_W8(tp, DBG_REG, FIX_NAK_1);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(Config1,
+ RTL_W8(tp, Config1,
LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- cfg1 = RTL_R8(Config1);
+ cfg1 = RTL_R8(tp, Config1);
if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
- RTL_W8(Config1, cfg1 & ~LEDS0);
+ RTL_W8(tp, Config1, cfg1 & ~LEDS0);
rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
}
static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- struct pci_dev *pdev = tp->pci_dev;
-
rtl_csi_access_enable_2(tp);
- rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
- RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
- RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+ RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
+ RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
}
static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
@@ -6738,7 +6527,6 @@ static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8105e_1[] = {
{ 0x07, 0, 0x4000 },
{ 0x19, 0, 0x0200 },
@@ -6751,13 +6539,13 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
};
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
/* Disable Early Tally Counter */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) & ~0x010000);
- RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
- RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
@@ -6772,7 +6560,6 @@ static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
static void rtl_hw_start_8402(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
static const struct ephy_info e_info_8402[] = {
{ 0x19, 0xffff, 0xff64 },
{ 0x1e, 0, 0x4000 }
@@ -6781,14 +6568,14 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
rtl_csi_access_enable_2(tp);
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
- RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
- rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);
@@ -6803,14 +6590,12 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
static void rtl_hw_start_8106(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* Force LAN exit from ASPM if Rx/Tx are not idle */
- RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
- RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
- RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
- RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
+ RTL_W32(tp, MISC, (RTL_R32(tp, MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+ RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
rtl_pcie_state_l2l3_enable(tp, false);
}
@@ -6818,7 +6603,6 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
static void rtl_hw_start_8101(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
@@ -6829,16 +6613,16 @@ static void rtl_hw_start_8101(struct net_device *dev)
pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_NOSNOOP_EN);
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- RTL_W8(MaxTxPacketSize, TxPacketMax);
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+ rtl_set_rx_max_size(tp, rx_buf_sz);
tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl_set_rx_tx_desc_registers(tp, ioaddr);
+ rtl_set_rx_tx_desc_registers(tp);
rtl_set_rx_tx_config_registers(tp);
@@ -6878,17 +6662,17 @@ static void rtl_hw_start_8101(struct net_device *dev)
break;
}
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- RTL_W16(IntrMitigate, 0x0000);
+ RTL_W16(tp, IntrMitigate, 0x0000);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_mode(dev);
- RTL_R8(IntrMask);
+ RTL_R8(tp, IntrMask);
- RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -6915,7 +6699,7 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
void **data_buff, struct RxDesc *desc)
{
- dma_unmap_single(&tp->pci_dev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
+ dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz,
DMA_FROM_DEVICE);
kfree(*data_buff);
@@ -6950,7 +6734,7 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
{
void *data;
dma_addr_t mapping;
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
struct net_device *dev = tp->dev;
int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
@@ -7062,7 +6846,7 @@ static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start,
if (len) {
struct sk_buff *skb = tx_skb->skb;
- rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+ rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
tp->TxDescArray + entry);
if (skb) {
dev_consume_skb_any(skb);
@@ -7098,7 +6882,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
napi_enable(&tp->napi);
rtl_hw_start(dev);
netif_wake_queue(dev);
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp);
}
static void rtl8169_tx_timeout(struct net_device *dev)
@@ -7114,7 +6898,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
struct skb_shared_info *info = skb_shinfo(skb);
unsigned int cur_frag, entry;
struct TxDesc *uninitialized_var(txd);
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
entry = tp->cur_tx;
for (cur_frag = 0; cur_frag < info->nr_frags; cur_frag++) {
@@ -7346,8 +7130,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
struct rtl8169_private *tp = netdev_priv(dev);
unsigned int entry = tp->cur_tx % NUM_TX_DESC;
struct TxDesc *txd = tp->TxDescArray + entry;
- void __iomem *ioaddr = tp->mmio_addr;
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
dma_addr_t mapping;
u32 status, len;
u32 opts[2];
@@ -7406,7 +7189,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
tp->cur_tx += frags + 1;
- RTL_W8(TxPoll, NPQ);
+ RTL_W8(tp, TxPoll, NPQ);
mmiowb();
@@ -7477,11 +7260,9 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
/* The infamous DAC f*ckup only happens at boot time */
if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
- void __iomem *ioaddr = tp->mmio_addr;
-
netif_info(tp, intr, dev, "disabling PCI DAC\n");
tp->cp_cmd &= ~PCIDAC;
- RTL_W16(CPlusCmd, tp->cp_cmd);
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
dev->features &= ~NETIF_F_HIGHDMA;
}
@@ -7513,7 +7294,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
*/
dma_rmb();
- rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+ rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
u64_stats_update_begin(&tp->tx_stats.syncp);
@@ -7547,11 +7328,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
* of start_xmit activity is detected (if it is not detected,
* it is slow enough). -- FR
*/
- if (tp->cur_tx != dirty_tx) {
- void __iomem *ioaddr = tp->mmio_addr;
-
- RTL_W8(TxPoll, NPQ);
- }
+ if (tp->cur_tx != dirty_tx)
+ RTL_W8(tp, TxPoll, NPQ);
}
}
@@ -7577,7 +7355,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
dma_addr_t addr)
{
struct sk_buff *skb;
- struct device *d = &tp->pci_dev->dev;
+ struct device *d = tp_to_dev(tp);
data = rtl8169_align(data);
dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
@@ -7732,7 +7510,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp)
rtl8169_pcierr_interrupt(dev);
if (status & LinkChg)
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp);
rtl_irq_enable_all(tp);
}
@@ -7804,21 +7582,20 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
return work_done;
}
-static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+static void rtl8169_rx_missed(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
if (tp->mac_version > RTL_GIGA_MAC_VER_06)
return;
- dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
- RTL_W32(RxMissed, 0);
+ dev->stats.rx_missed_errors += RTL_R32(tp, RxMissed) & 0xffffff;
+ RTL_W32(tp, RxMissed, 0);
}
static void rtl8169_down(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
del_timer_sync(&tp->timer);
@@ -7831,7 +7608,7 @@ static void rtl8169_down(struct net_device *dev)
* as netif_running is not true (rtl8169_interrupt, rtl8169_reset_task)
* and napi is disabled (rtl8169_poll).
*/
- rtl8169_rx_missed(dev, ioaddr);
+ rtl8169_rx_missed(dev);
/* Give a racing hard_start_xmit a few cycles to complete. */
synchronize_sched();
@@ -7861,7 +7638,7 @@ static int rtl8169_close(struct net_device *dev)
cancel_work_sync(&tp->wk.work);
- free_irq(pdev->irq, dev);
+ pci_free_irq(pdev, 0, dev);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
@@ -7880,14 +7657,13 @@ static void rtl8169_netpoll(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- rtl8169_interrupt(tp->pci_dev->irq, dev);
+ rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
}
#endif
static int rtl_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
int retval = -ENOMEM;
@@ -7917,9 +7693,8 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);
- retval = request_irq(pdev->irq, rtl8169_interrupt,
- (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
- dev->name, dev);
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ dev->name);
if (retval < 0)
goto err_release_fw_2;
@@ -7947,7 +7722,7 @@ static int rtl_open(struct net_device *dev)
tp->saved_wolopts = 0;
pm_runtime_put_sync(&pdev->dev);
- rtl8169_check_link_status(dev, tp, ioaddr);
+ rtl8169_check_link_status(dev, tp);
out:
return retval;
@@ -7971,7 +7746,6 @@ static void
rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct rtl8169_private *tp = netdev_priv(dev);
- void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
struct rtl8169_counters *counters = tp->counters;
unsigned int start;
@@ -7979,7 +7753,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
pm_runtime_get_noresume(&pdev->dev);
if (netif_running(dev) && pm_runtime_active(&pdev->dev))
- rtl8169_rx_missed(dev, ioaddr);
+ rtl8169_rx_missed(dev);
do {
start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
@@ -8102,7 +7876,7 @@ static int rtl8169_runtime_suspend(struct device *device)
rtl8169_net_suspend(dev);
/* Update counters before going runtime suspend */
- rtl8169_rx_missed(dev, tp->mmio_addr);
+ rtl8169_rx_missed(dev);
rtl8169_update_counters(dev);
return 0;
@@ -8163,8 +7937,6 @@ static const struct dev_pm_ops rtl8169_pm_ops = {
static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
/* WoL fails with 8168b when the receiver is disabled. */
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
@@ -8172,9 +7944,9 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_17:
pci_clear_master(tp->pci_dev);
- RTL_W8(ChipCmd, CmdRxEnb);
+ RTL_W8(tp, ChipCmd, CmdRxEnb);
/* PCI commit */
- RTL_R8(ChipCmd);
+ RTL_R8(tp, ChipCmd);
break;
default:
break;
@@ -8209,15 +7981,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct rtl8169_private *tp = netdev_priv(dev);
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
rtl8168_driver_stop(tp);
- }
netif_napi_del(&tp->napi);
@@ -8256,7 +8021,7 @@ static const struct rtl_cfg_info {
unsigned int region;
unsigned int align;
u16 event_slow;
- unsigned features;
+ unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
u8 default_ver;
} rtl_cfg_infos [] = {
@@ -8265,7 +8030,7 @@ static const struct rtl_cfg_info {
.region = 1,
.align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
.default_ver = RTL_GIGA_MAC_VER_01,
},
@@ -8274,7 +8039,7 @@ static const struct rtl_cfg_info {
.region = 2,
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
- .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_11,
},
@@ -8284,56 +8049,44 @@ static const struct rtl_cfg_info {
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
PCSTimeout,
- .features = RTL_FEATURE_MSI,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_13,
}
};
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
- const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
- unsigned msi = 0;
- u8 cfg2;
+ unsigned int flags;
- cfg2 = RTL_R8(Config2) & ~MSIEnable;
- if (cfg->features & RTL_FEATURE_MSI) {
- if (pci_enable_msi(tp->pci_dev)) {
- netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
- } else {
- cfg2 |= MSIEnable;
- msi = RTL_FEATURE_MSI;
- }
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ flags = PCI_IRQ_LEGACY;
+ } else {
+ flags = PCI_IRQ_ALL_TYPES;
}
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
- RTL_W8(Config2, cfg2);
- return msi;
+
+ return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
}
DECLARE_RTL_COND(rtl_link_list_ready_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R8(MCU) & LINK_LIST_RDY;
+ return RTL_R8(tp, MCU) & LINK_LIST_RDY;
}
DECLARE_RTL_COND(rtl_rxtx_empty_cond)
{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return (RTL_R8(MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+ return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
}
static void rtl_hw_init_8168g(struct rtl8169_private *tp)
{
- void __iomem *ioaddr = tp->mmio_addr;
u32 data;
tp->ocp_base = OCP_STD_PHY_BASE;
- RTL_W32(MISC, RTL_R32(MISC) | RXDV_GATED_EN);
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
return;
@@ -8341,9 +8094,9 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
return;
- RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
msleep(1);
- RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
data = r8168_mac_ocp_read(tp, 0xe8de);
data &= ~(1 << 14);
@@ -8397,7 +8150,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
struct rtl8169_private *tp;
struct mii_if_info *mii;
struct net_device *dev;
- void __iomem *ioaddr;
int chipset, i;
int rc;
@@ -8423,7 +8175,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
mii->mdio_write = rtl_mdio_write;
mii->phy_id_mask = 0x1f;
mii->reg_num_mask = 0x1f;
- mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ mii->supports_gmii = cfg->has_gmii;
/* disable ASPM completely as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users */
@@ -8455,20 +8207,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return -ENODEV;
}
- rc = pci_request_regions(pdev, MODULENAME);
+ rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
if (rc < 0) {
- netif_err(tp, probe, dev, "could not request regions\n");
+ netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
return rc;
}
- /* ioremap MMIO region */
- ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
- R8169_REGS_SIZE);
- if (!ioaddr) {
- netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
- return -EIO;
- }
- tp->mmio_addr = ioaddr;
+ tp->mmio_addr = pcim_iomap_table(pdev)[region];
if (!pci_is_pcie(pdev))
netif_info(tp, probe, dev, "not PCI Express\n");
@@ -8518,41 +8263,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
chipset = tp->mac_version;
tp->txd_version = rtl_chip_infos[chipset].txd_version;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
- RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
- RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
- if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
- tp->features |= RTL_FEATURE_WOL;
- if ((RTL_R8(Config3) & LinkUp) != 0)
- tp->features |= RTL_FEATURE_WOL;
- break;
- default:
- if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
- tp->features |= RTL_FEATURE_WOL;
- break;
+ rc = rtl_alloc_irq(tp);
+ if (rc < 0) {
+ netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+ return rc;
}
- if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
- tp->features |= RTL_FEATURE_WOL;
- tp->features |= rtl_try_msi(tp, cfg);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+
+ /* override BIOS settings, use userspace tools to enable WOL */
+ __rtl8169_set_wol(tp, 0);
if (rtl_tbi_enabled(tp)) {
tp->set_speed = rtl8169_set_speed_tbi;
@@ -8600,7 +8318,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rtl_rar_set(tp, (u8 *)mac_addr);
}
for (i = 0; i < ETH_ALEN; i++)
- dev->dev_addr[i] = RTL_R8(MAC0 + i);
+ dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
dev->ethtool_ops = &rtl8169_ethtool_ops;
dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
@@ -8667,8 +8385,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
- rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
- (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+ rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
+ (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+ pci_irq_vector(pdev, 0));
if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
"tx checksumming: %s]\n",
@@ -8676,15 +8395,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
}
- if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) &&
- r8168_check_dash(tp)) {
+ if (r8168_check_dash(tp))
rtl8168_driver_start(tp);
- }
netif_carrier_off(dev);
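Note on the r8169 hunks above: the driver stops caching void __iomem *ioaddr locals (the RTL_W*/RTL_R* accessors now take tp directly), converts interrupt setup from pci_enable_msi()/request_irq() to pci_alloc_irq_vectors()/pci_request_irq(), maps the BAR through the managed pcim_iomap_regions() helper, and relies on r8168_check_dash() alone to gate the DASH start/stop calls. A minimal sketch of the vector-allocation pattern, assuming hypothetical my_handler()/my_setup_irq() names that are not part of this patch:

    #include <linux/pci.h>
    #include <linux/interrupt.h>

    static irqreturn_t my_handler(int irq, void *dev_id)
    {
            /* ack the hardware and schedule NAPI here */
            return IRQ_HANDLED;
    }

    static int my_setup_irq(struct pci_dev *pdev, void *ctx)
    {
            /* one vector of any type: MSI-X, MSI or legacy INTx */
            int rc = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);

            if (rc < 0)
                    return rc;

            /* request vector 0; pair with pci_free_irq(pdev, 0, ctx) on teardown */
            rc = pci_request_irq(pdev, 0, my_handler, NULL, ctx, "my_dev");
            if (rc)
                    pci_free_irq_vectors(pdev);
            return rc;
    }

The same vector index 0 is what pci_irq_vector(pdev, 0) resolves for netpoll and for the probe-time log message in the hunks above.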
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 96a27b00c90e..b81f4faf7b10 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -1018,6 +1018,7 @@ struct ravb_private {
u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */
u32 cur_tx[NUM_TX_QUEUE];
u32 dirty_tx[NUM_TX_QUEUE];
+ u32 rx_buf_sz; /* Based on MTU+slack. */
struct napi_struct napi[NUM_RX_QUEUE];
struct work_struct work;
/* MII transceiver section. */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index a95fbd5510d9..68f122140966 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
le32_to_cpu(desc->dptr)))
dma_unmap_single(ndev->dev.parent,
le32_to_cpu(desc->dptr),
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
}
ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
for (i = 0; i < priv->num_rx_ring[q]; i++) {
/* RX descriptor */
rx_desc = &priv->rx_ring[q][i];
- rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+ rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
/* We just set the data size to 0 for a failed mapping which
* should prevent DMA from happening...
@@ -346,6 +346,9 @@ static int ravb_ring_init(struct net_device *ndev, int q)
int ring_size;
int i;
+ priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+ ETH_HLEN + VLAN_HLEN;
+
/* Allocate RX and TX skb rings */
priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -355,7 +358,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
if (!skb)
goto error;
ravb_set_buffer_align(skb);
@@ -586,7 +589,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
skb = priv->rx_skb[q][entry];
priv->rx_skb[q][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- PKT_BUF_SZ,
+ priv->rx_buf_sz,
DMA_FROM_DEVICE);
get_ts &= (q == RAVB_NC) ?
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -619,11 +622,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
desc = &priv->rx_ring[q][entry];
- desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+ desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
if (!priv->rx_skb[q][entry]) {
skb = netdev_alloc_skb(ndev,
- PKT_BUF_SZ + RAVB_ALIGN - 1);
+ priv->rx_buf_sz +
+ RAVB_ALIGN - 1);
if (!skb)
break; /* Better luck next round. */
ravb_set_buffer_align(skb);
@@ -1854,6 +1858,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
return phy_mii_ioctl(phydev, req, cmd);
}
+static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
+{
+ if (netif_running(ndev))
+ return -EBUSY;
+
+ ndev->mtu = new_mtu;
+ netdev_update_features(ndev);
+
+ return 0;
+}
+
static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
{
struct ravb_private *priv = netdev_priv(ndev);
@@ -1895,6 +1910,7 @@ static const struct net_device_ops ravb_netdev_ops = {
.ndo_set_rx_mode = ravb_set_rx_mode,
.ndo_tx_timeout = ravb_tx_timeout,
.ndo_do_ioctl = ravb_do_ioctl,
+ .ndo_change_mtu = ravb_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_set_features = ravb_set_features,
@@ -2117,6 +2133,9 @@ static int ravb_probe(struct platform_device *pdev)
goto out_release;
}
+ ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+ ndev->min_mtu = ETH_MIN_MTU;
+
/* Set function */
ndev->netdev_ops = &ravb_netdev_ops;
ndev->ethtool_ops = &ravb_ethtool_ops;
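Note on the ravb hunks above: the fixed PKT_BUF_SZ receive buffer is replaced by a per-device rx_buf_sz derived from the MTU, and a .ndo_change_mtu hook is added that only accepts changes while the interface is down, so the rings are rebuilt with the new size on the next open. The sizing rule, restated as a standalone helper purely for readability (my_rx_buf_sz() is illustrative; the constants come from ravb.h and the ethernet headers):

    static u32 my_rx_buf_sz(unsigned int mtu)
    {
            /* keep the default PKT_BUF_SZ buffer for standard MTUs, grow for jumbo */
            unsigned int payload = (mtu <= 1492) ? PKT_BUF_SZ : mtu;

            return payload + ETH_HLEN + VLAN_HLEN;
    }

ndev->max_mtu is bounded so that the resulting buffer plus the FCS still fits the 2048-byte limit used in the probe hunk.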
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 14c839bb09e7..3557fe3f2bb5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -123,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
[TSU_FWSL0] = 0x0030,
[TSU_FWSL1] = 0x0034,
[TSU_FWSLC] = 0x0038,
- [TSU_QTAG0] = 0x0040,
- [TSU_QTAG1] = 0x0044,
+ [TSU_QTAGM0] = 0x0040,
+ [TSU_QTAGM1] = 0x0044,
[TSU_FWSR] = 0x0050,
[TSU_FWINMK] = 0x0054,
[TSU_ADQT0] = 0x0048,
@@ -763,6 +763,7 @@ static struct sh_eth_cpu_data sh7757_data = {
.rpadir = 1,
.rpadir_value = 2 << 16,
.rtrate = 1,
+ .dual_port = 1,
};
#define SH_GIGA_ETH_BASE 0xfee00000UL
@@ -841,6 +842,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
.no_trimd = 1,
.no_ade = 1,
.tsu = 1,
+ .dual_port = 1,
};
/* SH7734 */
@@ -911,6 +913,7 @@ static struct sh_eth_cpu_data sh7763_data = {
.tsu = 1,
.irq_flags = IRQF_SHARED,
.magic = 1,
+ .dual_port = 1,
};
static struct sh_eth_cpu_data sh7619_data = {
@@ -943,6 +946,7 @@ static struct sh_eth_cpu_data sh771x_data = {
EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
EESIPR_PREIP | EESIPR_CERFIP,
.tsu = 1,
+ .dual_port = 1,
};
static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -972,20 +976,16 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
static int sh_eth_check_reset(struct net_device *ndev)
{
- int ret = 0;
- int cnt = 100;
+ int cnt;
- while (cnt > 0) {
+ for (cnt = 100; cnt > 0; cnt--) {
if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
- break;
+ return 0;
mdelay(1);
- cnt--;
- }
- if (cnt <= 0) {
- netdev_err(ndev, "Device reset failed\n");
- ret = -ETIMEDOUT;
}
- return ret;
+
+ netdev_err(ndev, "Device reset failed\n");
+ return -ETIMEDOUT;
}
static int sh_eth_reset(struct net_device *ndev)
@@ -2112,8 +2112,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
add_tsu_reg(TSU_FWSL0);
add_tsu_reg(TSU_FWSL1);
add_tsu_reg(TSU_FWSLC);
- add_tsu_reg(TSU_QTAG0);
- add_tsu_reg(TSU_QTAG1);
add_tsu_reg(TSU_QTAGM0);
add_tsu_reg(TSU_QTAGM1);
add_tsu_reg(TSU_FWSR);
@@ -2932,7 +2930,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
/* SuperH's TSU register init function */
static void sh_eth_tsu_init(struct sh_eth_private *mdp)
{
- if (sh_eth_is_rz_fast_ether(mdp)) {
+ if (!mdp->cd->dual_port) {
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
TSU_FWSLC); /* Enable POST registers */
@@ -2949,13 +2947,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
- if (sh_eth_is_gether(mdp)) {
- sh_eth_tsu_write(mdp, 0, TSU_QTAG0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAG1); /* Disable QTAG(1->0) */
- } else {
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
- }
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
sh_eth_tsu_write(mdp, 0, TSU_FWSR); /* all interrupt status clear */
sh_eth_tsu_write(mdp, 0, TSU_FWINMK); /* Disable all interrupt */
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index e5fe70134690..21047d58a93f 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -118,8 +118,8 @@ enum {
TSU_FWSL0,
TSU_FWSL1,
TSU_FWSLC,
- TSU_QTAG0,
- TSU_QTAG1,
+ TSU_QTAG0, /* Same as TSU_QTAGM0 */
+ TSU_QTAG1, /* Same as TSU_QTAGM1 */
TSU_QTAGM0,
TSU_QTAGM1,
TSU_FWSR,
@@ -509,6 +509,7 @@ struct sh_eth_cpu_data {
unsigned rmiimode:1; /* EtherC has RMIIMODE register */
unsigned rtrate:1; /* EtherC has RTRATE register */
unsigned magic:1; /* EtherC has ECMR.MPDE and ECSR.MPD */
+ unsigned dual_port:1; /* Dual EtherC/E-DMAC */
};
struct sh_eth_private {
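Note on the sh_eth hunks above: the gigabit register table now uses the TSU_QTAGM0/1 names for offsets 0x0040/0x0044 and keeps TSU_QTAG0/1 only as aliases, a dual_port capability bit replaces the per-SoC helper checks in the TSU init path, and sh_eth_check_reset() becomes a bounded polling loop that returns as soon as the reset bit clears. A generic sketch of that polling idiom; my_poll_clear() and the 100 x 1 ms budget are illustrative choices rather than hardware requirements:

    #include <linux/io.h>
    #include <linux/delay.h>
    #include <linux/errno.h>

    static int my_poll_clear(void __iomem *addr, u32 bit)
    {
            int cnt;

            for (cnt = 100; cnt > 0; cnt--) {
                    if (!(ioread32(addr) & bit))
                            return 0;       /* bit self-cleared: success */
                    mdelay(1);
            }
            return -ETIMEDOUT;              /* caller logs the failure */
    }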
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 75fbf58e421c..e100273b623d 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -28,9 +28,6 @@ enum {
EFX_EF10_TEST = 1,
EFX_EF10_REFILL,
};
-
-/* The reserved RSS context value */
-#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
/* The maximum size of a shared RSS context */
/* TODO: this should really be from the mcdi protocol export */
#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
@@ -697,7 +694,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
}
nic_data->warm_boot_count = rc;
- nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
@@ -1489,8 +1486,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
}
/* don't fail init if RSS setup doesn't work */
- rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
- efx->rss_active = (rc == 0);
+ rc = efx->type->rx_push_rss_config(efx, false,
+ efx->rss_context.rx_indir_table, NULL);
return 0;
}
@@ -1507,7 +1504,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
nic_data->must_restore_filters = true;
nic_data->must_restore_piobufs = true;
efx_ef10_forget_old_piobufs(efx);
- nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
/* Driver-created vswitches and vports must be re-created */
nic_data->must_probe_vswitching = true;
@@ -2703,27 +2700,30 @@ static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
* Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
* just need to set the UDP ports flags (for both IP versions).
*/
-static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+static void efx_ef10_set_rss_flags(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
u32 flags;
BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
- if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+ if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0)
return;
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
+ ctx->context_id);
flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
NULL, 0, NULL))
/* Succeeded, so UDP 4-tuple is now enabled */
- efx->rx_hash_udp_4tuple = true;
+ ctx->rx_hash_udp_4tuple = true;
}
-static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
- bool exclusive, unsigned *context_size)
+static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
+ struct efx_rss_context *ctx,
+ unsigned *context_size)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
@@ -2739,7 +2739,7 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
if (!exclusive && rss_spread == 1) {
- *context = EFX_EF10_RSS_CONTEXT_INVALID;
+ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
if (context_size)
*context_size = 1;
return 0;
@@ -2762,29 +2762,26 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
return -EIO;
- *context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
+ ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
if (context_size)
*context_size = rss_spread;
if (nic_data->datapath_caps &
1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
- efx_ef10_set_rss_flags(efx, *context);
+ efx_ef10_set_rss_flags(efx, ctx);
return 0;
}
-static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
+static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
- int rc;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
context);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
+ return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
NULL, 0, NULL);
- WARN_ON(rc != 0);
}
static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
@@ -2796,15 +2793,15 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
- /* This iterates over the length of efx->rx_indir_table, but copies
- * bytes from rx_indir_table. That's because the latter is a pointer
- * rather than an array, but should have the same length.
- * The efx->rx_hash_key loop below is similar.
+ /* This iterates over the length of efx->rss_context.rx_indir_table, but
+ * copies bytes from rx_indir_table. That's because the latter is a
+ * pointer rather than an array, but should have the same length.
+ * The efx->rss_context.rx_hash_key loop below is similar.
*/
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i)
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
MCDI_PTR(tablebuf,
RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
(u8) rx_indir_table[i];
@@ -2816,9 +2813,9 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
- for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
@@ -2827,27 +2824,27 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc;
- if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
- efx_ef10_free_rss_context(efx, nic_data->rx_rss_context);
- nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+ if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
+ rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
+ WARN_ON(rc != 0);
+ }
+ efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
}
static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
unsigned *context_size)
{
- u32 new_rx_rss_context;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
- int rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
- false, context_size);
+ int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context,
+ context_size);
if (rc != 0)
return rc;
- nic_data->rx_rss_context = new_rx_rss_context;
nic_data->rx_rss_context_exclusive = false;
- efx_set_default_rx_indir_table(efx);
+ efx_set_default_rx_indir_table(efx, &efx->rss_context);
return 0;
}
@@ -2855,50 +2852,79 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
const u32 *rx_indir_table,
const u8 *key)
{
+ u32 old_rx_rss_context = efx->rss_context.context_id;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
int rc;
- u32 new_rx_rss_context;
- if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID ||
+ if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
!nic_data->rx_rss_context_exclusive) {
- rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
- true, NULL);
+ rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
+ NULL);
if (rc == -EOPNOTSUPP)
return rc;
else if (rc != 0)
goto fail1;
- } else {
- new_rx_rss_context = nic_data->rx_rss_context;
}
- rc = efx_ef10_populate_rss_table(efx, new_rx_rss_context,
+ rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id,
rx_indir_table, key);
if (rc != 0)
goto fail2;
- if (nic_data->rx_rss_context != new_rx_rss_context)
- efx_ef10_rx_free_indir_table(efx);
- nic_data->rx_rss_context = new_rx_rss_context;
+ if (efx->rss_context.context_id != old_rx_rss_context &&
+ old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+ WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
nic_data->rx_rss_context_exclusive = true;
- if (rx_indir_table != efx->rx_indir_table)
- memcpy(efx->rx_indir_table, rx_indir_table,
- sizeof(efx->rx_indir_table));
- if (key != efx->rx_hash_key)
- memcpy(efx->rx_hash_key, key, efx->type->rx_hash_key_size);
+ if (rx_indir_table != efx->rss_context.rx_indir_table)
+ memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ if (key != efx->rss_context.rx_hash_key)
+ memcpy(efx->rss_context.rx_hash_key, key,
+ efx->type->rx_hash_key_size);
return 0;
fail2:
- if (new_rx_rss_context != nic_data->rx_rss_context)
- efx_ef10_free_rss_context(efx, new_rx_rss_context);
+ if (old_rx_rss_context != efx->rss_context.context_id) {
+ WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0);
+ efx->rss_context.context_id = old_rx_rss_context;
+ }
fail1:
netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
return rc;
}
-static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key)
+{
+ int rc;
+
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
+ if (rc)
+ return rc;
+ }
+
+ if (!rx_indir_table) /* Delete this context */
+ return efx_ef10_free_rss_context(efx, ctx->context_id);
+
+ rc = efx_ef10_populate_rss_table(efx, ctx->context_id,
+ rx_indir_table, key);
+ if (rc)
+ return rc;
+
+ memcpy(ctx->rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
+
+ return 0;
+}
+
+static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
@@ -2908,12 +2934,12 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
- if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
return -ENOENT;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
- nic_data->rx_rss_context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ ctx->context_id);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
tablebuf, sizeof(tablebuf), &outlen);
@@ -2923,13 +2949,13 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
return -EIO;
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
- efx->rx_indir_table[i] = MCDI_PTR(tablebuf,
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
- nic_data->rx_rss_context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+ ctx->context_id);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
keybuf, sizeof(keybuf), &outlen);
@@ -2939,13 +2965,38 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
return -EIO;
- for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
- efx->rx_hash_key[i] = MCDI_PTR(
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
+ ctx->rx_hash_key[i] = MCDI_PTR(
keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
return 0;
}
+static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+{
+ return efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
+}
+
+static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
+{
+ struct efx_rss_context *ctx;
+ int rc;
+
+ list_for_each_entry(ctx, &efx->rss_context.list, list) {
+ /* previous NIC RSS context is gone */
+ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ /* so try to allocate a new one */
+ rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
+ ctx->rx_indir_table,
+ ctx->rx_hash_key);
+ if (rc)
+ netif_warn(efx, probe, efx->net_dev,
+ "failed to restore RSS context %u, rc=%d"
+ "; RSS filters may fail to be applied\n",
+ ctx->user_id, rc);
+ }
+}
+
static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
const u32 *rx_indir_table,
const u8 *key)
@@ -2956,7 +3007,7 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
return 0;
if (!key)
- key = efx->rx_hash_key;
+ key = efx->rss_context.rx_hash_key;
rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
@@ -2965,7 +3016,8 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
bool mismatch = false;
size_t i;
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table) && !mismatch;
+ for (i = 0;
+ i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
i++)
mismatch = rx_indir_table[i] !=
ethtool_rxfh_indir_default(i, efx->rss_spread);
@@ -3000,11 +3052,9 @@ static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
const u8 *key
__attribute__ ((unused)))
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
if (user)
return -EOPNOTSUPP;
- if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+ if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
return 0;
return efx_ef10_rx_push_shared_rss_config(efx, NULL);
}
@@ -4109,6 +4159,7 @@ efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx,
static void efx_ef10_filter_push_prep(struct efx_nic *efx,
const struct efx_filter_spec *spec,
efx_dword_t *inbuf, u64 handle,
+ struct efx_rss_context *ctx,
bool replacing)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
@@ -4116,11 +4167,16 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
- /* Remove RSS flag if we don't have an RSS context. */
- if (flags & EFX_FILTER_FLAG_RX_RSS &&
- spec->rss_context == EFX_FILTER_RSS_CONTEXT_DEFAULT &&
- nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
- flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ /* If RSS filter, caller better have given us an RSS context */
+ if (flags & EFX_FILTER_FLAG_RX_RSS) {
+ /* We don't have the ability to return an error, so we'll just
+ * log a warning and disable RSS for the filter.
+ */
+ if (WARN_ON_ONCE(!ctx))
+ flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
+ flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ }
if (replacing) {
MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
@@ -4146,21 +4202,18 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
if (flags & EFX_FILTER_FLAG_RX_RSS)
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT,
- spec->rss_context !=
- EFX_FILTER_RSS_CONTEXT_DEFAULT ?
- spec->rss_context : nic_data->rx_rss_context);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
}
static int efx_ef10_filter_push(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- u64 *handle, bool replacing)
+ const struct efx_filter_spec *spec, u64 *handle,
+ struct efx_rss_context *ctx, bool replacing)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
int rc;
- efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, replacing);
+ efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
outbuf, sizeof(outbuf), NULL);
if (rc == 0)
@@ -4252,6 +4305,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
struct efx_ef10_filter_table *table = efx->filter_state;
DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
struct efx_filter_spec *saved_spec;
+ struct efx_rss_context *ctx = NULL;
unsigned int match_pri, hash;
unsigned int priv_flags;
bool replacing = false;
@@ -4275,6 +4329,18 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
if (is_mc_recip)
bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+ if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+ if (spec->rss_context)
+ ctx = efx_find_rss_context_entry(spec->rss_context,
+ &efx->rss_context.list);
+ else
+ ctx = &efx->rss_context;
+ if (!ctx)
+ return -ENOENT;
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
+ return -EOPNOTSUPP;
+ }
+
/* Find any existing filters with the same match tuple or
* else a free slot to insert at. If any of them are busy,
* we have to wait and retry.
@@ -4390,7 +4456,7 @@ found:
spin_unlock_bh(&efx->filter_lock);
rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
- replacing);
+ ctx, replacing);
/* Finalise the software table entry */
spin_lock_bh(&efx->filter_lock);
@@ -4534,12 +4600,13 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
new_spec.priority = EFX_FILTER_PRI_AUTO;
new_spec.flags = (EFX_FILTER_FLAG_RX |
- (efx_rss_enabled(efx) ?
+ (efx_rss_active(&efx->rss_context) ?
EFX_FILTER_FLAG_RX_RSS : 0));
new_spec.dmaq_id = 0;
- new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+ new_spec.rss_context = 0;
rc = efx_ef10_filter_push(efx, &new_spec,
&table->entry[filter_idx].handle,
+ &efx->rss_context,
true);
spin_lock_bh(&efx->filter_lock);
@@ -4783,7 +4850,8 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
efx_ef10_filter_push_prep(efx, spec, inbuf,
- table->entry[ins_index].handle, replacing);
+ table->entry[ins_index].handle, NULL,
+ replacing);
efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
MC_CMD_FILTER_OP_OUT_LEN,
efx_ef10_filter_rfs_insert_complete, cookie);
@@ -5104,6 +5172,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
unsigned int invalid_filters = 0, failed = 0;
struct efx_ef10_filter_vlan *vlan;
struct efx_filter_spec *spec;
+ struct efx_rss_context *ctx;
unsigned int filter_idx;
u32 mcdi_flags;
int match_pri;
@@ -5133,17 +5202,34 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
invalid_filters++;
goto not_restored;
}
- if (spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT &&
- spec->rss_context != nic_data->rx_rss_context)
- netif_warn(efx, drv, efx->net_dev,
- "Warning: unable to restore a filter with specific RSS context.\n");
+ if (spec->rss_context)
+ ctx = efx_find_rss_context_entry(spec->rss_context,
+ &efx->rss_context.list);
+ else
+ ctx = &efx->rss_context;
+ if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+ if (!ctx) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
+ spec->rss_context);
+ invalid_filters++;
+ goto not_restored;
+ }
+ if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
+ spec->rss_context);
+ invalid_filters++;
+ goto not_restored;
+ }
+ }
table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
spin_unlock_bh(&efx->filter_lock);
rc = efx_ef10_filter_push(efx, spec,
&table->entry[filter_idx].handle,
- false);
+ ctx, false);
if (rc)
failed++;
spin_lock_bh(&efx->filter_lock);
@@ -6784,6 +6870,9 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.tx_limit_len = efx_ef10_tx_limit_len,
.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
.rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
+ .rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
+ .rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
+ .rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
.rx_probe = efx_ef10_rx_probe,
.rx_init = efx_ef10_rx_init,
.rx_remove = efx_ef10_rx_remove,
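Note on the sfc ef10.c hunks above: the single rx_rss_context handle in the NIC-private data is replaced by efx->rss_context, an efx_rss_context that also carries the software copies of the indirection table and hash key; the allocate/free/push/pull helpers are reworked to operate on a context argument; and the new rx_restore_rss_contexts op re-creates every additional context after an MC reboot, since the firmware-side state does not survive it. A reduced sketch of that restore pattern, where struct my_ctx and the re-push step stand in for efx_rss_context and the driver's rx_push_rss_context_config op:

    #include <linux/list.h>
    #include <linux/types.h>

    struct my_ctx {
            struct list_head list;
            u32 fw_id;              /* firmware handle, lost across an MC reboot */
            u32 table[128];         /* software copy of the indirection table */
            u8 key[40];             /* software copy of the Toeplitz hash key */
    };

    static void my_restore_all(struct list_head *head)
    {
            struct my_ctx *ctx;

            list_for_each_entry(ctx, head, list) {
                    ctx->fw_id = ~0u;       /* old firmware handle is stale */
                    /* allocate a fresh firmware context and re-push the saved
                     * table and key, warning (but continuing) on failure
                     */
            }
    }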
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 16757cfc5b29..7321a4cf6f4d 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1353,12 +1353,13 @@ static void efx_fini_io(struct efx_nic *efx)
pci_disable_device(efx->pci_dev);
}
-void efx_set_default_rx_indir_table(struct efx_nic *efx)
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
{
size_t i;
- for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
- efx->rx_indir_table[i] =
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] =
ethtool_rxfh_indir_default(i, efx->rss_spread);
}
@@ -1739,9 +1740,9 @@ static int efx_probe_nic(struct efx_nic *efx)
} while (rc == -EAGAIN);
if (efx->n_channels > 1)
- netdev_rss_key_fill(&efx->rx_hash_key,
- sizeof(efx->rx_hash_key));
- efx_set_default_rx_indir_table(efx);
+ netdev_rss_key_fill(efx->rss_context.rx_hash_key,
+ sizeof(efx->rss_context.rx_hash_key));
+ efx_set_default_rx_indir_table(efx, &efx->rss_context);
netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
@@ -2700,6 +2701,8 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
" VFs may not function\n", rc);
#endif
+ if (efx->type->rx_restore_rss_contexts)
+ efx->type->rx_restore_rss_contexts(efx);
down_read(&efx->filter_sem);
efx_restore_filters(efx);
up_read(&efx->filter_sem);
@@ -3003,6 +3006,7 @@ static int efx_init_struct(struct efx_nic *efx,
efx->type->rx_hash_offset - efx->type->rx_prefix_size;
efx->rx_packet_ts_offset =
efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+ INIT_LIST_HEAD(&efx->rss_context.list);
spin_lock_init(&efx->stats_lock);
efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
efx->num_mac_stats = MC_CMD_MAC_NSTATS;
@@ -3072,6 +3076,55 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
}
+/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
+{
+ struct efx_rss_context *ctx, *new;
+ u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+ /* Search for first gap in the numbering */
+ list_for_each_entry(ctx, head, list) {
+ if (ctx->user_id != id)
+ break;
+ id++;
+ /* Check for wrap. If this happens, we have nearly 2^32
+ * allocated RSS contexts, which seems unlikely.
+ */
+ if (WARN_ON_ONCE(!id))
+ return NULL;
+ }
+
+ /* Create the new entry */
+ new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ new->rx_hash_udp_4tuple = false;
+
+ /* Insert the new entry into the gap */
+ new->user_id = id;
+ list_add_tail(&new->list, &ctx->list);
+ return new;
+}
+
+struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *head)
+{
+ struct efx_rss_context *ctx;
+
+ list_for_each_entry(ctx, head, list)
+ if (ctx->user_id == id)
+ return ctx;
+ return NULL;
+}
+
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+ list_del(&ctx->list);
+ kfree(ctx);
+}
+
/**************************************************************************
*
* PCI interface
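Note on the efx.c hunks above: additional RSS contexts are kept on efx->rss_context.list, and efx_alloc_rss_context_entry() hands out user-visible IDs by taking the first gap in the numbering, with ID 0 reserved for the default context. The same search reduced to a plain array so it can be read in isolation (my_first_gap() is illustrative only):

    #include <linux/types.h>

    /* ids[] holds the existing user IDs in ascending order; return the
     * smallest unused ID >= 1 (0 refers to the default/master context). */
    static u32 my_first_gap(const u32 *ids, size_t n)
    {
            u32 id = 1;
            size_t i;

            for (i = 0; i < n; i++) {
                    if (ids[i] != id)
                            break;          /* hole found before ids[i] */
                    id++;
            }
            return id;                      /* also correct for an empty list */
    }

Because list_add_tail() inserts the new entry just before the position where the walk stopped, the list stays sorted by user_id without any separate sort step.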
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 0cddc5ad77b1..3429ae3f3b08 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -34,7 +34,8 @@ extern unsigned int efx_piobuf_size;
extern bool efx_separate_tx_channels;
/* RX */
-void efx_set_default_rx_indir_table(struct efx_nic *efx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
void efx_rx_config_page_split(struct efx_nic *efx);
int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
@@ -182,6 +183,15 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
#endif
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
+/* RSS contexts */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *list);
+struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *list);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+static inline bool efx_rss_active(struct efx_rss_context *ctx)
+{
+ return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID;
+}
+
/* Channels */
int efx_channel_dummy_op_int(struct efx_channel *channel);
void efx_channel_dummy_op_void(struct efx_channel *channel);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 4db2dc2bf52f..bb1c80d48d12 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -808,7 +808,8 @@ static inline void ip6_fill_mask(__be32 *mask)
}
static int efx_ethtool_get_class_rule(struct efx_nic *efx,
- struct ethtool_rx_flow_spec *rule)
+ struct ethtool_rx_flow_spec *rule,
+ u32 *rss_context)
{
struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -964,6 +965,11 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx,
rule->m_ext.vlan_tci = htons(0xfff);
}
+ if (spec.flags & EFX_FILTER_FLAG_RX_RSS) {
+ rule->flow_type |= FLOW_RSS;
+ *rss_context = spec.rss_context;
+ }
+
return rc;
}
@@ -972,6 +978,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
struct ethtool_rxnfc *info, u32 *rule_locs)
{
struct efx_nic *efx = netdev_priv(net_dev);
+ u32 rss_context = 0;
+ s32 rc;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
@@ -979,12 +987,20 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
return 0;
case ETHTOOL_GRXFH: {
+ struct efx_rss_context *ctx = &efx->rss_context;
+
+ if (info->flow_type & FLOW_RSS && info->rss_context) {
+ ctx = efx_find_rss_context_entry(info->rss_context,
+ &efx->rss_context.list);
+ if (!ctx)
+ return -ENOENT;
+ }
info->data = 0;
- if (!efx->rss_active) /* No RSS */
+ if (!efx_rss_active(ctx)) /* No RSS */
return 0;
- switch (info->flow_type) {
+ switch (info->flow_type & ~FLOW_RSS) {
case UDP_V4_FLOW:
- if (efx->rx_hash_udp_4tuple)
+ if (ctx->rx_hash_udp_4tuple)
/* fall through */
case TCP_V4_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -995,7 +1011,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
info->data |= RXH_IP_SRC | RXH_IP_DST;
break;
case UDP_V6_FLOW:
- if (efx->rx_hash_udp_4tuple)
+ if (ctx->rx_hash_udp_4tuple)
/* fall through */
case TCP_V6_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -1023,10 +1039,14 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
case ETHTOOL_GRXCLSRULE:
if (efx_filter_get_rx_id_limit(efx) == 0)
return -EOPNOTSUPP;
- return efx_ethtool_get_class_rule(efx, &info->fs);
+ rc = efx_ethtool_get_class_rule(efx, &info->fs, &rss_context);
+ if (rc < 0)
+ return rc;
+ if (info->fs.flow_type & FLOW_RSS)
+ info->rss_context = rss_context;
+ return 0;
- case ETHTOOL_GRXCLSRLALL: {
- s32 rc;
+ case ETHTOOL_GRXCLSRLALL:
info->data = efx_filter_get_rx_id_limit(efx);
if (info->data == 0)
return -EOPNOTSUPP;
@@ -1036,7 +1056,6 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
return rc;
info->rule_cnt = rc;
return 0;
- }
default:
return -EOPNOTSUPP;
@@ -1054,7 +1073,8 @@ static inline bool ip6_mask_is_empty(__be32 mask[4])
}
static int efx_ethtool_set_class_rule(struct efx_nic *efx,
- struct ethtool_rx_flow_spec *rule)
+ struct ethtool_rx_flow_spec *rule,
+ u32 rss_context)
{
struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -1066,6 +1086,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
struct ethhdr *mac_entry = &rule->h_u.ether_spec;
struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+ enum efx_filter_flags flags = 0;
struct efx_filter_spec spec;
int rc;
@@ -1084,12 +1105,19 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
rule->m_ext.data[1]))
return -EINVAL;
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
- efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
+ if (efx->rx_scatter)
+ flags |= EFX_FILTER_FLAG_RX_SCATTER;
+ if (rule->flow_type & FLOW_RSS)
+ flags |= EFX_FILTER_FLAG_RX_RSS;
+
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, flags,
(rule->ring_cookie == RX_CLS_FLOW_DISC) ?
EFX_FILTER_RX_DMAQ_ID_DROP : rule->ring_cookie);
- switch (rule->flow_type & ~FLOW_EXT) {
+ if (rule->flow_type & FLOW_RSS)
+ spec.rss_context = rss_context;
+
+ switch (rule->flow_type & ~(FLOW_EXT | FLOW_RSS)) {
case TCP_V4_FLOW:
case UDP_V4_FLOW:
spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
@@ -1265,7 +1293,8 @@ static int efx_ethtool_set_rxnfc(struct net_device *net_dev,
switch (info->cmd) {
case ETHTOOL_SRXCLSRLINS:
- return efx_ethtool_set_class_rule(efx, &info->fs);
+ return efx_ethtool_set_class_rule(efx, &info->fs,
+ info->rss_context);
case ETHTOOL_SRXCLSRLDEL:
return efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_MANUAL,
@@ -1280,7 +1309,9 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
{
struct efx_nic *efx = netdev_priv(net_dev);
- return (efx->n_rx_channels == 1) ? 0 : ARRAY_SIZE(efx->rx_indir_table);
+ if (efx->n_rx_channels == 1)
+ return 0;
+ return ARRAY_SIZE(efx->rss_context.rx_indir_table);
}
static u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev)
@@ -1303,9 +1334,11 @@ static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
if (hfunc)
*hfunc = ETH_RSS_HASH_TOP;
if (indir)
- memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
+ memcpy(indir, efx->rss_context.rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
if (key)
- memcpy(key, efx->rx_hash_key, efx->type->rx_hash_key_size);
+ memcpy(key, efx->rss_context.rx_hash_key,
+ efx->type->rx_hash_key_size);
return 0;
}
@@ -1321,13 +1354,93 @@ static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir,
return 0;
if (!key)
- key = efx->rx_hash_key;
+ key = efx->rss_context.rx_hash_key;
if (!indir)
- indir = efx->rx_indir_table;
+ indir = efx->rss_context.rx_indir_table;
return efx->type->rx_push_rss_config(efx, true, indir, key);
}
+static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_rss_context *ctx;
+ int rc;
+
+ if (!efx->type->rx_pull_rss_context_config)
+ return -EOPNOTSUPP;
+ ctx = efx_find_rss_context_entry(rss_context, &efx->rss_context.list);
+ if (!ctx)
+ return -ENOENT;
+ rc = efx->type->rx_pull_rss_context_config(efx, ctx);
+ if (rc)
+ return rc;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+ if (indir)
+ memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table));
+ if (key)
+ memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size);
+ return 0;
+}
+
+static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
+ const u32 *indir, const u8 *key,
+ const u8 hfunc, u32 *rss_context,
+ bool delete)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_rss_context *ctx;
+ bool allocated = false;
+ int rc;
+
+ if (!efx->type->rx_push_rss_context_config)
+ return -EOPNOTSUPP;
+ /* Hash function is Toeplitz, cannot be changed */
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ if (delete)
+ /* alloc + delete == Nothing to do */
+ return -EINVAL;
+ ctx = efx_alloc_rss_context_entry(&efx->rss_context.list);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ /* Initialise indir table and key to defaults */
+ efx_set_default_rx_indir_table(efx, ctx);
+ netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
+ allocated = true;
+ } else {
+ ctx = efx_find_rss_context_entry(*rss_context,
+ &efx->rss_context.list);
+ if (!ctx)
+ return -ENOENT;
+ }
+
+ if (delete) {
+ /* delete this context */
+ rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
+ if (!rc)
+ efx_free_rss_context_entry(ctx);
+ return rc;
+ }
+
+ if (!key)
+ key = ctx->rx_hash_key;
+ if (!indir)
+ indir = ctx->rx_indir_table;
+
+ rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key);
+ if (rc && allocated)
+ efx_free_rss_context_entry(ctx);
+ else
+ *rss_context = ctx->user_id;
+ return rc;
+}
+
static int efx_ethtool_get_ts_info(struct net_device *net_dev,
struct ethtool_ts_info *ts_info)
{
@@ -1375,6 +1488,36 @@ static int efx_ethtool_get_module_info(struct net_device *net_dev,
return ret;
}
+static int efx_ethtool_get_fecparam(struct net_device *net_dev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int rc;
+
+ if (!efx->phy_op || !efx->phy_op->get_fecparam)
+ return -EOPNOTSUPP;
+ mutex_lock(&efx->mac_lock);
+ rc = efx->phy_op->get_fecparam(efx, fecparam);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+static int efx_ethtool_set_fecparam(struct net_device *net_dev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int rc;
+
+ if (!efx->phy_op || !efx->phy_op->get_fecparam)
+ return -EOPNOTSUPP;
+ mutex_lock(&efx->mac_lock);
+ rc = efx->phy_op->set_fecparam(efx, fecparam);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
const struct ethtool_ops efx_ethtool_ops = {
.get_drvinfo = efx_ethtool_get_drvinfo,
.get_regs_len = efx_ethtool_get_regs_len,
@@ -1403,9 +1546,13 @@ const struct ethtool_ops efx_ethtool_ops = {
.get_rxfh_key_size = efx_ethtool_get_rxfh_key_size,
.get_rxfh = efx_ethtool_get_rxfh,
.set_rxfh = efx_ethtool_set_rxfh,
+ .get_rxfh_context = efx_ethtool_get_rxfh_context,
+ .set_rxfh_context = efx_ethtool_set_rxfh_context,
.get_ts_info = efx_ethtool_get_ts_info,
.get_module_info = efx_ethtool_get_module_info,
.get_module_eeprom = efx_ethtool_get_module_eeprom,
.get_link_ksettings = efx_ethtool_get_link_ksettings,
.set_link_ksettings = efx_ethtool_set_link_ksettings,
+ .get_fecparam = efx_ethtool_get_fecparam,
+ .set_fecparam = efx_ethtool_set_fecparam,
};
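Note on the ethtool.c hunks above: the new .get_rxfh_context/.set_rxfh_context hooks let userspace create, reconfigure and delete the extra RSS contexts (ETH_RXFH_CONTEXT_ALLOC requests a fresh one and the allocated user ID is written back through *rss_context), ntuple rules gain FLOW_RSS plus a spec-level rss_context so flows can be steered into a specific context, and .get_fecparam/.set_fecparam are forwarded to the PHY ops under the MAC lock. With a sufficiently new ethtool(8) these hooks are typically exercised with commands along the lines of "ethtool -X <dev> context new", "ethtool -X <dev> context <id> delete" and "ethtool -N <dev> flow-type tcp4 ... context <id>"; the exact syntax depends on the ethtool version, so treat those invocations as illustrative rather than authoritative.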
diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
index 30a1136fc909..4824fcf5c3d4 100644
--- a/drivers/net/ethernet/sfc/falcon/enum.h
+++ b/drivers/net/ethernet/sfc/falcon/enum.h
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
(1 << LOOPBACK_XAUI) | \
(1 << LOOPBACK_GMII) | \
(1 << LOOPBACK_SGMII) | \
- (1 << LOOPBACK_SGMII) | \
(1 << LOOPBACK_XGBR) | \
(1 << LOOPBACK_XFI) | \
(1 << LOOPBACK_XAUI_FAR) | \
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 266b9bee1f3a..ad001e77d554 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1630,12 +1630,12 @@ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
size_t i = 0;
efx_dword_t dword;
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
FR_BZ_RX_INDIRECTION_TBL_ROWS);
for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
EFX_POPULATE_DWORD_1(dword, FRF_BZ_IT_QUEUE,
- efx->rx_indir_table[i]);
+ efx->rss_context.rx_indir_table[i]);
efx_writed(efx, &dword,
FR_BZ_RX_INDIRECTION_TBL +
FR_BZ_RX_INDIRECTION_TBL_STEP * i);
@@ -1647,14 +1647,14 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
size_t i = 0;
efx_dword_t dword;
- BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
FR_BZ_RX_INDIRECTION_TBL_ROWS);
for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
efx_readd(efx, &dword,
FR_BZ_RX_INDIRECTION_TBL +
FR_BZ_RX_INDIRECTION_TBL_STEP * i);
- efx->rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
+ efx->rss_context.rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
}
}
@@ -2032,8 +2032,7 @@ efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
{
bool is_full = false;
- if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) &&
- gen_spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT)
+ if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) && gen_spec->rss_context)
return -EINVAL;
spec->priority = gen_spec->priority;
diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
index 8189a1cd973f..59021ad6d98d 100644
--- a/drivers/net/ethernet/sfc/filter.h
+++ b/drivers/net/ethernet/sfc/filter.h
@@ -125,7 +125,9 @@ enum efx_encap_type {
* @match_flags: Match type flags, from &enum efx_filter_match_flags
* @priority: Priority of the filter, from &enum efx_filter_priority
* @flags: Miscellaneous flags, from &enum efx_filter_flags
- * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set
+ * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set. This
+ * is a user_id (with 0 meaning the driver/default RSS context), not an
+ * MCFW context_id.
* @dmaq_id: Source/target queue index, or %EFX_FILTER_RX_DMAQ_ID_DROP for
* an RX drop filter
* @outer_vid: Outer VLAN ID to match, if %EFX_FILTER_MATCH_OUTER_VID is set
@@ -173,7 +175,6 @@ struct efx_filter_spec {
};
enum {
- EFX_FILTER_RSS_CONTEXT_DEFAULT = 0xffffffff,
EFX_FILTER_RX_DMAQ_ID_DROP = 0xfff
};
@@ -185,7 +186,7 @@ static inline void efx_filter_init_rx(struct efx_filter_spec *spec,
memset(spec, 0, sizeof(*spec));
spec->priority = priority;
spec->flags = EFX_FILTER_FLAG_RX | flags;
- spec->rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+ spec->rss_context = 0;
spec->dmaq_id = rxq_id;
}
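With @rss_context now carrying the user-visible ID directly (0 meaning the driver/default context), a driver-internal caller that wants a filter spread over a user-allocated context would, roughly, do the following. This is only a sketch: 'efx' and 'ctx' are assumed from surrounding code, and efx_filter_set_uc_def() is picked arbitrarily as the match.

	struct efx_filter_spec spec;
	int rc;

	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
			   EFX_FILTER_FLAG_RX_RSS, 0 /* base RX queue */);
	spec.rss_context = ctx->user_id;	/* ethtool user_id, not an MCFW context_id */
	efx_filter_set_uc_def(&spec);		/* match all unicast, for example */
	rc = efx_filter_insert_filter(efx, &spec, true);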
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index 869d76f8f589..3839eec783ea 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -273,7 +273,8 @@
#define MC_CMD_ERR_NO_PRIVILEGE 0x1013
/* Workaround 26807 could not be turned on/off because some functions
* have already installed filters. See the comment at
- * MC_CMD_WORKAROUND_BUG26807. */
+ * MC_CMD_WORKAROUND_BUG26807.
+ * May also be returned for other operations such as sub-variant switching. */
#define MC_CMD_ERR_FILTERS_PRESENT 0x1014
/* The clock whose frequency you've attempted to set
* doesn't exist on this NIC */
@@ -292,6 +293,10 @@
* away. This is distinct from MC_CMD_ERR_DATAPATH_DISABLED in that the
 * datapath absence may be temporary. */
#define MC_CMD_ERR_NO_DATAPATH 0x1019
+/* The operation could not complete because some VIs are allocated */
+#define MC_CMD_ERR_VIS_PRESENT 0x101a
+/* The operation could not complete because some PIO buffers are allocated */
+#define MC_CMD_ERR_PIOBUFS_PRESENT 0x101b
#define MC_CMD_ERR_CODE_OFST 0
@@ -312,10 +317,17 @@
#define SIENA_MC_BOOTROM_COPYCODE_VEC (0x800 - 3 * 0x4)
#define HUNT_MC_BOOTROM_COPYCODE_VEC (0x8000 - 3 * 0x4)
#define MEDFORD_MC_BOOTROM_COPYCODE_VEC (0x10000 - 3 * 0x4)
-/* Points to the recovery mode entry point. */
+/* Points to the recovery mode entry point. Misnamed but kept for compatibility. */
#define SIENA_MC_BOOTROM_NOFLASH_VEC (0x800 - 2 * 0x4)
#define HUNT_MC_BOOTROM_NOFLASH_VEC (0x8000 - 2 * 0x4)
#define MEDFORD_MC_BOOTROM_NOFLASH_VEC (0x10000 - 2 * 0x4)
+/* Points to the recovery mode entry point. Same as above, but the right name. */
+#define SIENA_MC_BOOTROM_RECOVERY_VEC (0x800 - 2 * 0x4)
+#define HUNT_MC_BOOTROM_RECOVERY_VEC (0x8000 - 2 * 0x4)
+#define MEDFORD_MC_BOOTROM_RECOVERY_VEC (0x10000 - 2 * 0x4)
+
+/* Points to noflash mode entry point. */
+#define MEDFORD_MC_BOOTROM_REAL_NOFLASH_VEC (0x10000 - 4 * 0x4)
/* The command set exported by the boot ROM (MCDI v0) */
#define MC_CMD_GET_VERSION_V0_SUPPORTED_FUNCS { \
@@ -365,7 +377,7 @@
#define MCDI_EVENT_LEVEL_LBN 33
#define MCDI_EVENT_LEVEL_WIDTH 3
/* enum: Info. */
-#define MCDI_EVENT_LEVEL_INFO 0x0
+#define MCDI_EVENT_LEVEL_INFO 0x0
/* enum: Warning. */
#define MCDI_EVENT_LEVEL_WARN 0x1
/* enum: Error. */
@@ -385,21 +397,21 @@
#define MCDI_EVENT_LINKCHANGE_SPEED_LBN 16
#define MCDI_EVENT_LINKCHANGE_SPEED_WIDTH 4
/* enum: Link is down or link speed could not be determined */
-#define MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN 0x0
+#define MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN 0x0
/* enum: 100Mbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_100M 0x1
+#define MCDI_EVENT_LINKCHANGE_SPEED_100M 0x1
/* enum: 1Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_1G 0x2
+#define MCDI_EVENT_LINKCHANGE_SPEED_1G 0x2
/* enum: 10Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_10G 0x3
+#define MCDI_EVENT_LINKCHANGE_SPEED_10G 0x3
/* enum: 40Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_40G 0x4
+#define MCDI_EVENT_LINKCHANGE_SPEED_40G 0x4
/* enum: 25Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_25G 0x5
+#define MCDI_EVENT_LINKCHANGE_SPEED_25G 0x5
/* enum: 50Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_50G 0x6
+#define MCDI_EVENT_LINKCHANGE_SPEED_50G 0x6
/* enum: 100Gbs */
-#define MCDI_EVENT_LINKCHANGE_SPEED_100G 0x7
+#define MCDI_EVENT_LINKCHANGE_SPEED_100G 0x7
#define MCDI_EVENT_LINKCHANGE_FCNTL_LBN 20
#define MCDI_EVENT_LINKCHANGE_FCNTL_WIDTH 4
#define MCDI_EVENT_LINKCHANGE_LINK_FLAGS_LBN 24
@@ -606,23 +618,23 @@
/* enum: Transmit error */
#define MCDI_EVENT_CODE_TX_ERR 0xb
/* enum: Tx flush has completed */
-#define MCDI_EVENT_CODE_TX_FLUSH 0xc
+#define MCDI_EVENT_CODE_TX_FLUSH 0xc
/* enum: PTP packet received timestamp */
-#define MCDI_EVENT_CODE_PTP_RX 0xd
+#define MCDI_EVENT_CODE_PTP_RX 0xd
/* enum: PTP NIC failure */
-#define MCDI_EVENT_CODE_PTP_FAULT 0xe
+#define MCDI_EVENT_CODE_PTP_FAULT 0xe
/* enum: PTP PPS event */
-#define MCDI_EVENT_CODE_PTP_PPS 0xf
+#define MCDI_EVENT_CODE_PTP_PPS 0xf
/* enum: Rx flush has completed */
-#define MCDI_EVENT_CODE_RX_FLUSH 0x10
+#define MCDI_EVENT_CODE_RX_FLUSH 0x10
/* enum: Receive error */
#define MCDI_EVENT_CODE_RX_ERR 0x11
/* enum: AOE fault */
-#define MCDI_EVENT_CODE_AOE 0x12
+#define MCDI_EVENT_CODE_AOE 0x12
/* enum: Network port calibration failed (VCAL). */
-#define MCDI_EVENT_CODE_VCAL_FAIL 0x13
+#define MCDI_EVENT_CODE_VCAL_FAIL 0x13
/* enum: HW PPS event */
-#define MCDI_EVENT_CODE_HW_PPS 0x14
+#define MCDI_EVENT_CODE_HW_PPS 0x14
/* enum: The MC has rebooted (huntington and later, siena uses CODE_REBOOT and
* a different format)
*/
@@ -654,7 +666,7 @@
/* enum: Artificial event generated by host and posted via MC for test
* purposes.
*/
-#define MCDI_EVENT_CODE_TESTGEN 0xfa
+#define MCDI_EVENT_CODE_TESTGEN 0xfa
#define MCDI_EVENT_CMDDONE_DATA_OFST 0
#define MCDI_EVENT_CMDDONE_DATA_LEN 4
#define MCDI_EVENT_CMDDONE_DATA_LBN 0
@@ -784,7 +796,7 @@
#define FCDI_EVENT_LEVEL_LBN 33
#define FCDI_EVENT_LEVEL_WIDTH 3
/* enum: Info. */
-#define FCDI_EVENT_LEVEL_INFO 0x0
+#define FCDI_EVENT_LEVEL_INFO 0x0
/* enum: Warning. */
#define FCDI_EVENT_LEVEL_WARN 0x1
/* enum: Error. */
@@ -916,7 +928,7 @@
#define MUM_EVENT_LEVEL_LBN 33
#define MUM_EVENT_LEVEL_WIDTH 3
/* enum: Info. */
-#define MUM_EVENT_LEVEL_INFO 0x0
+#define MUM_EVENT_LEVEL_INFO 0x0
/* enum: Warning. */
#define MUM_EVENT_LEVEL_WARN 0x1
/* enum: Error. */
@@ -1002,7 +1014,9 @@
/***********************************/
/* MC_CMD_READ32
- * Read multiple 32byte words from MC memory.
+ * Read multiple 32-bit words from MC memory. Note - this command really
+ * belongs to INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
*/
#define MC_CMD_READ32 0x1
@@ -1050,7 +1064,9 @@
/***********************************/
/* MC_CMD_COPYCODE
- * Copy MC code between two locations and jump.
+ * Copy MC code between two locations and jump. Note - this command really
+ * belongs to INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
*/
#define MC_CMD_COPYCODE 0x3
@@ -1139,7 +1155,7 @@
#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_OFST 0
#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_LEN 4
/* enum: indicates that the MC wasn't flash booted */
-#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL 0xdeadbeef
+#define MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL 0xdeadbeef
#define MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_OFST 4
#define MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_LEN 4
#define MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_WATCHDOG_LBN 0
@@ -1555,11 +1571,10 @@
#define MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_OFST 8
#define MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_LEN 4
-/* MC_CMD_PTP_IN_RESET_STATS msgrequest */
+/* MC_CMD_PTP_IN_RESET_STATS msgrequest: Reset PTP statistics */
#define MC_CMD_PTP_IN_RESET_STATS_LEN 8
/* MC_CMD_PTP_IN_CMD_OFST 0 */
/* MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset PTP statistics */
/* MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
/* MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
@@ -1710,11 +1725,10 @@
/* enum: External. */
#define MC_CMD_PTP_CLK_SRC_EXTERNAL 0x1
-/* MC_CMD_PTP_IN_RST_CLK msgrequest */
+/* MC_CMD_PTP_IN_RST_CLK msgrequest: Reset value of Timer Reg. */
#define MC_CMD_PTP_IN_RST_CLK_LEN 8
/* MC_CMD_PTP_IN_CMD_OFST 0 */
/* MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset value of Timer Reg. */
/* MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
/* MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
@@ -2687,8 +2701,16 @@
#define MC_CMD_DRV_ATTACH_IN_NEW_STATE_LEN 4
#define MC_CMD_DRV_ATTACH_LBN 0
#define MC_CMD_DRV_ATTACH_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_ATTACH_LBN 0
+#define MC_CMD_DRV_ATTACH_IN_ATTACH_WIDTH 1
#define MC_CMD_DRV_PREBOOT_LBN 1
#define MC_CMD_DRV_PREBOOT_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_PREBOOT_LBN 1
+#define MC_CMD_DRV_ATTACH_IN_PREBOOT_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_LBN 2
+#define MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_WIDTH 1
+#define MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_LBN 3
+#define MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_WIDTH 1
/* 1 to set new state, or 0 to just report the existing state */
#define MC_CMD_DRV_ATTACH_IN_UPDATE_OFST 4
#define MC_CMD_DRV_ATTACH_IN_UPDATE_LEN 4
@@ -2711,8 +2733,14 @@
* support
*/
#define MC_CMD_FW_RULES_ENGINE 0x5
+/* enum: Prefer to use firmware with additional DPDK support */
+#define MC_CMD_FW_DPDK 0x6
+/* enum: Prefer to use "l3xudp" custom datapath firmware (see SF-119495-PD and
+ * bug69716)
+ */
+#define MC_CMD_FW_L3XUDP 0x7
/* enum: Only this option is allowed for non-admin functions */
-#define MC_CMD_FW_DONT_CARE 0xffffffff
+#define MC_CMD_FW_DONT_CARE 0xffffffff
/* MC_CMD_DRV_ATTACH_OUT msgresponse */
#define MC_CMD_DRV_ATTACH_OUT_LEN 4
@@ -2740,6 +2768,11 @@
* refers to the Sorrento external FPGA port.
*/
#define MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_NO_ACTIVE_PORT 0x3
+/* enum: If set, indicates that VI spreading is currently enabled. Will always
+ * indicate the current state, regardless of the value in the WANT_VI_SPREADING
+ * input.
+ */
+#define MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_VI_SPREADING_ENABLED 0x4
/***********************************/
@@ -3294,83 +3327,83 @@
#define MC_CMD_GET_LOOPBACK_MODES_OUT_100M_LO_OFST 0
#define MC_CMD_GET_LOOPBACK_MODES_OUT_100M_HI_OFST 4
/* enum: None. */
-#define MC_CMD_LOOPBACK_NONE 0x0
+#define MC_CMD_LOOPBACK_NONE 0x0
/* enum: Data. */
-#define MC_CMD_LOOPBACK_DATA 0x1
+#define MC_CMD_LOOPBACK_DATA 0x1
/* enum: GMAC. */
-#define MC_CMD_LOOPBACK_GMAC 0x2
+#define MC_CMD_LOOPBACK_GMAC 0x2
/* enum: XGMII. */
#define MC_CMD_LOOPBACK_XGMII 0x3
/* enum: XGXS. */
-#define MC_CMD_LOOPBACK_XGXS 0x4
+#define MC_CMD_LOOPBACK_XGXS 0x4
/* enum: XAUI. */
-#define MC_CMD_LOOPBACK_XAUI 0x5
+#define MC_CMD_LOOPBACK_XAUI 0x5
/* enum: GMII. */
-#define MC_CMD_LOOPBACK_GMII 0x6
+#define MC_CMD_LOOPBACK_GMII 0x6
/* enum: SGMII. */
-#define MC_CMD_LOOPBACK_SGMII 0x7
+#define MC_CMD_LOOPBACK_SGMII 0x7
/* enum: XGBR. */
-#define MC_CMD_LOOPBACK_XGBR 0x8
+#define MC_CMD_LOOPBACK_XGBR 0x8
/* enum: XFI. */
-#define MC_CMD_LOOPBACK_XFI 0x9
+#define MC_CMD_LOOPBACK_XFI 0x9
/* enum: XAUI Far. */
-#define MC_CMD_LOOPBACK_XAUI_FAR 0xa
+#define MC_CMD_LOOPBACK_XAUI_FAR 0xa
/* enum: GMII Far. */
-#define MC_CMD_LOOPBACK_GMII_FAR 0xb
+#define MC_CMD_LOOPBACK_GMII_FAR 0xb
/* enum: SGMII Far. */
-#define MC_CMD_LOOPBACK_SGMII_FAR 0xc
+#define MC_CMD_LOOPBACK_SGMII_FAR 0xc
/* enum: XFI Far. */
-#define MC_CMD_LOOPBACK_XFI_FAR 0xd
+#define MC_CMD_LOOPBACK_XFI_FAR 0xd
/* enum: GPhy. */
-#define MC_CMD_LOOPBACK_GPHY 0xe
+#define MC_CMD_LOOPBACK_GPHY 0xe
/* enum: PhyXS. */
-#define MC_CMD_LOOPBACK_PHYXS 0xf
+#define MC_CMD_LOOPBACK_PHYXS 0xf
/* enum: PCS. */
-#define MC_CMD_LOOPBACK_PCS 0x10
+#define MC_CMD_LOOPBACK_PCS 0x10
/* enum: PMA-PMD. */
-#define MC_CMD_LOOPBACK_PMAPMD 0x11
+#define MC_CMD_LOOPBACK_PMAPMD 0x11
/* enum: Cross-Port. */
-#define MC_CMD_LOOPBACK_XPORT 0x12
+#define MC_CMD_LOOPBACK_XPORT 0x12
/* enum: XGMII-Wireside. */
-#define MC_CMD_LOOPBACK_XGMII_WS 0x13
+#define MC_CMD_LOOPBACK_XGMII_WS 0x13
/* enum: XAUI Wireside. */
-#define MC_CMD_LOOPBACK_XAUI_WS 0x14
+#define MC_CMD_LOOPBACK_XAUI_WS 0x14
/* enum: XAUI Wireside Far. */
-#define MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15
+#define MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15
/* enum: XAUI Wireside near. */
-#define MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16
+#define MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16
/* enum: GMII Wireside. */
-#define MC_CMD_LOOPBACK_GMII_WS 0x17
+#define MC_CMD_LOOPBACK_GMII_WS 0x17
/* enum: XFI Wireside. */
-#define MC_CMD_LOOPBACK_XFI_WS 0x18
+#define MC_CMD_LOOPBACK_XFI_WS 0x18
/* enum: XFI Wireside Far. */
-#define MC_CMD_LOOPBACK_XFI_WS_FAR 0x19
+#define MC_CMD_LOOPBACK_XFI_WS_FAR 0x19
/* enum: PhyXS Wireside. */
-#define MC_CMD_LOOPBACK_PHYXS_WS 0x1a
+#define MC_CMD_LOOPBACK_PHYXS_WS 0x1a
/* enum: PMA lanes MAC-Serdes. */
-#define MC_CMD_LOOPBACK_PMA_INT 0x1b
+#define MC_CMD_LOOPBACK_PMA_INT 0x1b
/* enum: KR Serdes Parallel (Encoder). */
-#define MC_CMD_LOOPBACK_SD_NEAR 0x1c
+#define MC_CMD_LOOPBACK_SD_NEAR 0x1c
/* enum: KR Serdes Serial. */
-#define MC_CMD_LOOPBACK_SD_FAR 0x1d
+#define MC_CMD_LOOPBACK_SD_FAR 0x1d
/* enum: PMA lanes MAC-Serdes Wireside. */
-#define MC_CMD_LOOPBACK_PMA_INT_WS 0x1e
+#define MC_CMD_LOOPBACK_PMA_INT_WS 0x1e
/* enum: KR Serdes Parallel Wireside (Full PCS). */
-#define MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f
+#define MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f
/* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-#define MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20
+#define MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20
/* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-#define MC_CMD_LOOPBACK_SD_FEP_WS 0x21
+#define MC_CMD_LOOPBACK_SD_FEP_WS 0x21
/* enum: KR Serdes Serial Wireside. */
-#define MC_CMD_LOOPBACK_SD_FES_WS 0x22
+#define MC_CMD_LOOPBACK_SD_FES_WS 0x22
/* enum: Near side of AOE Siena side port */
-#define MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23
+#define MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23
/* enum: Medford Wireside datapath loopback */
-#define MC_CMD_LOOPBACK_DATA_WS 0x24
+#define MC_CMD_LOOPBACK_DATA_WS 0x24
/* enum: Force link up without setting up any physical loopback (snapper use
* only)
*/
-#define MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25
+#define MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25
/* Supported loopbacks. */
#define MC_CMD_GET_LOOPBACK_MODES_OUT_1G_OFST 8
#define MC_CMD_GET_LOOPBACK_MODES_OUT_1G_LEN 8
@@ -3410,83 +3443,83 @@
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_LO_OFST 0
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_HI_OFST 4
/* enum: None. */
-/* MC_CMD_LOOPBACK_NONE 0x0 */
+/* MC_CMD_LOOPBACK_NONE 0x0 */
/* enum: Data. */
-/* MC_CMD_LOOPBACK_DATA 0x1 */
+/* MC_CMD_LOOPBACK_DATA 0x1 */
/* enum: GMAC. */
-/* MC_CMD_LOOPBACK_GMAC 0x2 */
+/* MC_CMD_LOOPBACK_GMAC 0x2 */
/* enum: XGMII. */
/* MC_CMD_LOOPBACK_XGMII 0x3 */
/* enum: XGXS. */
-/* MC_CMD_LOOPBACK_XGXS 0x4 */
+/* MC_CMD_LOOPBACK_XGXS 0x4 */
/* enum: XAUI. */
-/* MC_CMD_LOOPBACK_XAUI 0x5 */
+/* MC_CMD_LOOPBACK_XAUI 0x5 */
/* enum: GMII. */
-/* MC_CMD_LOOPBACK_GMII 0x6 */
+/* MC_CMD_LOOPBACK_GMII 0x6 */
/* enum: SGMII. */
-/* MC_CMD_LOOPBACK_SGMII 0x7 */
+/* MC_CMD_LOOPBACK_SGMII 0x7 */
/* enum: XGBR. */
-/* MC_CMD_LOOPBACK_XGBR 0x8 */
+/* MC_CMD_LOOPBACK_XGBR 0x8 */
/* enum: XFI. */
-/* MC_CMD_LOOPBACK_XFI 0x9 */
+/* MC_CMD_LOOPBACK_XFI 0x9 */
/* enum: XAUI Far. */
-/* MC_CMD_LOOPBACK_XAUI_FAR 0xa */
+/* MC_CMD_LOOPBACK_XAUI_FAR 0xa */
/* enum: GMII Far. */
-/* MC_CMD_LOOPBACK_GMII_FAR 0xb */
+/* MC_CMD_LOOPBACK_GMII_FAR 0xb */
/* enum: SGMII Far. */
-/* MC_CMD_LOOPBACK_SGMII_FAR 0xc */
+/* MC_CMD_LOOPBACK_SGMII_FAR 0xc */
/* enum: XFI Far. */
-/* MC_CMD_LOOPBACK_XFI_FAR 0xd */
+/* MC_CMD_LOOPBACK_XFI_FAR 0xd */
/* enum: GPhy. */
-/* MC_CMD_LOOPBACK_GPHY 0xe */
+/* MC_CMD_LOOPBACK_GPHY 0xe */
/* enum: PhyXS. */
-/* MC_CMD_LOOPBACK_PHYXS 0xf */
+/* MC_CMD_LOOPBACK_PHYXS 0xf */
/* enum: PCS. */
-/* MC_CMD_LOOPBACK_PCS 0x10 */
+/* MC_CMD_LOOPBACK_PCS 0x10 */
/* enum: PMA-PMD. */
-/* MC_CMD_LOOPBACK_PMAPMD 0x11 */
+/* MC_CMD_LOOPBACK_PMAPMD 0x11 */
/* enum: Cross-Port. */
-/* MC_CMD_LOOPBACK_XPORT 0x12 */
+/* MC_CMD_LOOPBACK_XPORT 0x12 */
/* enum: XGMII-Wireside. */
-/* MC_CMD_LOOPBACK_XGMII_WS 0x13 */
+/* MC_CMD_LOOPBACK_XGMII_WS 0x13 */
/* enum: XAUI Wireside. */
-/* MC_CMD_LOOPBACK_XAUI_WS 0x14 */
+/* MC_CMD_LOOPBACK_XAUI_WS 0x14 */
/* enum: XAUI Wireside Far. */
-/* MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15 */
+/* MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15 */
/* enum: XAUI Wireside near. */
-/* MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16 */
+/* MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16 */
/* enum: GMII Wireside. */
-/* MC_CMD_LOOPBACK_GMII_WS 0x17 */
+/* MC_CMD_LOOPBACK_GMII_WS 0x17 */
/* enum: XFI Wireside. */
-/* MC_CMD_LOOPBACK_XFI_WS 0x18 */
+/* MC_CMD_LOOPBACK_XFI_WS 0x18 */
/* enum: XFI Wireside Far. */
-/* MC_CMD_LOOPBACK_XFI_WS_FAR 0x19 */
+/* MC_CMD_LOOPBACK_XFI_WS_FAR 0x19 */
/* enum: PhyXS Wireside. */
-/* MC_CMD_LOOPBACK_PHYXS_WS 0x1a */
+/* MC_CMD_LOOPBACK_PHYXS_WS 0x1a */
/* enum: PMA lanes MAC-Serdes. */
-/* MC_CMD_LOOPBACK_PMA_INT 0x1b */
+/* MC_CMD_LOOPBACK_PMA_INT 0x1b */
/* enum: KR Serdes Parallel (Encoder). */
-/* MC_CMD_LOOPBACK_SD_NEAR 0x1c */
+/* MC_CMD_LOOPBACK_SD_NEAR 0x1c */
/* enum: KR Serdes Serial. */
-/* MC_CMD_LOOPBACK_SD_FAR 0x1d */
+/* MC_CMD_LOOPBACK_SD_FAR 0x1d */
/* enum: PMA lanes MAC-Serdes Wireside. */
-/* MC_CMD_LOOPBACK_PMA_INT_WS 0x1e */
+/* MC_CMD_LOOPBACK_PMA_INT_WS 0x1e */
/* enum: KR Serdes Parallel Wireside (Full PCS). */
-/* MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f */
+/* MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f */
/* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-/* MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20 */
+/* MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20 */
/* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-/* MC_CMD_LOOPBACK_SD_FEP_WS 0x21 */
+/* MC_CMD_LOOPBACK_SD_FEP_WS 0x21 */
/* enum: KR Serdes Serial Wireside. */
-/* MC_CMD_LOOPBACK_SD_FES_WS 0x22 */
+/* MC_CMD_LOOPBACK_SD_FES_WS 0x22 */
/* enum: Near side of AOE Siena side port */
-/* MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23 */
+/* MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23 */
/* enum: Medford Wireside datapath loopback */
-/* MC_CMD_LOOPBACK_DATA_WS 0x24 */
+/* MC_CMD_LOOPBACK_DATA_WS 0x24 */
/* enum: Force link up without setting up any physical loopback (snapper use
* only)
*/
-/* MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25 */
+/* MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25 */
/* Supported loopbacks. */
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_OFST 8
#define MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_LEN 8
@@ -3537,6 +3570,37 @@
/* Enum values, see field(s): */
/* 100M */
+/* AN_TYPE structuredef: Auto-negotiation types defined in IEEE802.3 */
+#define AN_TYPE_LEN 4
+#define AN_TYPE_TYPE_OFST 0
+#define AN_TYPE_TYPE_LEN 4
+/* enum: None, AN disabled or not supported */
+#define MC_CMD_AN_NONE 0x0
+/* enum: Clause 28 - BASE-T */
+#define MC_CMD_AN_CLAUSE28 0x1
+/* enum: Clause 37 - BASE-X */
+#define MC_CMD_AN_CLAUSE37 0x2
+/* enum: Clause 73 - BASE-R startup protocol for backplane and copper cable
+ * assemblies. Includes Clause 72/Clause 92 link-training.
+ */
+#define MC_CMD_AN_CLAUSE73 0x3
+#define AN_TYPE_TYPE_LBN 0
+#define AN_TYPE_TYPE_WIDTH 32
+
+/* FEC_TYPE structuredef: Forward error correction types defined in IEEE802.3
+ */
+#define FEC_TYPE_LEN 4
+#define FEC_TYPE_TYPE_OFST 0
+#define FEC_TYPE_TYPE_LEN 4
+/* enum: No FEC */
+#define MC_CMD_FEC_NONE 0x0
+/* enum: Clause 74 BASE-R FEC (a.k.a Firecode) */
+#define MC_CMD_FEC_BASER 0x1
+/* enum: Clause 91/Clause 108 Reed-Solomon FEC */
+#define MC_CMD_FEC_RS 0x2
+#define FEC_TYPE_TYPE_LBN 0
+#define FEC_TYPE_TYPE_WIDTH 32
+
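FEC_TYPE is what the new GET_LINK_OUT_V2 response (further down) reports in its FEC_TYPE field, and it is what the driver's FEC handling has to translate into ethtool terms. Purely as an illustrative mapping, not the driver's actual code, and assuming the ETHTOOL_FEC_* bits from the ethtool UAPI:

static u32 mcdi_fec_type_to_ethtool(u32 fec_type)
{
	switch (fec_type) {
	case MC_CMD_FEC_NONE:
		return ETHTOOL_FEC_OFF;		/* link is up without FEC */
	case MC_CMD_FEC_BASER:
		return ETHTOOL_FEC_BASER;	/* Clause 74 Firecode */
	case MC_CMD_FEC_RS:
		return ETHTOOL_FEC_RS;		/* Clause 91/108 Reed-Solomon */
	default:
		return 0;			/* unrecognised value: report nothing */
	}
}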
/***********************************/
/* MC_CMD_GET_LINK
@@ -3552,10 +3616,14 @@
/* MC_CMD_GET_LINK_OUT msgresponse */
#define MC_CMD_GET_LINK_OUT_LEN 28
-/* near-side advertised capabilities */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
#define MC_CMD_GET_LINK_OUT_CAP_OFST 0
#define MC_CMD_GET_LINK_OUT_CAP_LEN 4
-/* link-partner advertised capabilities */
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
#define MC_CMD_GET_LINK_OUT_LP_CAP_OFST 4
#define MC_CMD_GET_LINK_OUT_LP_CAP_LEN 4
/* Autonegotiated speed in mbit/s. The link may still be down even if this
@@ -3598,6 +3666,97 @@
#define MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3
#define MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1
+/* MC_CMD_GET_LINK_OUT_V2 msgresponse: Extended link state information */
+#define MC_CMD_GET_LINK_OUT_V2_LEN 44
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_CAP_OFST 0
+#define MC_CMD_GET_LINK_OUT_V2_CAP_LEN 4
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_LP_CAP_OFST 4
+#define MC_CMD_GET_LINK_OUT_V2_LP_CAP_LEN 4
+/* Autonegotiated speed in mbit/s. The link may still be down even if this
+ * reads non-zero.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_OFST 8
+#define MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_LEN 4
+/* Current loopback setting. */
+#define MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_OFST 12
+#define MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_GET_LOOPBACK_MODES/MC_CMD_GET_LOOPBACK_MODES_OUT/100M */
+#define MC_CMD_GET_LINK_OUT_V2_FLAGS_OFST 16
+#define MC_CMD_GET_LINK_OUT_V2_FLAGS_LEN 4
+#define MC_CMD_GET_LINK_OUT_V2_LINK_UP_LBN 0
+#define MC_CMD_GET_LINK_OUT_V2_LINK_UP_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_LBN 1
+#define MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_BPX_LINK_LBN 2
+#define MC_CMD_GET_LINK_OUT_V2_BPX_LINK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PHY_LINK_LBN 3
+#define MC_CMD_GET_LINK_OUT_V2_PHY_LINK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_LBN 6
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_LBN 7
+#define MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_WIDTH 1
+/* This returns the negotiated flow control value. */
+#define MC_CMD_GET_LINK_OUT_V2_FCNTL_OFST 20
+#define MC_CMD_GET_LINK_OUT_V2_FCNTL_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_SET_MAC/MC_CMD_SET_MAC_IN/FCNTL */
+#define MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_OFST 24
+#define MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_LEN 4
+/* MC_CMD_MAC_FAULT_XGMII_LOCAL_LBN 0 */
+/* MC_CMD_MAC_FAULT_XGMII_LOCAL_WIDTH 1 */
+/* MC_CMD_MAC_FAULT_XGMII_REMOTE_LBN 1 */
+/* MC_CMD_MAC_FAULT_XGMII_REMOTE_WIDTH 1 */
+/* MC_CMD_MAC_FAULT_SGMII_REMOTE_LBN 2 */
+/* MC_CMD_MAC_FAULT_SGMII_REMOTE_WIDTH 1 */
+/* MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3 */
+/* MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1 */
+/* True local device capabilities (taking into account currently used PMD/MDI,
+ * e.g. plugged-in module). In general, subset of
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP, but may include extra _FEC_REQUEST
+ * bits, if the PMD requires FEC. 0 if unknown (e.g. module unplugged). Equal
+ * to SUPPORTED_CAP for non-pluggable PMDs. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define MC_CMD_GET_LINK_OUT_V2_LD_CAP_OFST 28
+#define MC_CMD_GET_LINK_OUT_V2_LD_CAP_LEN 4
+/* Auto-negotiation type used on the link */
+#define MC_CMD_GET_LINK_OUT_V2_AN_TYPE_OFST 32
+#define MC_CMD_GET_LINK_OUT_V2_AN_TYPE_LEN 4
+/* Enum values, see field(s): */
+/* AN_TYPE/TYPE */
+/* Forward error correction used on the link */
+#define MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_OFST 36
+#define MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_LEN 4
+/* Enum values, see field(s): */
+/* FEC_TYPE/TYPE */
+#define MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_OFST 40
+#define MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_LEN 4
+#define MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_LBN 0
+#define MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PMD_READY_LBN 1
+#define MC_CMD_GET_LINK_OUT_V2_PMD_READY_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_LBN 2
+#define MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_LBN 3
+#define MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_LBN 4
+#define MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_LBN 5
+#define MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_HI_BER_LBN 6
+#define MC_CMD_GET_LINK_OUT_V2_HI_BER_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_LBN 7
+#define MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_WIDTH 1
+#define MC_CMD_GET_LINK_OUT_V2_AN_DONE_LBN 8
+#define MC_CMD_GET_LINK_OUT_V2_AN_DONE_WIDTH 1
+
/***********************************/
/* MC_CMD_SET_LINK
@@ -3610,7 +3769,9 @@
/* MC_CMD_SET_LINK_IN msgrequest */
#define MC_CMD_SET_LINK_IN_LEN 16
-/* ??? */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
#define MC_CMD_SET_LINK_IN_CAP_OFST 0
#define MC_CMD_SET_LINK_IN_CAP_LEN 4
/* Flags */
@@ -3650,9 +3811,9 @@
/* Set LED state. */
#define MC_CMD_SET_ID_LED_IN_STATE_OFST 0
#define MC_CMD_SET_ID_LED_IN_STATE_LEN 4
-#define MC_CMD_LED_OFF 0x0 /* enum */
-#define MC_CMD_LED_ON 0x1 /* enum */
-#define MC_CMD_LED_DEFAULT 0x2 /* enum */
+#define MC_CMD_LED_OFF 0x0 /* enum */
+#define MC_CMD_LED_ON 0x1 /* enum */
+#define MC_CMD_LED_DEFAULT 0x2 /* enum */
/* MC_CMD_SET_ID_LED_OUT msgresponse */
#define MC_CMD_SET_ID_LED_OUT_LEN 0
@@ -3802,53 +3963,53 @@
#define MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_LEN 4
#define MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_PHY_NSTATS
/* enum: OUI. */
-#define MC_CMD_OUI 0x0
+#define MC_CMD_OUI 0x0
/* enum: PMA-PMD Link Up. */
-#define MC_CMD_PMA_PMD_LINK_UP 0x1
+#define MC_CMD_PMA_PMD_LINK_UP 0x1
/* enum: PMA-PMD RX Fault. */
-#define MC_CMD_PMA_PMD_RX_FAULT 0x2
+#define MC_CMD_PMA_PMD_RX_FAULT 0x2
/* enum: PMA-PMD TX Fault. */
-#define MC_CMD_PMA_PMD_TX_FAULT 0x3
+#define MC_CMD_PMA_PMD_TX_FAULT 0x3
/* enum: PMA-PMD Signal */
-#define MC_CMD_PMA_PMD_SIGNAL 0x4
+#define MC_CMD_PMA_PMD_SIGNAL 0x4
/* enum: PMA-PMD SNR A. */
-#define MC_CMD_PMA_PMD_SNR_A 0x5
+#define MC_CMD_PMA_PMD_SNR_A 0x5
/* enum: PMA-PMD SNR B. */
-#define MC_CMD_PMA_PMD_SNR_B 0x6
+#define MC_CMD_PMA_PMD_SNR_B 0x6
/* enum: PMA-PMD SNR C. */
-#define MC_CMD_PMA_PMD_SNR_C 0x7
+#define MC_CMD_PMA_PMD_SNR_C 0x7
/* enum: PMA-PMD SNR D. */
-#define MC_CMD_PMA_PMD_SNR_D 0x8
+#define MC_CMD_PMA_PMD_SNR_D 0x8
/* enum: PCS Link Up. */
-#define MC_CMD_PCS_LINK_UP 0x9
+#define MC_CMD_PCS_LINK_UP 0x9
/* enum: PCS RX Fault. */
-#define MC_CMD_PCS_RX_FAULT 0xa
+#define MC_CMD_PCS_RX_FAULT 0xa
/* enum: PCS TX Fault. */
-#define MC_CMD_PCS_TX_FAULT 0xb
+#define MC_CMD_PCS_TX_FAULT 0xb
/* enum: PCS BER. */
-#define MC_CMD_PCS_BER 0xc
+#define MC_CMD_PCS_BER 0xc
/* enum: PCS Block Errors. */
-#define MC_CMD_PCS_BLOCK_ERRORS 0xd
+#define MC_CMD_PCS_BLOCK_ERRORS 0xd
/* enum: PhyXS Link Up. */
-#define MC_CMD_PHYXS_LINK_UP 0xe
+#define MC_CMD_PHYXS_LINK_UP 0xe
/* enum: PhyXS RX Fault. */
-#define MC_CMD_PHYXS_RX_FAULT 0xf
+#define MC_CMD_PHYXS_RX_FAULT 0xf
/* enum: PhyXS TX Fault. */
-#define MC_CMD_PHYXS_TX_FAULT 0x10
+#define MC_CMD_PHYXS_TX_FAULT 0x10
/* enum: PhyXS Align. */
-#define MC_CMD_PHYXS_ALIGN 0x11
+#define MC_CMD_PHYXS_ALIGN 0x11
/* enum: PhyXS Sync. */
-#define MC_CMD_PHYXS_SYNC 0x12
+#define MC_CMD_PHYXS_SYNC 0x12
/* enum: AN link-up. */
-#define MC_CMD_AN_LINK_UP 0x13
+#define MC_CMD_AN_LINK_UP 0x13
/* enum: AN Complete. */
-#define MC_CMD_AN_COMPLETE 0x14
+#define MC_CMD_AN_COMPLETE 0x14
/* enum: AN 10GBaseT Status. */
-#define MC_CMD_AN_10GBT_STATUS 0x15
+#define MC_CMD_AN_10GBT_STATUS 0x15
/* enum: Clause 22 Link-Up. */
-#define MC_CMD_CL22_LINK_UP 0x16
+#define MC_CMD_CL22_LINK_UP 0x16
/* enum: (Last entry) */
-#define MC_CMD_PHY_NSTATS 0x17
+#define MC_CMD_PHY_NSTATS 0x17
/***********************************/
@@ -3910,139 +4071,139 @@
#define MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_LO_OFST 0
#define MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_HI_OFST 4
#define MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS
-#define MC_CMD_MAC_GENERATION_START 0x0 /* enum */
-#define MC_CMD_MAC_DMABUF_START 0x1 /* enum */
-#define MC_CMD_MAC_TX_PKTS 0x1 /* enum */
-#define MC_CMD_MAC_TX_PAUSE_PKTS 0x2 /* enum */
-#define MC_CMD_MAC_TX_CONTROL_PKTS 0x3 /* enum */
-#define MC_CMD_MAC_TX_UNICAST_PKTS 0x4 /* enum */
-#define MC_CMD_MAC_TX_MULTICAST_PKTS 0x5 /* enum */
-#define MC_CMD_MAC_TX_BROADCAST_PKTS 0x6 /* enum */
-#define MC_CMD_MAC_TX_BYTES 0x7 /* enum */
-#define MC_CMD_MAC_TX_BAD_BYTES 0x8 /* enum */
-#define MC_CMD_MAC_TX_LT64_PKTS 0x9 /* enum */
-#define MC_CMD_MAC_TX_64_PKTS 0xa /* enum */
-#define MC_CMD_MAC_TX_65_TO_127_PKTS 0xb /* enum */
-#define MC_CMD_MAC_TX_128_TO_255_PKTS 0xc /* enum */
-#define MC_CMD_MAC_TX_256_TO_511_PKTS 0xd /* enum */
-#define MC_CMD_MAC_TX_512_TO_1023_PKTS 0xe /* enum */
-#define MC_CMD_MAC_TX_1024_TO_15XX_PKTS 0xf /* enum */
-#define MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS 0x10 /* enum */
-#define MC_CMD_MAC_TX_GTJUMBO_PKTS 0x11 /* enum */
-#define MC_CMD_MAC_TX_BAD_FCS_PKTS 0x12 /* enum */
-#define MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS 0x13 /* enum */
-#define MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS 0x14 /* enum */
-#define MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS 0x15 /* enum */
-#define MC_CMD_MAC_TX_LATE_COLLISION_PKTS 0x16 /* enum */
-#define MC_CMD_MAC_TX_DEFERRED_PKTS 0x17 /* enum */
-#define MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS 0x18 /* enum */
-#define MC_CMD_MAC_TX_NON_TCPUDP_PKTS 0x19 /* enum */
-#define MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS 0x1a /* enum */
-#define MC_CMD_MAC_TX_IP_SRC_ERR_PKTS 0x1b /* enum */
-#define MC_CMD_MAC_RX_PKTS 0x1c /* enum */
-#define MC_CMD_MAC_RX_PAUSE_PKTS 0x1d /* enum */
-#define MC_CMD_MAC_RX_GOOD_PKTS 0x1e /* enum */
-#define MC_CMD_MAC_RX_CONTROL_PKTS 0x1f /* enum */
-#define MC_CMD_MAC_RX_UNICAST_PKTS 0x20 /* enum */
-#define MC_CMD_MAC_RX_MULTICAST_PKTS 0x21 /* enum */
-#define MC_CMD_MAC_RX_BROADCAST_PKTS 0x22 /* enum */
-#define MC_CMD_MAC_RX_BYTES 0x23 /* enum */
-#define MC_CMD_MAC_RX_BAD_BYTES 0x24 /* enum */
-#define MC_CMD_MAC_RX_64_PKTS 0x25 /* enum */
-#define MC_CMD_MAC_RX_65_TO_127_PKTS 0x26 /* enum */
-#define MC_CMD_MAC_RX_128_TO_255_PKTS 0x27 /* enum */
-#define MC_CMD_MAC_RX_256_TO_511_PKTS 0x28 /* enum */
-#define MC_CMD_MAC_RX_512_TO_1023_PKTS 0x29 /* enum */
-#define MC_CMD_MAC_RX_1024_TO_15XX_PKTS 0x2a /* enum */
-#define MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS 0x2b /* enum */
-#define MC_CMD_MAC_RX_GTJUMBO_PKTS 0x2c /* enum */
-#define MC_CMD_MAC_RX_UNDERSIZE_PKTS 0x2d /* enum */
-#define MC_CMD_MAC_RX_BAD_FCS_PKTS 0x2e /* enum */
-#define MC_CMD_MAC_RX_OVERFLOW_PKTS 0x2f /* enum */
-#define MC_CMD_MAC_RX_FALSE_CARRIER_PKTS 0x30 /* enum */
-#define MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS 0x31 /* enum */
-#define MC_CMD_MAC_RX_ALIGN_ERROR_PKTS 0x32 /* enum */
-#define MC_CMD_MAC_RX_LENGTH_ERROR_PKTS 0x33 /* enum */
-#define MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS 0x34 /* enum */
-#define MC_CMD_MAC_RX_JABBER_PKTS 0x35 /* enum */
-#define MC_CMD_MAC_RX_NODESC_DROPS 0x36 /* enum */
-#define MC_CMD_MAC_RX_LANES01_CHAR_ERR 0x37 /* enum */
-#define MC_CMD_MAC_RX_LANES23_CHAR_ERR 0x38 /* enum */
-#define MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */
-#define MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */
-#define MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */
+#define MC_CMD_MAC_GENERATION_START 0x0 /* enum */
+#define MC_CMD_MAC_DMABUF_START 0x1 /* enum */
+#define MC_CMD_MAC_TX_PKTS 0x1 /* enum */
+#define MC_CMD_MAC_TX_PAUSE_PKTS 0x2 /* enum */
+#define MC_CMD_MAC_TX_CONTROL_PKTS 0x3 /* enum */
+#define MC_CMD_MAC_TX_UNICAST_PKTS 0x4 /* enum */
+#define MC_CMD_MAC_TX_MULTICAST_PKTS 0x5 /* enum */
+#define MC_CMD_MAC_TX_BROADCAST_PKTS 0x6 /* enum */
+#define MC_CMD_MAC_TX_BYTES 0x7 /* enum */
+#define MC_CMD_MAC_TX_BAD_BYTES 0x8 /* enum */
+#define MC_CMD_MAC_TX_LT64_PKTS 0x9 /* enum */
+#define MC_CMD_MAC_TX_64_PKTS 0xa /* enum */
+#define MC_CMD_MAC_TX_65_TO_127_PKTS 0xb /* enum */
+#define MC_CMD_MAC_TX_128_TO_255_PKTS 0xc /* enum */
+#define MC_CMD_MAC_TX_256_TO_511_PKTS 0xd /* enum */
+#define MC_CMD_MAC_TX_512_TO_1023_PKTS 0xe /* enum */
+#define MC_CMD_MAC_TX_1024_TO_15XX_PKTS 0xf /* enum */
+#define MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS 0x10 /* enum */
+#define MC_CMD_MAC_TX_GTJUMBO_PKTS 0x11 /* enum */
+#define MC_CMD_MAC_TX_BAD_FCS_PKTS 0x12 /* enum */
+#define MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS 0x13 /* enum */
+#define MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS 0x14 /* enum */
+#define MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS 0x15 /* enum */
+#define MC_CMD_MAC_TX_LATE_COLLISION_PKTS 0x16 /* enum */
+#define MC_CMD_MAC_TX_DEFERRED_PKTS 0x17 /* enum */
+#define MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS 0x18 /* enum */
+#define MC_CMD_MAC_TX_NON_TCPUDP_PKTS 0x19 /* enum */
+#define MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS 0x1a /* enum */
+#define MC_CMD_MAC_TX_IP_SRC_ERR_PKTS 0x1b /* enum */
+#define MC_CMD_MAC_RX_PKTS 0x1c /* enum */
+#define MC_CMD_MAC_RX_PAUSE_PKTS 0x1d /* enum */
+#define MC_CMD_MAC_RX_GOOD_PKTS 0x1e /* enum */
+#define MC_CMD_MAC_RX_CONTROL_PKTS 0x1f /* enum */
+#define MC_CMD_MAC_RX_UNICAST_PKTS 0x20 /* enum */
+#define MC_CMD_MAC_RX_MULTICAST_PKTS 0x21 /* enum */
+#define MC_CMD_MAC_RX_BROADCAST_PKTS 0x22 /* enum */
+#define MC_CMD_MAC_RX_BYTES 0x23 /* enum */
+#define MC_CMD_MAC_RX_BAD_BYTES 0x24 /* enum */
+#define MC_CMD_MAC_RX_64_PKTS 0x25 /* enum */
+#define MC_CMD_MAC_RX_65_TO_127_PKTS 0x26 /* enum */
+#define MC_CMD_MAC_RX_128_TO_255_PKTS 0x27 /* enum */
+#define MC_CMD_MAC_RX_256_TO_511_PKTS 0x28 /* enum */
+#define MC_CMD_MAC_RX_512_TO_1023_PKTS 0x29 /* enum */
+#define MC_CMD_MAC_RX_1024_TO_15XX_PKTS 0x2a /* enum */
+#define MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS 0x2b /* enum */
+#define MC_CMD_MAC_RX_GTJUMBO_PKTS 0x2c /* enum */
+#define MC_CMD_MAC_RX_UNDERSIZE_PKTS 0x2d /* enum */
+#define MC_CMD_MAC_RX_BAD_FCS_PKTS 0x2e /* enum */
+#define MC_CMD_MAC_RX_OVERFLOW_PKTS 0x2f /* enum */
+#define MC_CMD_MAC_RX_FALSE_CARRIER_PKTS 0x30 /* enum */
+#define MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS 0x31 /* enum */
+#define MC_CMD_MAC_RX_ALIGN_ERROR_PKTS 0x32 /* enum */
+#define MC_CMD_MAC_RX_LENGTH_ERROR_PKTS 0x33 /* enum */
+#define MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS 0x34 /* enum */
+#define MC_CMD_MAC_RX_JABBER_PKTS 0x35 /* enum */
+#define MC_CMD_MAC_RX_NODESC_DROPS 0x36 /* enum */
+#define MC_CMD_MAC_RX_LANES01_CHAR_ERR 0x37 /* enum */
+#define MC_CMD_MAC_RX_LANES23_CHAR_ERR 0x38 /* enum */
+#define MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */
+#define MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */
+#define MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */
/* enum: PM trunc_bb_overflow counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c
+#define MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c
/* enum: PM discard_bb_overflow counter. Valid for EF10 with
* PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d
+#define MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d
/* enum: PM trunc_vfifo_full counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e
+#define MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e
/* enum: PM discard_vfifo_full counter. Valid for EF10 with
* PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f
+#define MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f
/* enum: PM trunc_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_TRUNC_QBB 0x40
+#define MC_CMD_MAC_PM_TRUNC_QBB 0x40
/* enum: PM discard_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_QBB 0x41
+#define MC_CMD_MAC_PM_DISCARD_QBB 0x41
/* enum: PM discard_mapping counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
* capability only.
*/
-#define MC_CMD_MAC_PM_DISCARD_MAPPING 0x42
+#define MC_CMD_MAC_PM_DISCARD_MAPPING 0x42
/* enum: RXDP counter: Number of packets dropped due to the queue being
* disabled. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43
+#define MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43
/* enum: RXDP counter: Number of packets dropped by the DICPU. Valid for EF10
* with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45
+#define MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45
/* enum: RXDP counter: Number of non-host packets. Valid for EF10 with
* PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46
+#define MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46
/* enum: RXDP counter: Number of times an hlb descriptor fetch was performed.
* Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS 0x47
+#define MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS 0x47
/* enum: RXDP counter: Number of times the DPCPU waited for an existing
* descriptor fetch. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
*/
-#define MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS 0x48
-#define MC_CMD_MAC_VADAPTER_RX_DMABUF_START 0x4c /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS 0x4c /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES 0x4d /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS 0x4e /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES 0x4f /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS 0x50 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES 0x51 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS 0x52 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_BAD_BYTES 0x53 /* enum */
-#define MC_CMD_MAC_VADAPTER_RX_OVERFLOW 0x54 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_DMABUF_START 0x57 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS 0x57 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES 0x58 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS 0x59 /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES 0x5a /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS 0x5b /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES 0x5c /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS 0x5d /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_BAD_BYTES 0x5e /* enum */
-#define MC_CMD_MAC_VADAPTER_TX_OVERFLOW 0x5f /* enum */
+#define MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS 0x48
+#define MC_CMD_MAC_VADAPTER_RX_DMABUF_START 0x4c /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS 0x4c /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES 0x4d /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS 0x4e /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES 0x4f /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS 0x50 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES 0x51 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS 0x52 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_BAD_BYTES 0x53 /* enum */
+#define MC_CMD_MAC_VADAPTER_RX_OVERFLOW 0x54 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_DMABUF_START 0x57 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS 0x57 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES 0x58 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS 0x59 /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES 0x5a /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS 0x5b /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES 0x5c /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS 0x5d /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_BAD_BYTES 0x5e /* enum */
+#define MC_CMD_MAC_VADAPTER_TX_OVERFLOW 0x5f /* enum */
/* enum: Start of GMAC stats buffer space, for Siena only. */
-#define MC_CMD_GMAC_DMABUF_START 0x40
+#define MC_CMD_GMAC_DMABUF_START 0x40
/* enum: End of GMAC stats buffer space, for Siena only. */
-#define MC_CMD_GMAC_DMABUF_END 0x5f
+#define MC_CMD_GMAC_DMABUF_END 0x5f
/* enum: GENERATION_END value, used together with GENERATION_START to verify
* consistency of DMAd data. For legacy firmware / drivers without extended
* stats (more precisely, when DMA_LEN == MC_CMD_MAC_NSTATS *
@@ -4054,7 +4215,7 @@
* sizeof(uint64_t). See SF-109306-TC, Section 9.2 for details.
*/
#define MC_CMD_MAC_GENERATION_END 0x60
-#define MC_CMD_MAC_NSTATS 0x61 /* enum */
+#define MC_CMD_MAC_NSTATS 0x61 /* enum */
/* MC_CMD_MAC_STATS_V2_OUT_DMA msgresponse */
#define MC_CMD_MAC_STATS_V2_OUT_DMA_LEN 0
@@ -4067,25 +4228,25 @@
#define MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_HI_OFST 4
#define MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V2
/* enum: Start of FEC stats buffer space, Medford2 and up */
-#define MC_CMD_MAC_FEC_DMABUF_START 0x61
+#define MC_CMD_MAC_FEC_DMABUF_START 0x61
/* enum: Number of uncorrected FEC codewords on link (RS-FEC only for Medford2)
*/
-#define MC_CMD_MAC_FEC_UNCORRECTED_ERRORS 0x61
+#define MC_CMD_MAC_FEC_UNCORRECTED_ERRORS 0x61
/* enum: Number of corrected FEC codewords on link (RS-FEC only for Medford2)
*/
-#define MC_CMD_MAC_FEC_CORRECTED_ERRORS 0x62
+#define MC_CMD_MAC_FEC_CORRECTED_ERRORS 0x62
/* enum: Number of corrected 10-bit symbol errors, lane 0 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0 0x63
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0 0x63
/* enum: Number of corrected 10-bit symbol errors, lane 1 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1 0x64
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1 0x64
/* enum: Number of corrected 10-bit symbol errors, lane 2 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2 0x65
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2 0x65
/* enum: Number of corrected 10-bit symbol errors, lane 3 (RS-FEC only) */
-#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3 0x66
+#define MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3 0x66
/* enum: This includes the space at offset 103 which is the final
* GENERATION_END in a MAC_STATS_V2 response and otherwise unused.
*/
-#define MC_CMD_MAC_NSTATS_V2 0x68
+#define MC_CMD_MAC_NSTATS_V2 0x68
/* Other enum values, see field(s): */
/* MC_CMD_MAC_STATS_OUT_NO_DMA/STATISTICS */
@@ -4100,66 +4261,66 @@
#define MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_HI_OFST 4
#define MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V3
/* enum: Start of CTPIO stats buffer space, Medford2 and up */
-#define MC_CMD_MAC_CTPIO_DMABUF_START 0x68
+#define MC_CMD_MAC_CTPIO_DMABUF_START 0x68
/* enum: Number of CTPIO fallbacks because a DMA packet was in progress on the
* target VI
*/
-#define MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK 0x68
+#define MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK 0x68
/* enum: Number of times a CTPIO send wrote beyond frame end (informational
* only)
*/
-#define MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS 0x69
+#define MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS 0x69
/* enum: Number of CTPIO failures because the TX doorbell was written before
* the end of the frame data
*/
-#define MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL 0x6a
+#define MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL 0x6a
/* enum: Number of CTPIO failures because the internal FIFO overflowed */
-#define MC_CMD_MAC_CTPIO_OVERFLOW_FAIL 0x6b
+#define MC_CMD_MAC_CTPIO_OVERFLOW_FAIL 0x6b
/* enum: Number of CTPIO failures because the host did not deliver data fast
* enough to avoid MAC underflow
*/
-#define MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL 0x6c
+#define MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL 0x6c
/* enum: Number of CTPIO failures because the host did not deliver all the
* frame data within the timeout
*/
-#define MC_CMD_MAC_CTPIO_TIMEOUT_FAIL 0x6d
+#define MC_CMD_MAC_CTPIO_TIMEOUT_FAIL 0x6d
/* enum: Number of CTPIO failures because the frame data arrived out of order
* or with gaps
*/
-#define MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL 0x6e
+#define MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL 0x6e
/* enum: Number of CTPIO failures because the host started a new frame before
* completing the previous one
*/
-#define MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL 0x6f
+#define MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL 0x6f
/* enum: Number of CTPIO failures because a write was not a multiple of 32 bits
* or not 32-bit aligned
*/
-#define MC_CMD_MAC_CTPIO_INVALID_WR_FAIL 0x70
+#define MC_CMD_MAC_CTPIO_INVALID_WR_FAIL 0x70
/* enum: Number of CTPIO fallbacks because another VI on the same port was
* sending a CTPIO frame
*/
-#define MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK 0x71
+#define MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK 0x71
/* enum: Number of CTPIO fallbacks because target VI did not have CTPIO enabled
*/
-#define MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK 0x72
+#define MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK 0x72
/* enum: Number of CTPIO fallbacks because length in header was less than 29
* bytes
*/
-#define MC_CMD_MAC_CTPIO_RUNT_FALLBACK 0x73
+#define MC_CMD_MAC_CTPIO_RUNT_FALLBACK 0x73
/* enum: Total number of successful CTPIO sends on this port */
-#define MC_CMD_MAC_CTPIO_SUCCESS 0x74
+#define MC_CMD_MAC_CTPIO_SUCCESS 0x74
/* enum: Total number of CTPIO fallbacks on this port */
-#define MC_CMD_MAC_CTPIO_FALLBACK 0x75
+#define MC_CMD_MAC_CTPIO_FALLBACK 0x75
/* enum: Total number of CTPIO poisoned frames on this port, whether erased or
* not
*/
-#define MC_CMD_MAC_CTPIO_POISON 0x76
+#define MC_CMD_MAC_CTPIO_POISON 0x76
/* enum: Total number of CTPIO erased frames on this port */
-#define MC_CMD_MAC_CTPIO_ERASE 0x77
+#define MC_CMD_MAC_CTPIO_ERASE 0x77
/* enum: This includes the space at offset 120 which is the final
* GENERATION_END in a MAC_STATS_V3 response and otherwise unused.
*/
-#define MC_CMD_MAC_NSTATS_V3 0x79
+#define MC_CMD_MAC_NSTATS_V3 0x79
/* Other enum values, see field(s): */
/* MC_CMD_MAC_STATS_V2_OUT_NO_DMA/STATISTICS */
@@ -4268,25 +4429,25 @@
#define MC_CMD_WOL_FILTER_SET_IN_LEN 192
#define MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_OFST 0
#define MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_LEN 4
-#define MC_CMD_FILTER_MODE_SIMPLE 0x0 /* enum */
+#define MC_CMD_FILTER_MODE_SIMPLE 0x0 /* enum */
#define MC_CMD_FILTER_MODE_STRUCTURED 0xffffffff /* enum */
/* A type value of 1 is unused. */
#define MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_OFST 4
#define MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_LEN 4
/* enum: Magic */
-#define MC_CMD_WOL_TYPE_MAGIC 0x0
+#define MC_CMD_WOL_TYPE_MAGIC 0x0
/* enum: MS Windows Magic */
#define MC_CMD_WOL_TYPE_WIN_MAGIC 0x2
/* enum: IPv4 Syn */
-#define MC_CMD_WOL_TYPE_IPV4_SYN 0x3
+#define MC_CMD_WOL_TYPE_IPV4_SYN 0x3
/* enum: IPv6 Syn */
-#define MC_CMD_WOL_TYPE_IPV6_SYN 0x4
+#define MC_CMD_WOL_TYPE_IPV6_SYN 0x4
/* enum: Bitmap */
-#define MC_CMD_WOL_TYPE_BITMAP 0x5
+#define MC_CMD_WOL_TYPE_BITMAP 0x5
/* enum: Link */
-#define MC_CMD_WOL_TYPE_LINK 0x6
+#define MC_CMD_WOL_TYPE_LINK 0x6
/* enum: (Above this for future use) */
-#define MC_CMD_WOL_TYPE_MAX 0x7
+#define MC_CMD_WOL_TYPE_MAX 0x7
#define MC_CMD_WOL_FILTER_SET_IN_DATA_OFST 8
#define MC_CMD_WOL_FILTER_SET_IN_DATA_LEN 4
#define MC_CMD_WOL_FILTER_SET_IN_DATA_NUM 46
@@ -4515,6 +4676,8 @@
#define MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
#define MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
#define MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_LBN 5
#define MC_CMD_NVRAM_INFO_OUT_READ_ONLY_WIDTH 1
#define MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
@@ -4542,6 +4705,8 @@
#define MC_CMD_NVRAM_INFO_V2_OUT_PROTECTED_WIDTH 1
#define MC_CMD_NVRAM_INFO_V2_OUT_TLV_LBN 1
#define MC_CMD_NVRAM_INFO_V2_OUT_TLV_WIDTH 1
+#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_LBN 5
#define MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_WIDTH 1
#define MC_CMD_NVRAM_INFO_V2_OUT_A_B_LBN 7
@@ -4560,7 +4725,11 @@
/* MC_CMD_NVRAM_UPDATE_START
* Start a group of update operations on a virtual NVRAM partition. Locks
* required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type), EACCES (if
- * PHY_LOCK required and not held).
+ * PHY_LOCK required and not held). In an adapter bound to a TSA controller,
+ * MC_CMD_NVRAM_UPDATE_START can only be used on a subset of partition types
+ * i.e. static config, dynamic config and expansion ROM config. Attempting to
+ * perform this operation on a restricted partition will return the error
+ * EPERM.
*/
#define MC_CMD_NVRAM_UPDATE_START 0x38
@@ -4720,8 +4889,12 @@
/***********************************/
/* MC_CMD_NVRAM_UPDATE_FINISH
* Finish a group of update operations on a virtual NVRAM partition. Locks
- * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad
- * type/offset/length), EACCES (if PHY_LOCK required and not held)
+ * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type/offset/
+ * length), EACCES (if PHY_LOCK required and not held). In an adapter bound to
+ * a TSA controller, MC_CMD_NVRAM_UPDATE_FINISH can only be used on a subset of
+ * partition types i.e. static config, dynamic config and expansion ROM config.
+ * Attempting to perform this operation on a restricted partition will return
+ * the error EPERM.
*/
#define MC_CMD_NVRAM_UPDATE_FINISH 0x3c
@@ -4958,181 +5131,181 @@
#define MC_CMD_SENSOR_INFO_OUT_MASK_OFST 0
#define MC_CMD_SENSOR_INFO_OUT_MASK_LEN 4
/* enum: Controller temperature: degC */
-#define MC_CMD_SENSOR_CONTROLLER_TEMP 0x0
+#define MC_CMD_SENSOR_CONTROLLER_TEMP 0x0
/* enum: Phy common temperature: degC */
-#define MC_CMD_SENSOR_PHY_COMMON_TEMP 0x1
+#define MC_CMD_SENSOR_PHY_COMMON_TEMP 0x1
/* enum: Controller cooling: bool */
-#define MC_CMD_SENSOR_CONTROLLER_COOLING 0x2
+#define MC_CMD_SENSOR_CONTROLLER_COOLING 0x2
/* enum: Phy 0 temperature: degC */
-#define MC_CMD_SENSOR_PHY0_TEMP 0x3
+#define MC_CMD_SENSOR_PHY0_TEMP 0x3
/* enum: Phy 0 cooling: bool */
-#define MC_CMD_SENSOR_PHY0_COOLING 0x4
+#define MC_CMD_SENSOR_PHY0_COOLING 0x4
/* enum: Phy 1 temperature: degC */
-#define MC_CMD_SENSOR_PHY1_TEMP 0x5
+#define MC_CMD_SENSOR_PHY1_TEMP 0x5
/* enum: Phy 1 cooling: bool */
-#define MC_CMD_SENSOR_PHY1_COOLING 0x6
+#define MC_CMD_SENSOR_PHY1_COOLING 0x6
/* enum: 1.0v power: mV */
-#define MC_CMD_SENSOR_IN_1V0 0x7
+#define MC_CMD_SENSOR_IN_1V0 0x7
/* enum: 1.2v power: mV */
-#define MC_CMD_SENSOR_IN_1V2 0x8
+#define MC_CMD_SENSOR_IN_1V2 0x8
/* enum: 1.8v power: mV */
-#define MC_CMD_SENSOR_IN_1V8 0x9
+#define MC_CMD_SENSOR_IN_1V8 0x9
/* enum: 2.5v power: mV */
-#define MC_CMD_SENSOR_IN_2V5 0xa
+#define MC_CMD_SENSOR_IN_2V5 0xa
/* enum: 3.3v power: mV */
-#define MC_CMD_SENSOR_IN_3V3 0xb
+#define MC_CMD_SENSOR_IN_3V3 0xb
/* enum: 12v power: mV */
-#define MC_CMD_SENSOR_IN_12V0 0xc
+#define MC_CMD_SENSOR_IN_12V0 0xc
/* enum: 1.2v analogue power: mV */
-#define MC_CMD_SENSOR_IN_1V2A 0xd
+#define MC_CMD_SENSOR_IN_1V2A 0xd
/* enum: reference voltage: mV */
-#define MC_CMD_SENSOR_IN_VREF 0xe
+#define MC_CMD_SENSOR_IN_VREF 0xe
/* enum: AOE FPGA power: mV */
-#define MC_CMD_SENSOR_OUT_VAOE 0xf
+#define MC_CMD_SENSOR_OUT_VAOE 0xf
/* enum: AOE FPGA temperature: degC */
-#define MC_CMD_SENSOR_AOE_TEMP 0x10
+#define MC_CMD_SENSOR_AOE_TEMP 0x10
/* enum: AOE FPGA PSU temperature: degC */
-#define MC_CMD_SENSOR_PSU_AOE_TEMP 0x11
+#define MC_CMD_SENSOR_PSU_AOE_TEMP 0x11
/* enum: AOE PSU temperature: degC */
-#define MC_CMD_SENSOR_PSU_TEMP 0x12
+#define MC_CMD_SENSOR_PSU_TEMP 0x12
/* enum: Fan 0 speed: RPM */
-#define MC_CMD_SENSOR_FAN_0 0x13
+#define MC_CMD_SENSOR_FAN_0 0x13
/* enum: Fan 1 speed: RPM */
-#define MC_CMD_SENSOR_FAN_1 0x14
+#define MC_CMD_SENSOR_FAN_1 0x14
/* enum: Fan 2 speed: RPM */
-#define MC_CMD_SENSOR_FAN_2 0x15
+#define MC_CMD_SENSOR_FAN_2 0x15
/* enum: Fan 3 speed: RPM */
-#define MC_CMD_SENSOR_FAN_3 0x16
+#define MC_CMD_SENSOR_FAN_3 0x16
/* enum: Fan 4 speed: RPM */
-#define MC_CMD_SENSOR_FAN_4 0x17
+#define MC_CMD_SENSOR_FAN_4 0x17
/* enum: AOE FPGA input power: mV */
-#define MC_CMD_SENSOR_IN_VAOE 0x18
+#define MC_CMD_SENSOR_IN_VAOE 0x18
/* enum: AOE FPGA current: mA */
-#define MC_CMD_SENSOR_OUT_IAOE 0x19
+#define MC_CMD_SENSOR_OUT_IAOE 0x19
/* enum: AOE FPGA input current: mA */
-#define MC_CMD_SENSOR_IN_IAOE 0x1a
+#define MC_CMD_SENSOR_IN_IAOE 0x1a
/* enum: NIC power consumption: W */
-#define MC_CMD_SENSOR_NIC_POWER 0x1b
+#define MC_CMD_SENSOR_NIC_POWER 0x1b
/* enum: 0.9v power voltage: mV */
-#define MC_CMD_SENSOR_IN_0V9 0x1c
+#define MC_CMD_SENSOR_IN_0V9 0x1c
/* enum: 0.9v power current: mA */
-#define MC_CMD_SENSOR_IN_I0V9 0x1d
+#define MC_CMD_SENSOR_IN_I0V9 0x1d
/* enum: 1.2v power current: mA */
-#define MC_CMD_SENSOR_IN_I1V2 0x1e
+#define MC_CMD_SENSOR_IN_I1V2 0x1e
/* enum: Not a sensor: reserved for the next page flag */
-#define MC_CMD_SENSOR_PAGE0_NEXT 0x1f
+#define MC_CMD_SENSOR_PAGE0_NEXT 0x1f
/* enum: 0.9v power voltage (at ADC): mV */
-#define MC_CMD_SENSOR_IN_0V9_ADC 0x20
+#define MC_CMD_SENSOR_IN_0V9_ADC 0x20
/* enum: Controller temperature 2: degC */
-#define MC_CMD_SENSOR_CONTROLLER_2_TEMP 0x21
+#define MC_CMD_SENSOR_CONTROLLER_2_TEMP 0x21
/* enum: Voltage regulator internal temperature: degC */
-#define MC_CMD_SENSOR_VREG_INTERNAL_TEMP 0x22
+#define MC_CMD_SENSOR_VREG_INTERNAL_TEMP 0x22
/* enum: 0.9V voltage regulator temperature: degC */
-#define MC_CMD_SENSOR_VREG_0V9_TEMP 0x23
+#define MC_CMD_SENSOR_VREG_0V9_TEMP 0x23
/* enum: 1.2V voltage regulator temperature: degC */
-#define MC_CMD_SENSOR_VREG_1V2_TEMP 0x24
+#define MC_CMD_SENSOR_VREG_1V2_TEMP 0x24
/* enum: controller internal temperature sensor voltage (internal ADC): mV */
-#define MC_CMD_SENSOR_CONTROLLER_VPTAT 0x25
+#define MC_CMD_SENSOR_CONTROLLER_VPTAT 0x25
/* enum: controller internal temperature (internal ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP 0x26
+#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP 0x26
/* enum: controller internal temperature sensor voltage (external ADC): mV */
-#define MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC 0x27
+#define MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC 0x27
/* enum: controller internal temperature (external ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC 0x28
+#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC 0x28
/* enum: ambient temperature: degC */
-#define MC_CMD_SENSOR_AMBIENT_TEMP 0x29
+#define MC_CMD_SENSOR_AMBIENT_TEMP 0x29
/* enum: air flow: bool */
-#define MC_CMD_SENSOR_AIRFLOW 0x2a
+#define MC_CMD_SENSOR_AIRFLOW 0x2a
/* enum: voltage between VSS08D and VSS08D at CSR: mV */
-#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR 0x2b
+#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR 0x2b
/* enum: voltage between VSS08D and VSS08D at CSR (external ADC): mV */
-#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC 0x2c
+#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC 0x2c
/* enum: Hotpoint temperature: degC */
-#define MC_CMD_SENSOR_HOTPOINT_TEMP 0x2d
+#define MC_CMD_SENSOR_HOTPOINT_TEMP 0x2d
/* enum: Port 0 PHY power switch over-current: bool */
-#define MC_CMD_SENSOR_PHY_POWER_PORT0 0x2e
+#define MC_CMD_SENSOR_PHY_POWER_PORT0 0x2e
/* enum: Port 1 PHY power switch over-current: bool */
-#define MC_CMD_SENSOR_PHY_POWER_PORT1 0x2f
-/* enum: Mop-up microcontroller reference voltage (millivolts) */
-#define MC_CMD_SENSOR_MUM_VCC 0x30
+#define MC_CMD_SENSOR_PHY_POWER_PORT1 0x2f
+/* enum: Mop-up microcontroller reference voltage: mV */
+#define MC_CMD_SENSOR_MUM_VCC 0x30
/* enum: 0.9v power phase A voltage: mV */
-#define MC_CMD_SENSOR_IN_0V9_A 0x31
+#define MC_CMD_SENSOR_IN_0V9_A 0x31
/* enum: 0.9v power phase A current: mA */
-#define MC_CMD_SENSOR_IN_I0V9_A 0x32
+#define MC_CMD_SENSOR_IN_I0V9_A 0x32
/* enum: 0.9V voltage regulator phase A temperature: degC */
-#define MC_CMD_SENSOR_VREG_0V9_A_TEMP 0x33
+#define MC_CMD_SENSOR_VREG_0V9_A_TEMP 0x33
/* enum: 0.9v power phase B voltage: mV */
-#define MC_CMD_SENSOR_IN_0V9_B 0x34
+#define MC_CMD_SENSOR_IN_0V9_B 0x34
/* enum: 0.9v power phase B current: mA */
-#define MC_CMD_SENSOR_IN_I0V9_B 0x35
+#define MC_CMD_SENSOR_IN_I0V9_B 0x35
/* enum: 0.9V voltage regulator phase B temperature: degC */
-#define MC_CMD_SENSOR_VREG_0V9_B_TEMP 0x36
+#define MC_CMD_SENSOR_VREG_0V9_B_TEMP 0x36
/* enum: CCOM AVREG 1v2 supply (internal ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY 0x37
+#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY 0x37
/* enum: CCOM AVREG 1v2 supply (external ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC 0x38
+#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC 0x38
/* enum: CCOM AVREG 1v8 supply (internal ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY 0x39
+#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY 0x39
/* enum: CCOM AVREG 1v8 supply (external ADC): mV */
-#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC 0x3a
+#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC 0x3a
/* enum: CCOM RTS temperature: degC */
-#define MC_CMD_SENSOR_CONTROLLER_RTS 0x3b
+#define MC_CMD_SENSOR_CONTROLLER_RTS 0x3b
/* enum: Not a sensor: reserved for the next page flag */
-#define MC_CMD_SENSOR_PAGE1_NEXT 0x3f
+#define MC_CMD_SENSOR_PAGE1_NEXT 0x3f
/* enum: controller internal temperature sensor voltage on master core
* (internal ADC): mV
*/
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT 0x40
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT 0x40
/* enum: controller internal temperature on master core (internal ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP 0x41
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP 0x41
/* enum: controller internal temperature sensor voltage on master core
* (external ADC): mV
*/
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC 0x42
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC 0x42
/* enum: controller internal temperature on master core (external ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC 0x43
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC 0x43
/* enum: controller internal temperature on slave core sensor voltage (internal
* ADC): mV
*/
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT 0x44
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT 0x44
/* enum: controller internal temperature on slave core (internal ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP 0x45
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP 0x45
/* enum: controller internal temperature on slave core sensor voltage (external
* ADC): mV
*/
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC 0x46
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC 0x46
/* enum: controller internal temperature on slave core (external ADC): degC */
-#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC 0x47
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC 0x47
/* enum: Voltage supplied to the SODIMMs from their power supply: mV */
-#define MC_CMD_SENSOR_SODIMM_VOUT 0x49
+#define MC_CMD_SENSOR_SODIMM_VOUT 0x49
/* enum: Temperature of SODIMM 0 (if installed): degC */
-#define MC_CMD_SENSOR_SODIMM_0_TEMP 0x4a
+#define MC_CMD_SENSOR_SODIMM_0_TEMP 0x4a
/* enum: Temperature of SODIMM 1 (if installed): degC */
-#define MC_CMD_SENSOR_SODIMM_1_TEMP 0x4b
+#define MC_CMD_SENSOR_SODIMM_1_TEMP 0x4b
/* enum: Voltage supplied to the QSFP #0 from their power supply: mV */
-#define MC_CMD_SENSOR_PHY0_VCC 0x4c
+#define MC_CMD_SENSOR_PHY0_VCC 0x4c
/* enum: Voltage supplied to the QSFP #1 from their power supply: mV */
-#define MC_CMD_SENSOR_PHY1_VCC 0x4d
+#define MC_CMD_SENSOR_PHY1_VCC 0x4d
/* enum: Controller die temperature (TDIODE): degC */
-#define MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP 0x4e
+#define MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP 0x4e
/* enum: Board temperature (front): degC */
-#define MC_CMD_SENSOR_BOARD_FRONT_TEMP 0x4f
+#define MC_CMD_SENSOR_BOARD_FRONT_TEMP 0x4f
/* enum: Board temperature (back): degC */
-#define MC_CMD_SENSOR_BOARD_BACK_TEMP 0x50
+#define MC_CMD_SENSOR_BOARD_BACK_TEMP 0x50
/* enum: 1.8v power current: mA */
-#define MC_CMD_SENSOR_IN_I1V8 0x51
+#define MC_CMD_SENSOR_IN_I1V8 0x51
/* enum: 2.5v power current: mA */
-#define MC_CMD_SENSOR_IN_I2V5 0x52
+#define MC_CMD_SENSOR_IN_I2V5 0x52
/* enum: 3.3v power current: mA */
-#define MC_CMD_SENSOR_IN_I3V3 0x53
+#define MC_CMD_SENSOR_IN_I3V3 0x53
/* enum: 12v power current: mA */
-#define MC_CMD_SENSOR_IN_I12V0 0x54
+#define MC_CMD_SENSOR_IN_I12V0 0x54
/* enum: 1.3v power: mV */
-#define MC_CMD_SENSOR_IN_1V3 0x55
+#define MC_CMD_SENSOR_IN_1V3 0x55
/* enum: 1.3v power current: mA */
-#define MC_CMD_SENSOR_IN_I1V3 0x56
+#define MC_CMD_SENSOR_IN_I1V3 0x56
/* enum: Not a sensor: reserved for the next page flag */
-#define MC_CMD_SENSOR_PAGE2_NEXT 0x5f
+#define MC_CMD_SENSOR_PAGE2_NEXT 0x5f
/* MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF */
#define MC_CMD_SENSOR_ENTRY_OFST 4
#define MC_CMD_SENSOR_ENTRY_LEN 8
@@ -5234,17 +5407,17 @@
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_OFST 2
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LEN 1
/* enum: Ok. */
-#define MC_CMD_SENSOR_STATE_OK 0x0
+#define MC_CMD_SENSOR_STATE_OK 0x0
/* enum: Breached warning threshold. */
-#define MC_CMD_SENSOR_STATE_WARNING 0x1
+#define MC_CMD_SENSOR_STATE_WARNING 0x1
/* enum: Breached fatal threshold. */
-#define MC_CMD_SENSOR_STATE_FATAL 0x2
+#define MC_CMD_SENSOR_STATE_FATAL 0x2
/* enum: Fault with sensor. */
-#define MC_CMD_SENSOR_STATE_BROKEN 0x3
+#define MC_CMD_SENSOR_STATE_BROKEN 0x3
/* enum: Sensor is working but does not currently have a reading. */
-#define MC_CMD_SENSOR_STATE_NO_READING 0x4
+#define MC_CMD_SENSOR_STATE_NO_READING 0x4
/* enum: Sensor initialisation failed. */
-#define MC_CMD_SENSOR_STATE_INIT_FAILED 0x5
+#define MC_CMD_SENSOR_STATE_INIT_FAILED 0x5
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LBN 16
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_WIDTH 8
#define MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_TYPE_OFST 3
@@ -5327,7 +5500,7 @@
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_OFST 0
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_LEN 4
#define MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_ARP 0x1 /* enum */
-#define MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS 0x2 /* enum */
+#define MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS 0x2 /* enum */
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_OFST 4
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_LEN 4
#define MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_MINNUM 1
@@ -5416,17 +5589,17 @@
/* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless
* you're testing firmware, this is what you want.
*/
-#define MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0
+#define MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0
/* enum: Assert using assert(0); */
-#define MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1
+#define MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1
/* enum: Deliberately trigger a watchdog */
-#define MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2
+#define MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2
/* enum: Deliberately trigger a trap by loading from an invalid address */
-#define MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3
+#define MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3
/* enum: Deliberately trigger a trap by storing to an invalid address */
-#define MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4
+#define MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4
/* enum: Jump to an invalid address */
-#define MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5
+#define MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5
/* MC_CMD_TESTASSERT_V2_OUT msgresponse */
#define MC_CMD_TESTASSERT_V2_OUT_LEN 0
@@ -5969,7 +6142,7 @@
/* MC_CMD_MUM_IN_CMD_LEN 4 */
#define MC_CMD_MUM_IN_LOG_OP_OFST 4
#define MC_CMD_MUM_IN_LOG_OP_LEN 4
-#define MC_CMD_MUM_IN_LOG_OP_UART 0x1 /* enum */
+#define MC_CMD_MUM_IN_LOG_OP_UART 0x1 /* enum */
/* MC_CMD_MUM_IN_LOG_OP_UART msgrequest */
#define MC_CMD_MUM_IN_LOG_OP_UART_LEN 12
@@ -6464,17 +6637,17 @@
#define EVB_PORT_ID_PORT_ID_OFST 0
#define EVB_PORT_ID_PORT_ID_LEN 4
/* enum: An invalid port handle. */
-#define EVB_PORT_ID_NULL 0x0
+#define EVB_PORT_ID_NULL 0x0
/* enum: The port assigned to this function. */
-#define EVB_PORT_ID_ASSIGNED 0x1000000
+#define EVB_PORT_ID_ASSIGNED 0x1000000
/* enum: External network port 0 */
-#define EVB_PORT_ID_MAC0 0x2000000
+#define EVB_PORT_ID_MAC0 0x2000000
/* enum: External network port 1 */
-#define EVB_PORT_ID_MAC1 0x2000001
+#define EVB_PORT_ID_MAC1 0x2000001
/* enum: External network port 2 */
-#define EVB_PORT_ID_MAC2 0x2000002
+#define EVB_PORT_ID_MAC2 0x2000002
/* enum: External network port 3 */
-#define EVB_PORT_ID_MAC3 0x2000003
+#define EVB_PORT_ID_MAC3 0x2000003
#define EVB_PORT_ID_PORT_ID_LBN 0
#define EVB_PORT_ID_PORT_ID_WIDTH 32
@@ -6486,7 +6659,7 @@
#define EVB_VLAN_TAG_MODE_LBN 12
#define EVB_VLAN_TAG_MODE_WIDTH 4
/* enum: Insert the VLAN. */
-#define EVB_VLAN_TAG_INSERT 0x0
+#define EVB_VLAN_TAG_INSERT 0x0
/* enum: Replace the VLAN if already present. */
#define EVB_VLAN_TAG_REPLACE 0x1
@@ -6515,110 +6688,110 @@
#define NVRAM_PARTITION_TYPE_ID_OFST 0
#define NVRAM_PARTITION_TYPE_ID_LEN 2
/* enum: Primary MC firmware partition */
-#define NVRAM_PARTITION_TYPE_MC_FIRMWARE 0x100
+#define NVRAM_PARTITION_TYPE_MC_FIRMWARE 0x100
/* enum: Secondary MC firmware partition */
-#define NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP 0x200
+#define NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP 0x200
/* enum: Expansion ROM partition */
-#define NVRAM_PARTITION_TYPE_EXPANSION_ROM 0x300
+#define NVRAM_PARTITION_TYPE_EXPANSION_ROM 0x300
/* enum: Static configuration TLV partition */
-#define NVRAM_PARTITION_TYPE_STATIC_CONFIG 0x400
+#define NVRAM_PARTITION_TYPE_STATIC_CONFIG 0x400
/* enum: Dynamic configuration TLV partition */
-#define NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG 0x500
+#define NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG 0x500
/* enum: Expansion ROM configuration data for port 0 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0 0x600
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0 0x600
/* enum: Synonym for EXPROM_CONFIG_PORT0 as used in pmap files */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG 0x600
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG 0x600
/* enum: Expansion ROM configuration data for port 1 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1 0x601
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1 0x601
/* enum: Expansion ROM configuration data for port 2 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2 0x602
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2 0x602
/* enum: Expansion ROM configuration data for port 3 */
-#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3 0x603
+#define NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3 0x603
/* enum: Non-volatile log output partition */
-#define NVRAM_PARTITION_TYPE_LOG 0x700
+#define NVRAM_PARTITION_TYPE_LOG 0x700
/* enum: Non-volatile log output of second core on dual-core device */
-#define NVRAM_PARTITION_TYPE_LOG_SLAVE 0x701
+#define NVRAM_PARTITION_TYPE_LOG_SLAVE 0x701
/* enum: Device state dump output partition */
-#define NVRAM_PARTITION_TYPE_DUMP 0x800
+#define NVRAM_PARTITION_TYPE_DUMP 0x800
/* enum: Application license key storage partition */
-#define NVRAM_PARTITION_TYPE_LICENSE 0x900
+#define NVRAM_PARTITION_TYPE_LICENSE 0x900
/* enum: Start of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define NVRAM_PARTITION_TYPE_PHY_MIN 0xa00
+#define NVRAM_PARTITION_TYPE_PHY_MIN 0xa00
/* enum: End of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define NVRAM_PARTITION_TYPE_PHY_MAX 0xaff
+#define NVRAM_PARTITION_TYPE_PHY_MAX 0xaff
/* enum: Primary FPGA partition */
-#define NVRAM_PARTITION_TYPE_FPGA 0xb00
+#define NVRAM_PARTITION_TYPE_FPGA 0xb00
/* enum: Secondary FPGA partition */
-#define NVRAM_PARTITION_TYPE_FPGA_BACKUP 0xb01
+#define NVRAM_PARTITION_TYPE_FPGA_BACKUP 0xb01
/* enum: FC firmware partition */
-#define NVRAM_PARTITION_TYPE_FC_FIRMWARE 0xb02
+#define NVRAM_PARTITION_TYPE_FC_FIRMWARE 0xb02
/* enum: FC License partition */
-#define NVRAM_PARTITION_TYPE_FC_LICENSE 0xb03
+#define NVRAM_PARTITION_TYPE_FC_LICENSE 0xb03
/* enum: Non-volatile log output partition for FC */
-#define NVRAM_PARTITION_TYPE_FC_LOG 0xb04
+#define NVRAM_PARTITION_TYPE_FC_LOG 0xb04
/* enum: MUM firmware partition */
-#define NVRAM_PARTITION_TYPE_MUM_FIRMWARE 0xc00
+#define NVRAM_PARTITION_TYPE_MUM_FIRMWARE 0xc00
/* enum: SUC firmware partition (this is intentionally an alias of
* MUM_FIRMWARE)
*/
-#define NVRAM_PARTITION_TYPE_SUC_FIRMWARE 0xc00
+#define NVRAM_PARTITION_TYPE_SUC_FIRMWARE 0xc00
/* enum: MUM Non-volatile log output partition. */
-#define NVRAM_PARTITION_TYPE_MUM_LOG 0xc01
+#define NVRAM_PARTITION_TYPE_MUM_LOG 0xc01
/* enum: MUM Application table partition. */
-#define NVRAM_PARTITION_TYPE_MUM_APPTABLE 0xc02
+#define NVRAM_PARTITION_TYPE_MUM_APPTABLE 0xc02
/* enum: MUM boot rom partition. */
-#define NVRAM_PARTITION_TYPE_MUM_BOOT_ROM 0xc03
+#define NVRAM_PARTITION_TYPE_MUM_BOOT_ROM 0xc03
/* enum: MUM production signatures & calibration rom partition. */
-#define NVRAM_PARTITION_TYPE_MUM_PROD_ROM 0xc04
+#define NVRAM_PARTITION_TYPE_MUM_PROD_ROM 0xc04
/* enum: MUM user signatures & calibration rom partition. */
-#define NVRAM_PARTITION_TYPE_MUM_USER_ROM 0xc05
+#define NVRAM_PARTITION_TYPE_MUM_USER_ROM 0xc05
/* enum: MUM fuses and lockbits partition. */
-#define NVRAM_PARTITION_TYPE_MUM_FUSELOCK 0xc06
+#define NVRAM_PARTITION_TYPE_MUM_FUSELOCK 0xc06
/* enum: UEFI expansion ROM if separate from PXE */
-#define NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00
+#define NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00
/* enum: Used by the expansion ROM for logging */
-#define NVRAM_PARTITION_TYPE_PXE_LOG 0x1000
+#define NVRAM_PARTITION_TYPE_PXE_LOG 0x1000
/* enum: Used for XIP code of shmbooted images */
-#define NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100
+#define NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100
/* enum: Spare partition 2 */
-#define NVRAM_PARTITION_TYPE_SPARE_2 0x1200
+#define NVRAM_PARTITION_TYPE_SPARE_2 0x1200
/* enum: Manufacturing partition. Used during manufacture to pass information
* between XJTAG and Manftest.
*/
-#define NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300
+#define NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300
/* enum: Spare partition 4 */
-#define NVRAM_PARTITION_TYPE_SPARE_4 0x1400
+#define NVRAM_PARTITION_TYPE_SPARE_4 0x1400
/* enum: Spare partition 5 */
-#define NVRAM_PARTITION_TYPE_SPARE_5 0x1500
+#define NVRAM_PARTITION_TYPE_SPARE_5 0x1500
/* enum: Partition for reporting MC status. See mc_flash_layout.h
* medford_mc_status_hdr_t for layout on Medford.
*/
-#define NVRAM_PARTITION_TYPE_STATUS 0x1600
+#define NVRAM_PARTITION_TYPE_STATUS 0x1600
/* enum: Spare partition 13 */
-#define NVRAM_PARTITION_TYPE_SPARE_13 0x1700
+#define NVRAM_PARTITION_TYPE_SPARE_13 0x1700
/* enum: Spare partition 14 */
-#define NVRAM_PARTITION_TYPE_SPARE_14 0x1800
+#define NVRAM_PARTITION_TYPE_SPARE_14 0x1800
/* enum: Spare partition 15 */
-#define NVRAM_PARTITION_TYPE_SPARE_15 0x1900
+#define NVRAM_PARTITION_TYPE_SPARE_15 0x1900
/* enum: Spare partition 16 */
-#define NVRAM_PARTITION_TYPE_SPARE_16 0x1a00
+#define NVRAM_PARTITION_TYPE_SPARE_16 0x1a00
/* enum: Factory defaults for dynamic configuration */
-#define NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS 0x1b00
+#define NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS 0x1b00
/* enum: Factory defaults for expansion ROM configuration */
-#define NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS 0x1c00
+#define NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS 0x1c00
/* enum: Field Replaceable Unit inventory information for use on IPMI
* platforms. See SF-119124-PS. The STATIC_CONFIG partition may contain a
* subset of the information stored in this partition.
*/
-#define NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
+#define NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
/* enum: Start of reserved value range (firmware may use for any purpose) */
-#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
+#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
/* enum: End of reserved value range (firmware may use for any purpose) */
-#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX 0xfffd
+#define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX 0xfffd
/* enum: Recovery partition map (provided if real map is missing or corrupt) */
-#define NVRAM_PARTITION_TYPE_RECOVERY_MAP 0xfffe
+#define NVRAM_PARTITION_TYPE_RECOVERY_MAP 0xfffe
/* enum: Partition map (real map as stored in flash) */
-#define NVRAM_PARTITION_TYPE_PARTITION_MAP 0xffff
+#define NVRAM_PARTITION_TYPE_PARTITION_MAP 0xffff
#define NVRAM_PARTITION_TYPE_ID_LBN 0
#define NVRAM_PARTITION_TYPE_ID_WIDTH 16
@@ -6627,37 +6800,37 @@
#define LICENSED_APP_ID_ID_OFST 0
#define LICENSED_APP_ID_ID_LEN 4
/* enum: OpenOnload */
-#define LICENSED_APP_ID_ONLOAD 0x1
+#define LICENSED_APP_ID_ONLOAD 0x1
/* enum: PTP timestamping */
-#define LICENSED_APP_ID_PTP 0x2
+#define LICENSED_APP_ID_PTP 0x2
/* enum: SolarCapture Pro */
-#define LICENSED_APP_ID_SOLARCAPTURE_PRO 0x4
+#define LICENSED_APP_ID_SOLARCAPTURE_PRO 0x4
/* enum: SolarSecure filter engine */
-#define LICENSED_APP_ID_SOLARSECURE 0x8
+#define LICENSED_APP_ID_SOLARSECURE 0x8
/* enum: Performance monitor */
-#define LICENSED_APP_ID_PERF_MONITOR 0x10
+#define LICENSED_APP_ID_PERF_MONITOR 0x10
/* enum: SolarCapture Live */
-#define LICENSED_APP_ID_SOLARCAPTURE_LIVE 0x20
+#define LICENSED_APP_ID_SOLARCAPTURE_LIVE 0x20
/* enum: Capture SolarSystem */
-#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40
+#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40
/* enum: Network Access Control */
-#define LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80
+#define LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80
/* enum: TCP Direct */
-#define LICENSED_APP_ID_TCP_DIRECT 0x100
+#define LICENSED_APP_ID_TCP_DIRECT 0x100
/* enum: Low Latency */
-#define LICENSED_APP_ID_LOW_LATENCY 0x200
+#define LICENSED_APP_ID_LOW_LATENCY 0x200
/* enum: SolarCapture Tap */
-#define LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400
+#define LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400
/* enum: Capture SolarSystem 40G */
#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800
/* enum: Capture SolarSystem 1G */
-#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G 0x1000
+#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G 0x1000
/* enum: ScaleOut Onload */
-#define LICENSED_APP_ID_SCALEOUT_ONLOAD 0x2000
+#define LICENSED_APP_ID_SCALEOUT_ONLOAD 0x2000
/* enum: SCS Network Analytics Dashboard */
-#define LICENSED_APP_ID_DSHBRD 0x4000
+#define LICENSED_APP_ID_DSHBRD 0x4000
/* enum: SolarCapture Trading Analytics */
-#define LICENSED_APP_ID_SCATRD 0x8000
+#define LICENSED_APP_ID_SCATRD 0x8000
#define LICENSED_APP_ID_ID_LBN 0
#define LICENSED_APP_ID_ID_WIDTH 32
@@ -6775,23 +6948,23 @@
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_OFST 3
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_LEN 1
/* enum: This is a TX completion event, not a timestamp */
-#define TX_TIMESTAMP_EVENT_TX_EV_COMPLETION 0x0
+#define TX_TIMESTAMP_EVENT_TX_EV_COMPLETION 0x0
/* enum: This is a TX completion event for a CTPIO transmit. The event format
* is the same as for TX_EV_COMPLETION.
*/
-#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION 0x11
+#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION 0x11
/* enum: This is the low part of a TX timestamp for a CTPIO transmission. The
* event format is the same as for TX_EV_TSTAMP_LO
*/
-#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO 0x12
+#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO 0x12
/* enum: This is the high part of a TX timestamp for a CTPIO transmission. The
* event format is the same as for TX_EV_TSTAMP_HI
*/
-#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI 0x13
+#define TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI 0x13
/* enum: This is the low part of a TX timestamp event */
-#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO 0x51
+#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO 0x51
/* enum: This is the high part of a TX timestamp event */
-#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI 0x52
+#define TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI 0x52
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_LBN 24
#define TX_TIMESTAMP_EVENT_TX_EV_TYPE_WIDTH 8
/* upper 16 bits of timestamp data */
@@ -7071,17 +7244,17 @@
#define QUEUE_CRC_MODE_MODE_LBN 0
#define QUEUE_CRC_MODE_MODE_WIDTH 4
/* enum: No CRC. */
-#define QUEUE_CRC_MODE_NONE 0x0
+#define QUEUE_CRC_MODE_NONE 0x0
/* enum: CRC Fiber channel over ethernet. */
-#define QUEUE_CRC_MODE_FCOE 0x1
+#define QUEUE_CRC_MODE_FCOE 0x1
/* enum: CRC (digest) iSCSI header only. */
-#define QUEUE_CRC_MODE_ISCSI_HDR 0x2
+#define QUEUE_CRC_MODE_ISCSI_HDR 0x2
/* enum: CRC (digest) iSCSI header and payload. */
-#define QUEUE_CRC_MODE_ISCSI 0x3
+#define QUEUE_CRC_MODE_ISCSI 0x3
/* enum: CRC Fiber channel over IP over ethernet. */
-#define QUEUE_CRC_MODE_FCOIPOE 0x4
+#define QUEUE_CRC_MODE_FCOIPOE 0x4
/* enum: CRC MPA. */
-#define QUEUE_CRC_MODE_MPA 0x5
+#define QUEUE_CRC_MODE_MPA 0x5
#define QUEUE_CRC_MODE_SPARE_LBN 4
#define QUEUE_CRC_MODE_SPARE_WIDTH 4
@@ -7157,11 +7330,15 @@
/* Size, in entries */
#define MC_CMD_INIT_RXQ_EXT_IN_SIZE_OFST 0
#define MC_CMD_INIT_RXQ_EXT_IN_SIZE_LEN 4
-/* The EVQ to send events to. This is an index originally specified to INIT_EVQ
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
*/
#define MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_OFST 4
#define MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_LEN 4
-/* The value to put in the event data. Check hardware spec. for valid range. */
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
#define MC_CMD_INIT_RXQ_EXT_IN_LABEL_OFST 8
#define MC_CMD_INIT_RXQ_EXT_IN_LABEL_LEN 4
/* Desired instance. Must be set to a specific instance, which is a function
@@ -7189,18 +7366,25 @@
#define MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_LBN 10
#define MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_WIDTH 4
/* enum: One packet per descriptor (for normal networking) */
-#define MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET 0x0
+#define MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET 0x0
/* enum: Pack multiple packets into large descriptors (for SolarCapture) */
-#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM 0x1
+#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define MC_CMD_INIT_RXQ_EXT_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_LBN 14
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
#define MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M 0x0 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K 0x1 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K 0x2 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K 0x3 /* enum */
-#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K 0x4 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M 0x0 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K 0x1 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K 0x2 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K 0x3 /* enum */
+#define MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K 0x4 /* enum */
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
#define MC_CMD_INIT_RXQ_EXT_IN_FLAG_FORCE_EV_MERGING_LBN 19
@@ -7221,12 +7405,122 @@
#define MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_OFST 540
#define MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_LEN 4
+/* MC_CMD_INIT_RXQ_V3_IN msgrequest */
+#define MC_CMD_INIT_RXQ_V3_IN_LEN 560
+/* Size, in entries */
+#define MC_CMD_INIT_RXQ_V3_IN_SIZE_OFST 0
+#define MC_CMD_INIT_RXQ_V3_IN_SIZE_LEN 4
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_OFST 4
+#define MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_LEN 4
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_LABEL_OFST 8
+#define MC_CMD_INIT_RXQ_V3_IN_LABEL_LEN 4
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_INSTANCE_OFST 12
+#define MC_CMD_INIT_RXQ_V3_IN_INSTANCE_LEN 4
+/* There will be more flags here. */
+#define MC_CMD_INIT_RXQ_V3_IN_FLAGS_OFST 16
+#define MC_CMD_INIT_RXQ_V3_IN_FLAGS_LEN 4
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_LBN 0
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_LBN 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_LBN 2
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_LBN 3
+#define MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_WIDTH 4
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_LBN 7
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_LBN 8
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_LBN 9
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_LBN 10
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_WIDTH 4
+/* enum: One packet per descriptor (for normal networking) */
+#define MC_CMD_INIT_RXQ_V3_IN_SINGLE_PACKET 0x0
+/* enum: Pack multiple packets into large descriptors (for SolarCapture) */
+#define MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_LBN 14
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
+#define MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_1M 0x0 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_512K 0x1 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_256K 0x2 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_128K 0x3 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_64K 0x4 /* enum */
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_LBN 19
+#define MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+/* Owner ID to use if in buffer mode (zero if physical) */
+#define MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_OFST 20
+#define MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_LEN 4
+/* The port ID associated with the v-adaptor which should contain this DMAQ. */
+#define MC_CMD_INIT_RXQ_V3_IN_PORT_ID_OFST 24
+#define MC_CMD_INIT_RXQ_V3_IN_PORT_ID_LEN 4
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_OFST 28
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LEN 8
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LO_OFST 28
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_HI_OFST 32
+#define MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_NUM 64
+/* Maximum length of packet to receive, if SNAPSHOT_MODE flag is set */
+#define MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_OFST 540
+#define MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_LEN 4
+/* The number of packet buffers that will be contained within each
+ * EQUAL_STRIDE_PACKED_STREAM format bucket supplied by the driver. This field
+ * is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_OFST 544
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_LEN 4
+/* The length in bytes of the area in each packet buffer that can be written to
+ * by the adapter. This is used to store the packet prefix and the packet
+ * payload. This length does not include any end padding added by the driver.
+ * This field is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_OFST 548
+#define MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_LEN 4
+/* The length in bytes of a single packet buffer within a
+ * EQUAL_STRIDE_PACKED_STREAM format bucket. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_OFST 552
+#define MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_LEN 4
+/* The maximum time in nanoseconds that the datapath will be backpressured if
+ * there are no RX descriptors available. If the timeout is reached and there
+ * are still no descriptors then the packet will be dropped. A timeout of 0
+ * means the datapath will never be blocked. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_OFST 556
+#define MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_LEN 4
+
/* MC_CMD_INIT_RXQ_OUT msgresponse */
#define MC_CMD_INIT_RXQ_OUT_LEN 0
/* MC_CMD_INIT_RXQ_EXT_OUT msgresponse */
#define MC_CMD_INIT_RXQ_EXT_OUT_LEN 0
+/* MC_CMD_INIT_RXQ_V3_OUT msgresponse */
+#define MC_CMD_INIT_RXQ_V3_OUT_LEN 0
+
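The MC_CMD_INIT_RXQ_V3 request added above extends INIT_RXQ_EXT with the equal-stride packed-stream geometry fields at offsets 544-556. As a rough illustration only (not part of this patch), the sketch below shows how a driver might assemble such a request against this layout, assuming the usual little-endian MCDI dword encoding; put_le32() is a hypothetical stand-in for a driver's own MCDI buffer macros, and the bucket geometry values are arbitrary examples.

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: MCDI payloads are encoded as little-endian dwords. */
static void put_le32(uint8_t *buf, unsigned int ofst, uint32_t val)
{
	buf[ofst + 0] = (uint8_t)val;
	buf[ofst + 1] = (uint8_t)(val >> 8);
	buf[ofst + 2] = (uint8_t)(val >> 16);
	buf[ofst + 3] = (uint8_t)(val >> 24);
}

/* Assemble an MC_CMD_INIT_RXQ_V3 request for DMA_MODE ==
 * EQUAL_STRIDE_PACKED_STREAM, using the offsets defined above.  OWNER_ID,
 * PORT_ID and the DMA_ADDR array (offset 28 onwards) are omitted from this
 * sketch; a real request must fill those in too.
 */
static void build_init_rxq_v3(uint8_t inbuf[560], uint32_t size_entries,
			      uint32_t target_evq, uint32_t instance)
{
	memset(inbuf, 0, 560);			/* MC_CMD_INIT_RXQ_V3_IN_LEN */
	put_le32(inbuf, 0, size_entries);	/* SIZE, in entries */
	put_le32(inbuf, 4, target_evq);		/* TARGET_EVQ */
	/* LABEL (offset 8) is ignored in this DMA mode, so left at zero */
	put_le32(inbuf, 12, instance);		/* INSTANCE */
	put_le32(inbuf, 16, 0x2u << 10);	/* FLAGS: DMA_MODE = EQUAL_STRIDE_PACKED_STREAM (LBN 10) */
	put_le32(inbuf, 544, 32);		/* ES_PACKET_BUFFERS_PER_BUCKET (example value) */
	put_le32(inbuf, 548, 2032);		/* ES_MAX_DMA_LEN (example value) */
	put_le32(inbuf, 552, 2048);		/* ES_PACKET_STRIDE (example value) */
	put_le32(inbuf, 556, 0);		/* ES_HEAD_OF_LINE_BLOCK_TIMEOUT: never block */
}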
/***********************************/
/* MC_CMD_INIT_TXQ
@@ -7466,7 +7760,7 @@
#define MC_CMD_PROXY_CMD_IN_TARGET_PF_WIDTH 16
#define MC_CMD_PROXY_CMD_IN_TARGET_VF_LBN 16
#define MC_CMD_PROXY_CMD_IN_TARGET_VF_WIDTH 16
-#define MC_CMD_PROXY_CMD_IN_VF_NULL 0xffff /* enum */
+#define MC_CMD_PROXY_CMD_IN_VF_NULL 0xffff /* enum */
/* MC_CMD_PROXY_CMD_OUT msgresponse */
#define MC_CMD_PROXY_CMD_OUT_LEN 0
@@ -7479,7 +7773,7 @@
#define MC_PROXY_STATUS_BUFFER_HANDLE_OFST 0
#define MC_PROXY_STATUS_BUFFER_HANDLE_LEN 4
/* enum: An invalid handle. */
-#define MC_PROXY_STATUS_BUFFER_HANDLE_INVALID 0x0
+#define MC_PROXY_STATUS_BUFFER_HANDLE_INVALID 0x0
#define MC_PROXY_STATUS_BUFFER_HANDLE_LBN 0
#define MC_PROXY_STATUS_BUFFER_HANDLE_WIDTH 32
/* The requesting physical function number */
@@ -7748,17 +8042,17 @@
#define MC_CMD_FILTER_OP_IN_OP_OFST 0
#define MC_CMD_FILTER_OP_IN_OP_LEN 4
/* enum: single-recipient filter insert */
-#define MC_CMD_FILTER_OP_IN_OP_INSERT 0x0
+#define MC_CMD_FILTER_OP_IN_OP_INSERT 0x0
/* enum: single-recipient filter remove */
-#define MC_CMD_FILTER_OP_IN_OP_REMOVE 0x1
+#define MC_CMD_FILTER_OP_IN_OP_REMOVE 0x1
/* enum: multi-recipient filter subscribe */
-#define MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE 0x2
+#define MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE 0x2
/* enum: multi-recipient filter unsubscribe */
-#define MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE 0x3
+#define MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE 0x3
/* enum: replace one recipient with another (warning - the filter handle may
* change)
*/
-#define MC_CMD_FILTER_OP_IN_OP_REPLACE 0x4
+#define MC_CMD_FILTER_OP_IN_OP_REPLACE 0x4
/* filter handle (for remove / unsubscribe operations) */
#define MC_CMD_FILTER_OP_IN_HANDLE_OFST 4
#define MC_CMD_FILTER_OP_IN_HANDLE_LEN 8
@@ -7803,15 +8097,15 @@
#define MC_CMD_FILTER_OP_IN_RX_DEST_OFST 20
#define MC_CMD_FILTER_OP_IN_RX_DEST_LEN 4
/* enum: drop packets */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_DROP 0x0
+#define MC_CMD_FILTER_OP_IN_RX_DEST_DROP 0x0
/* enum: receive to host */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_HOST 0x1
+#define MC_CMD_FILTER_OP_IN_RX_DEST_HOST 0x1
/* enum: receive to MC */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_MC 0x2
+#define MC_CMD_FILTER_OP_IN_RX_DEST_MC 0x2
/* enum: loop back to TXDP 0 */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_TX0 0x3
+#define MC_CMD_FILTER_OP_IN_RX_DEST_TX0 0x3
/* enum: loop back to TXDP 1 */
-#define MC_CMD_FILTER_OP_IN_RX_DEST_TX1 0x4
+#define MC_CMD_FILTER_OP_IN_RX_DEST_TX1 0x4
/* receive queue handle (for multiple queue modes, this is the base queue) */
#define MC_CMD_FILTER_OP_IN_RX_QUEUE_OFST 24
#define MC_CMD_FILTER_OP_IN_RX_QUEUE_LEN 4
@@ -7819,14 +8113,14 @@
#define MC_CMD_FILTER_OP_IN_RX_MODE_OFST 28
#define MC_CMD_FILTER_OP_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_FILTER_OP_IN_RX_MODE_RSS 0x1
+#define MC_CMD_FILTER_OP_IN_RX_MODE_RSS 0x1
/* enum: receive to multiple queues using .1p mapping */
-#define MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING 0x2
+#define MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING 0x2
/* enum: install a filter entry that will never match; for test purposes only
*/
-#define MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+#define MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
* RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
* MC_CMD_DOT1P_MAPPING_ALLOC.
@@ -7843,7 +8137,7 @@
#define MC_CMD_FILTER_OP_IN_TX_DEST_OFST 40
#define MC_CMD_FILTER_OP_IN_TX_DEST_LEN 4
/* enum: request default behaviour (based on filter type) */
-#define MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT 0xffffffff
+#define MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT 0xffffffff
#define MC_CMD_FILTER_OP_IN_TX_DEST_MAC_LBN 0
#define MC_CMD_FILTER_OP_IN_TX_DEST_MAC_WIDTH 1
#define MC_CMD_FILTER_OP_IN_TX_DEST_PM_LBN 1
@@ -7971,15 +8265,15 @@
#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_OFST 20
#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_LEN 4
/* enum: drop packets */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP 0x0
/* enum: receive to host */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST 0x1
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST 0x1
/* enum: receive to MC */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC 0x2
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC 0x2
/* enum: loop back to TXDP 0 */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0 0x3
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0 0x3
/* enum: loop back to TXDP 1 */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1 0x4
+#define MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1 0x4
/* receive queue handle (for multiple queue modes, this is the base queue) */
#define MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_OFST 24
#define MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_LEN 4
@@ -7987,14 +8281,14 @@
#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_OFST 28
#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS 0x1
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS 0x1
/* enum: receive to multiple queues using .1p mapping */
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING 0x2
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING 0x2
/* enum: install a filter entry that will never match; for test purposes only
*/
-#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+#define MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
* RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
* MC_CMD_DOT1P_MAPPING_ALLOC.
@@ -8011,7 +8305,7 @@
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_OFST 40
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_LEN 4
/* enum: request default behaviour (based on filter type) */
-#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT 0xffffffff
+#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT 0xffffffff
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_LBN 0
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_WIDTH 1
#define MC_CMD_FILTER_OP_EXT_IN_TX_DEST_PM_LBN 1
@@ -8054,17 +8348,17 @@
#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_LBN 24
#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_WIDTH 8
/* enum: Match VXLAN traffic with this VNI */
-#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN 0x0
/* enum: Match Geneve traffic with this VNI */
-#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE 0x1
+#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE 0x1
/* enum: Reserved for experimental development use */
-#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL 0xfe
+#define MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL 0xfe
#define MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_LBN 0
#define MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_WIDTH 24
#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_LBN 24
#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_WIDTH 8
/* enum: Match NVGRE traffic with this VSID */
-#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE 0x0
+#define MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE 0x0
/* source IP address to match (as bytes in network order; set last 12 bytes to
* 0 for IPv4 address)
*/
@@ -8131,6 +8425,273 @@
#define MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_OFST 156
#define MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_LEN 16
+/* MC_CMD_FILTER_OP_V3_IN msgrequest: FILTER_OP extension to support additional
+ * filter actions for Intel's DPDK (Data Plane Development Kit, dpdk.org) via
+ * its rte_flow API. This extension is only useful with the sfc_efx driver
+ * included as part of DPDK, used in conjunction with the dpdk datapath
+ * firmware variant.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_LEN 180
+/* identifies the type of operation requested */
+#define MC_CMD_FILTER_OP_V3_IN_OP_OFST 0
+#define MC_CMD_FILTER_OP_V3_IN_OP_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_FILTER_OP_IN/OP */
+/* filter handle (for remove / unsubscribe operations) */
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_OFST 4
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_LEN 8
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_LO_OFST 4
+#define MC_CMD_FILTER_OP_V3_IN_HANDLE_HI_OFST 8
+/* The port ID associated with the v-adaptor which should contain this filter.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_PORT_ID_OFST 12
+#define MC_CMD_FILTER_OP_V3_IN_PORT_ID_LEN 4
+/* fields to include in match criteria */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_OFST 16
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_LEN 4
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_LBN 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_LBN 2
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_LBN 3
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_LBN 4
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_LBN 5
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_LBN 6
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_LBN 7
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_LBN 8
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_LBN 9
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_LBN 10
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_LBN 11
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_LBN 12
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_LBN 13
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_LBN 14
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_LBN 15
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_LBN 16
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_LBN 17
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_LBN 18
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_LBN 19
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_LBN 20
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_LBN 21
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_LBN 22
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_LBN 23
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN 24
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN 25
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_LBN 30
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_LBN 31
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_WIDTH 1
+/* receive destination */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_OFST 20
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_LEN 4
+/* enum: drop packets */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_DROP 0x0
+/* enum: receive to host */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_HOST 0x1
+/* enum: receive to MC */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_MC 0x2
+/* enum: loop back to TXDP 0 */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX0 0x3
+/* enum: loop back to TXDP 1 */
+#define MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX1 0x4
+/* receive queue handle (for multiple queue modes, this is the base queue) */
+#define MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_OFST 24
+#define MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_LEN 4
+/* receive mode */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_OFST 28
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_LEN 4
+/* enum: receive to just the specified queue */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_SIMPLE 0x0
+/* enum: receive to multiple queues using RSS context */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_RSS 0x1
+/* enum: receive to multiple queues using .1p mapping */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_DOT1P_MAPPING 0x2
+/* enum: install a filter entry that will never match; for test purposes only
+ */
+#define MC_CMD_FILTER_OP_V3_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
+ * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
+ * MC_CMD_DOT1P_MAPPING_ALLOC.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_OFST 32
+#define MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_LEN 4
+/* transmit domain (reserved; set to 0) */
+#define MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_OFST 36
+#define MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_LEN 4
+/* transmit destination (either set the MAC and/or PM bits for explicit
+ * control, or set this field to TX_DEST_DEFAULT for sensible default
+ * behaviour)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_OFST 40
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_LEN 4
+/* enum: request default behaviour (based on filter type) */
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_DEFAULT 0xffffffff
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_WIDTH 1
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_LBN 1
+#define MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_WIDTH 1
+/* source MAC address to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_SRC_MAC_OFST 44
+#define MC_CMD_FILTER_OP_V3_IN_SRC_MAC_LEN 6
+/* source port to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_SRC_PORT_OFST 50
+#define MC_CMD_FILTER_OP_V3_IN_SRC_PORT_LEN 2
+/* destination MAC address to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_DST_MAC_OFST 52
+#define MC_CMD_FILTER_OP_V3_IN_DST_MAC_LEN 6
+/* destination port to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_DST_PORT_OFST 58
+#define MC_CMD_FILTER_OP_V3_IN_DST_PORT_LEN 2
+/* Ethernet type to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_OFST 60
+#define MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_LEN 2
+/* Inner VLAN tag to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_OFST 62
+#define MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_LEN 2
+/* Outer VLAN tag to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_OFST 64
+#define MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_LEN 2
+/* IP protocol to match (in low byte; set high byte to 0) */
+#define MC_CMD_FILTER_OP_V3_IN_IP_PROTO_OFST 66
+#define MC_CMD_FILTER_OP_V3_IN_IP_PROTO_LEN 2
+/* Firmware defined register 0 to match (reserved; set to 0) */
+#define MC_CMD_FILTER_OP_V3_IN_FWDEF0_OFST 68
+#define MC_CMD_FILTER_OP_V3_IN_FWDEF0_LEN 4
+/* VNI (for VXLAN/Geneve, when IP protocol is UDP) or VSID (for NVGRE, when IP
+ * protocol is GRE) to match (as bytes in network order; set last byte to 0 for
+ * VXLAN/NVGRE, or 1 for Geneve)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_OFST 72
+#define MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_LEN 4
+#define MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_WIDTH 24
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_LBN 24
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_WIDTH 8
+/* enum: Match VXLAN traffic with this VNI */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_VXLAN 0x0
+/* enum: Match Geneve traffic with this VNI */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_GENEVE 0x1
+/* enum: Reserved for experimental development use */
+#define MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_EXPERIMENTAL 0xfe
+#define MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_LBN 0
+#define MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_WIDTH 24
+#define MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_LBN 24
+#define MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_WIDTH 8
+/* enum: Match NVGRE traffic with this VSID */
+#define MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_NVGRE 0x0
+/* source IP address to match (as bytes in network order; set last 12 bytes to
+ * 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_SRC_IP_OFST 76
+#define MC_CMD_FILTER_OP_V3_IN_SRC_IP_LEN 16
+/* destination IP address to match (as bytes in network order; set last 12
+ * bytes to 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_DST_IP_OFST 92
+#define MC_CMD_FILTER_OP_V3_IN_DST_IP_LEN 16
+/* VXLAN/NVGRE inner frame source MAC address to match (as bytes in network
+ * order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_OFST 108
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_LEN 6
+/* VXLAN/NVGRE inner frame source port to match (as bytes in network order) */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_OFST 114
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_LEN 2
+/* VXLAN/NVGRE inner frame destination MAC address to match (as bytes in
+ * network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_OFST 116
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_LEN 6
+/* VXLAN/NVGRE inner frame destination port to match (as bytes in network
+ * order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_OFST 122
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_LEN 2
+/* VXLAN/NVGRE inner frame Ethernet type to match (as bytes in network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_OFST 124
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_LEN 2
+/* VXLAN/NVGRE inner frame Inner VLAN tag to match (as bytes in network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_OFST 126
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame Outer VLAN tag to match (as bytes in network order)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_OFST 128
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame IP protocol to match (in low byte; set high byte to
+ * 0)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_OFST 130
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_LEN 2
+/* VXLAN/NVGRE inner frame Firmware defined register 0 to match (reserved; set
+ * to 0)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_OFST 132
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_LEN 4
+/* VXLAN/NVGRE inner frame Firmware defined register 1 to match (reserved; set
+ * to 0)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_OFST 136
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_LEN 4
+/* VXLAN/NVGRE inner frame source IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_OFST 140
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_LEN 16
+/* VXLAN/NVGRE inner frame destination IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_OFST 156
+#define MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_LEN 16
+/* Set an action for all packets matching this filter. The DPDK driver and dpdk
+ * f/w variant use their own specific delivery structures, which are documented
+ * in the DPDK Firmware Driver Interface (SF-119419-TC). Requesting anything
+ * other than MATCH_ACTION_NONE when the NIC is running another f/w variant
+ * will cause the filter insertion to fail with ENOTSUP.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_OFST 172
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_LEN 4
+/* enum: do nothing extra */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_NONE 0x0
+/* enum: Set the match flag in the packet prefix for packets matching the
+ * filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "FLAG" action.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_FLAG 0x1
+/* enum: Insert MATCH_MARK_VALUE into the packet prefix for packets matching
+ * the filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "MARK" action.
+ */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_MARK 0x2
+/* the mark value for MATCH_ACTION_MARK */
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_OFST 176
+#define MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_LEN 4
+
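The MATCH_ACTION and MATCH_MARK_VALUE fields added above are what let the DPDK sfc_efx driver express rte_flow FLAG/MARK actions through FILTER_OP. As a rough illustration only (again not part of this patch), a caller might set the MARK action on an otherwise-complete 180-byte FILTER_OP_V3 request as sketched below; put_le32() is the same hypothetical little-endian helper as in the earlier sketch, repeated so the snippet stands alone.

#include <stdint.h>

/* Hypothetical helper: MCDI payloads are encoded as little-endian dwords. */
static void put_le32(uint8_t *buf, unsigned int ofst, uint32_t val)
{
	buf[ofst + 0] = (uint8_t)val;
	buf[ofst + 1] = (uint8_t)(val >> 8);
	buf[ofst + 2] = (uint8_t)(val >> 16);
	buf[ofst + 3] = (uint8_t)(val >> 24);
}

/* Request the rte_flow-style MARK action on an otherwise-complete 180-byte
 * MC_CMD_FILTER_OP_V3 insert request.  Firmware variants other than "dpdk"
 * reject anything but MATCH_ACTION_NONE with ENOTSUP.
 */
static void filter_op_v3_set_mark(uint8_t inbuf[180], uint32_t mark)
{
	put_le32(inbuf, 172, 0x2);	/* MATCH_ACTION = MATCH_ACTION_MARK */
	put_le32(inbuf, 176, mark);	/* MATCH_MARK_VALUE */
}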
/* MC_CMD_FILTER_OP_OUT msgresponse */
#define MC_CMD_FILTER_OP_OUT_LEN 12
/* identifies the type of operation requested */
@@ -8147,9 +8708,9 @@
#define MC_CMD_FILTER_OP_OUT_HANDLE_LO_OFST 4
#define MC_CMD_FILTER_OP_OUT_HANDLE_HI_OFST 8
/* enum: guaranteed invalid filter handle (low 32 bits) */
-#define MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID 0xffffffff
+#define MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID 0xffffffff
/* enum: guaranteed invalid filter handle (high 32 bits) */
-#define MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID 0xffffffff
+#define MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID 0xffffffff
/* MC_CMD_FILTER_OP_EXT_OUT msgresponse */
#define MC_CMD_FILTER_OP_EXT_OUT_LEN 12
@@ -8184,20 +8745,20 @@
#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_OFST 0
#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_LEN 4
/* enum: read the list of supported RX filter matches */
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES 0x1
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES 0x1
/* enum: read flags indicating restrictions on filter insertion for the calling
* client
*/
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS 0x2
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS 0x2
/* enum: read properties relating to security rules (Medford-only; for use by
* SolarSecure apps, not directly by drivers. See SF-114946-SW.)
*/
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO 0x3
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO 0x3
/* enum: read the list of supported RX filter matches for VXLAN/NVGRE
* encapsulated frames, which follow a different match sequence to normal
* frames (Medford only)
*/
-#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES 0x4
+#define MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES 0x4
/* MC_CMD_GET_PARSER_DISP_INFO_OUT msgresponse */
#define MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMIN 8
@@ -8238,7 +8799,9 @@
* Direct read/write of parser-dispatcher state (DICPUs and LUE) for debugging.
* Please note that this interface is only of use to debug tools which have
* knowledge of firmware and hardware data structures; nothing here is intended
- * for use by normal driver code.
+ * for use by normal driver code. Note that although this command is in the
+ * Admin privilege group, in tamperproof adapters, only read operations are
+ * permitted.
*/
#define MC_CMD_PARSER_DISP_RW 0xe5
@@ -8250,32 +8813,36 @@
#define MC_CMD_PARSER_DISP_RW_IN_TARGET_OFST 0
#define MC_CMD_PARSER_DISP_RW_IN_TARGET_LEN 4
/* enum: RX dispatcher CPU */
-#define MC_CMD_PARSER_DISP_RW_IN_RX_DICPU 0x0
+#define MC_CMD_PARSER_DISP_RW_IN_RX_DICPU 0x0
/* enum: TX dispatcher CPU */
-#define MC_CMD_PARSER_DISP_RW_IN_TX_DICPU 0x1
+#define MC_CMD_PARSER_DISP_RW_IN_TX_DICPU 0x1
/* enum: Lookup engine (with original metadata format). Deprecated; used only
* by cmdclient as a fallback for very old Huntington firmware, and not
* supported in firmware beyond v6.4.0.1005. Use LUE_VERSIONED_METADATA
* instead.
*/
-#define MC_CMD_PARSER_DISP_RW_IN_LUE 0x2
+#define MC_CMD_PARSER_DISP_RW_IN_LUE 0x2
/* enum: Lookup engine (with requested metadata format) */
-#define MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA 0x3
+#define MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA 0x3
/* enum: RX0 dispatcher CPU (alias for RX_DICPU; Medford has 2 RX DICPUs) */
-#define MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU 0x0
+#define MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU 0x0
/* enum: RX1 dispatcher CPU (only valid for Medford) */
-#define MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU 0x4
+#define MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU 0x4
/* enum: Miscellaneous other state (only valid for Medford) */
-#define MC_CMD_PARSER_DISP_RW_IN_MISC_STATE 0x5
+#define MC_CMD_PARSER_DISP_RW_IN_MISC_STATE 0x5
/* identifies the type of operation requested */
#define MC_CMD_PARSER_DISP_RW_IN_OP_OFST 4
#define MC_CMD_PARSER_DISP_RW_IN_OP_LEN 4
/* enum: Read a word of DICPU DMEM or a LUE entry */
-#define MC_CMD_PARSER_DISP_RW_IN_READ 0x0
-/* enum: Write a word of DICPU DMEM or a LUE entry. */
-#define MC_CMD_PARSER_DISP_RW_IN_WRITE 0x1
-/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). */
-#define MC_CMD_PARSER_DISP_RW_IN_RMW 0x2
+#define MC_CMD_PARSER_DISP_RW_IN_READ 0x0
+/* enum: Write a word of DICPU DMEM or a LUE entry. Not permitted on
+ * tamperproof adapters.
+ */
+#define MC_CMD_PARSER_DISP_RW_IN_WRITE 0x1
+/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). Not
+ * permitted on tamperproof adapters.
+ */
+#define MC_CMD_PARSER_DISP_RW_IN_RMW 0x2
/* data memory address (DICPU targets) or LUE index (LUE targets) */
#define MC_CMD_PARSER_DISP_RW_IN_ADDRESS_OFST 8
#define MC_CMD_PARSER_DISP_RW_IN_ADDRESS_LEN 4
@@ -8283,7 +8850,7 @@
#define MC_CMD_PARSER_DISP_RW_IN_SELECTOR_OFST 8
#define MC_CMD_PARSER_DISP_RW_IN_SELECTOR_LEN 4
/* enum: Port to datapath mapping */
-#define MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING 0x1
+#define MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING 0x1
/* value to write (for DMEM writes) */
#define MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_OFST 12
#define MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_LEN 4
@@ -8317,8 +8884,8 @@
#define MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_OFST 0
#define MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_LEN 4
#define MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_NUM 4
-#define MC_CMD_PARSER_DISP_RW_OUT_DP0 0x1 /* enum */
-#define MC_CMD_PARSER_DISP_RW_OUT_DP1 0x2 /* enum */
+#define MC_CMD_PARSER_DISP_RW_OUT_DP0 0x1 /* enum */
+#define MC_CMD_PARSER_DISP_RW_OUT_DP1 0x2 /* enum */
/***********************************/
@@ -8783,13 +9350,13 @@
#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_OFST 0
#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_LEN 4
/* enum: MISC. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC 0x0
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC 0x0
/* enum: IDO. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO 0x1
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO 0x1
/* enum: RO. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO 0x2
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO 0x2
/* enum: TPH Type. */
-#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE 0x3
+#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE 0x3
/* MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT msgresponse */
#define MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT_LEN 8
@@ -8920,57 +9487,57 @@
*/
#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_OFST 0
#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_LEN 4
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE 0x0 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET 0x1 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS 0x2 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS 0x3 /* enum */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY 0x4 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE 0x0 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET 0x1 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS 0x2 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS 0x3 /* enum */
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY 0x4 /* enum */
/* Target for download. (These match the blob numbers defined in
* mc_flash_layout.h.)
*/
#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_OFST 4
#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_LEN 4
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT 0x0
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT 0x0
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT 0x1
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT 0x1
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT 0x2
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT 0x2
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT 0x3
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT 0x3
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT 0x4
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT 0x4
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG 0x5
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG 0x5
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT 0x6
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT 0x6
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG 0x7
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG 0x7
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM 0x8
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM 0x8
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM 0x9
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM 0x9
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM 0xa
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM 0xa
/* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM 0xb
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM 0xb
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0 0xc
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0 0xc
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0 0xd
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0 0xd
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1 0xe
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1 0xe
/* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1 0xf
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1 0xf
/* enum: Valid in phases 1 (PHASE_RESET) and 4 (PHASE_READY) only */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL 0xffffffff
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL 0xffffffff
/* Chunk ID, or CHUNK_ID_LAST or CHUNK_ID_ABORT */
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_OFST 8
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LEN 4
/* enum: Last chunk, containing checksum rather than data */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST 0xffffffff
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST 0xffffffff
/* enum: Abort download of this item */
-#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT 0xfffffffe
+#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT 0xfffffffe
/* Length of this chunk in bytes */
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_OFST 12
#define MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_LEN 4
@@ -8989,21 +9556,21 @@
#define MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_OFST 4
#define MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_LEN 4
/* enum: Code download OK, completed. */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE 0x0
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE 0x0
/* enum: Code download aborted as requested. */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED 0x1
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED 0x1
/* enum: Code download OK so far, send next chunk. */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK 0x2
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK 0x2
/* enum: Download phases out of sequence */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE 0x100
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE 0x100
/* enum: Bad target for this phase */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET 0x101
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET 0x101
/* enum: Chunk ID out of sequence */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID 0x200
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID 0x200
/* enum: Chunk length zero or too large */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN 0x201
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN 0x201
/* enum: Checksum was incorrect */
-#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM 0x300
+#define MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM 0x300
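/*
 * Editor's illustrative sketch (not part of the patch): the chunk
 * sequencing implied by the CHUNK_ID and OUT INFO values above. The
 * send_satellite_chunk_t callback is hypothetical; it stands in for
 * whatever issues the MCDI request for one chunk and returns the INFO
 * word from the response.
 */
#include <stdint.h>
#include <stddef.h>

#define CHUNK_ID_LAST	0xffffffffu	/* last chunk carries the checksum */
#define OK_COMPLETE	0x0u
#define OK_NEXT_CHUNK	0x2u

typedef uint32_t (*send_satellite_chunk_t)(uint32_t target, uint32_t chunk_id,
					   const uint8_t *data, size_t len);

static int satellite_download(send_satellite_chunk_t send_chunk,
			      uint32_t target, const uint8_t *blob,
			      size_t len, size_t chunk_size, uint32_t checksum)
{
	uint32_t chunk_id = 0;
	size_t off;

	for (off = 0; off < len; off += chunk_size, chunk_id++) {
		size_t n = (len - off < chunk_size) ? len - off : chunk_size;

		/* Firmware answers OK_NEXT_CHUNK (0x2) while it wants more;
		 * the 0x1xx/0x2xx/0x3xx values above are error codes. */
		if (send_chunk(target, chunk_id, blob + off, n) != OK_NEXT_CHUNK)
			return -1;
	}

	/* Final message: CHUNK_ID_LAST with the checksum instead of data. */
	return send_chunk(target, CHUNK_ID_LAST, (const uint8_t *)&checksum,
			  sizeof(checksum)) == OK_COMPLETE ? 0 : -1;
}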
/***********************************/
@@ -9087,54 +9654,58 @@
#define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR 0x103
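/*
 * Editor's illustrative sketch (not part of the patch): pulling the RX/TX
 * DPCPU firmware ids out of a GET_CAPABILITIES response, using the
 * OFST/LEN values above. The response pointer is assumed to hold at least
 * the first 8 bytes of MC_CMD_GET_CAPABILITIES_OUT.
 */
#include <stdint.h>

static inline uint16_t get_le16(const uint8_t *buf, unsigned int ofst)
{
	return (uint16_t)(buf[ofst] | (buf[ofst + 1] << 8));
}

static const char *rxdp_fw_name(uint16_t id)
{
	switch (id) {
	case 0x0:   return "RXDP (standard)";
	case 0x1:   return "RXDP_LOW_LATENCY";
	case 0x2:   return "RXDP_PACKED_STREAM";
	case 0x5:   return "RXDP_RULES_ENGINE";
	case 0x6:   return "RXDP_DPDK";		/* value added by this patch */
	case 0x10a: return "RXDP_BIST";
	default:    return id >= 0x101 ? "RXDP test image" : "unknown";
	}
}

static void decode_dpcpu_ids(const uint8_t *resp,
			     uint16_t *rx_id, uint16_t *tx_id)
{
	*rx_id = get_le16(resp, 4);	/* RX_DPCPU_FW_ID_OFST 4, LEN 2 */
	*tx_id = get_le16(resp, 6);	/* TX_DPCPU_FW_ID_OFST 6, LEN 2 */
}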
#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9144,41 +9715,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9188,34 +9761,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_LEN 4
@@ -9293,54 +9868,58 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9350,41 +9929,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9394,34 +9975,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_LEN 4
@@ -9469,6 +10052,18 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_TSA_BOUND_WIDTH 1
#define MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
#define MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_LBN 19
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_LBN 20
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_LBN 22
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_LBN 24
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_WIDTH 1
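/*
 * Editor's illustrative sketch (not part of the patch): the LBN/WIDTH pairs
 * above follow the usual MCDI convention - LBN is the bit position of the
 * field within its 32-bit flags word and WIDTH its size in bits. Which
 * flags dword of GET_CAPABILITIES_V2_OUT these new bits live in is not
 * shown in this hunk, so the helpers below are handed the word directly.
 */
#include <stdint.h>
#include <stdbool.h>

#define MCDI_FIELD(word, lbn, width) \
	(((word) >> (lbn)) & ((1u << (width)) - 1u))

static bool cap_fw_subvariant_no_tx_csum(uint32_t flags_word)
{
	/* FW_SUBVARIANT_NO_TX_CSUM_LBN 23, WIDTH 1 from the defines above */
	return MCDI_FIELD(flags_word, 23, 1);
}

static bool cap_vi_spreading(uint32_t flags_word)
{
	/* VI_SPREADING_LBN 24, WIDTH 1 */
	return MCDI_FIELD(flags_word, 24, 1);
}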
/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
* on older firmware (check the length).
*/
@@ -9482,18 +10077,18 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
#define MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff
/* enum: PF does not exist. */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe
/* enum: PF does exist but is not assigned to any external port. */
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED 0xfd
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED 0xfd
/* enum: This value indicates that PF is assigned, but it cannot be expressed
* in this field. It is intended for a possible future situation where a more
* complex scheme of PFs to ports mapping is being used. The future driver
* should look for a new field supporting the new scheme. The current/old
* driver should treat this value as PF_NOT_ASSIGNED.
*/
-#define MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
+#define MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
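/*
 * Editor's illustrative sketch (not part of the patch): interpreting one
 * byte of PFS_TO_PORTS_ASSIGNMENT. Values below 0xfc are taken to be a
 * real port number; the special values follow the enums above, and
 * INCOMPATIBLE_ASSIGNMENT is folded into "not assigned" as the comment
 * instructs current/old drivers to do.
 */
#include <stdint.h>
#include <stdbool.h>

#define PF_ACCESS_NOT_PERMITTED		0xff
#define PF_NOT_PRESENT			0xfe
#define PF_NOT_ASSIGNED			0xfd
#define PF_INCOMPATIBLE_ASSIGNMENT	0xfc

/* Returns true and fills *port when the PF has a usable port assignment. */
static bool pf_port_assignment(uint8_t entry, unsigned int *port)
{
	switch (entry) {
	case PF_ACCESS_NOT_PERMITTED:
	case PF_NOT_PRESENT:
	case PF_NOT_ASSIGNED:
	case PF_INCOMPATIBLE_ASSIGNMENT:	/* treat as PF_NOT_ASSIGNED */
		return false;
	default:
		*port = entry;
		return true;
	}
}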
/* One byte per PF containing the number of its VFs, indexed by PF number. A
* special value indicates that a PF is not present.
*/
@@ -9501,9 +10096,9 @@
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_LEN 1
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-/* MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff */
+/* MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff */
/* enum: PF does not exist. */
-/* MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe */
+/* MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe */
/* Number of VIs available for each external port */
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_OFST 58
#define MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -9592,54 +10187,58 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9649,41 +10248,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9693,34 +10294,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_LEN 4
@@ -9768,6 +10371,18 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_TSA_BOUND_WIDTH 1
#define MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
#define MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_LBN 19
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_LBN 20
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_LBN 22
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_LBN 24
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_WIDTH 1
/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
* on older firmware (check the length).
*/
@@ -9781,18 +10396,18 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff
/* enum: PF does not exist. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe
/* enum: PF does exist but is not assigned to any external port. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd
/* enum: This value indicates that PF is assigned, but it cannot be expressed
* in this field. It is intended for a possible future situation where a more
* complex scheme of PFs to ports mapping is being used. The future driver
* should look for a new field supporting the new scheme. The current/old
* driver should treat this value as PF_NOT_ASSIGNED.
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
/* One byte per PF containing the number of its VFs, indexed by PF number. A
* special value indicates that a PF is not present.
*/
@@ -9800,9 +10415,9 @@
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-/* MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */
+/* MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */
/* enum: PF does not exist. */
-/* MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */
+/* MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */
/* Number of VIs available for each external port */
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -9833,11 +10448,11 @@
/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
* CTPIO is not mapped.
*/
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0
/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1
/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2
+#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2
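/*
 * Editor's illustrative sketch (not part of the patch): the per-VI BAR
 * layout implied by the VI_WINDOW_MODE enums above. vi_base() gives the
 * start of VI number n; PIO sits 4k into each window, CTPIO 12k into it,
 * except in 8k mode where CTPIO is not mapped (reported as 0 here, an
 * assumption of this sketch).
 */
#include <stddef.h>

enum vi_window_mode { VI_WINDOW_8K = 0x0, VI_WINDOW_16K = 0x1, VI_WINDOW_64K = 0x2 };

static size_t vi_window_size(enum vi_window_mode mode)
{
	switch (mode) {
	case VI_WINDOW_8K:  return  8 * 1024;
	case VI_WINDOW_16K: return 16 * 1024;
	case VI_WINDOW_64K: return 64 * 1024;
	}
	return 0;
}

static size_t vi_base(enum vi_window_mode mode, unsigned int vi)
{
	return (size_t)vi * vi_window_size(mode);
}

static size_t vi_pio_offset(enum vi_window_mode mode, unsigned int vi)
{
	return vi_base(mode, vi) + 4 * 1024;		/* PIO at offset 4k */
}

static size_t vi_ctpio_offset(enum vi_window_mode mode, unsigned int vi)
{
	if (mode == VI_WINDOW_8K)
		return 0;				/* CTPIO not mapped */
	return vi_base(mode, vi) + 12 * 1024;		/* CTPIO at offset 12k */
}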
/* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
* (SF-115995-SW) in the present configuration of firmware and port mode.
*/
@@ -9916,54 +10531,58 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_OFST 4
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_LEN 2
/* enum: Standard RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP 0x0
/* enum: Low latency RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY 0x1
/* enum: Packed stream RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM 0x2
/* enum: Rules engine RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_DPDK 0x6
/* enum: BIST RXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST 0x10a
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST 0x10a
/* enum: RXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
/* enum: RXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
/* enum: RXDP Test firmware image 3 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
/* enum: RXDP Test firmware image 4 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
/* enum: RXDP Test firmware image 5 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE 0x105
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE 0x105
/* enum: RXDP Test firmware image 6 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
/* enum: RXDP Test firmware image 7 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
/* enum: RXDP Test firmware image 8 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
/* enum: RXDP Test firmware image 9 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
/* enum: RXDP Test firmware image 10 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW 0x10c
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW 0x10c
/* TxDPCPU firmware id. */
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_OFST 6
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_LEN 2
/* enum: Standard TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP 0x0
/* enum: Low latency TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY 0x1
/* enum: High packet rate TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE 0x3
/* enum: Rules engine TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE 0x5
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_DPDK 0x6
/* enum: BIST TXDP firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST 0x12d
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST 0x12d
/* enum: TXDP Test firmware image 1 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
/* enum: TXDP Test firmware image 2 */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
/* enum: TXDP CSR bus test firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR 0x103
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR 0x103
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_OFST 8
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9973,41 +10592,43 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial RX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: RX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant RX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
/* enum: Low latency RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
/* enum: Packed stream RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
/* enum: RX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine RX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* enum: RX PD firmware parsing but not filtering network overlay tunnel
* encapsulations (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_OFST 10
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_LEN 2
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -10017,34 +10638,36 @@
/* enum: reserved value - do not use (may indicate alternative interpretation
* of REV field in future)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED 0x0
/* enum: Trivial TX PD firmware for early Huntington development (Huntington
* development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
/* enum: TX PD firmware with approximately Siena-compatible behaviour
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
/* enum: Full featured TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
/* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH 0x3
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH 0x3
/* enum: siena_compat variant TX PD firmware using PM rather than MAC
* (Huntington development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
/* enum: TX PD firmware handling layer 2 only for high packet rate performance
* tests (Medford development only)
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
/* enum: Rules engine TX PD production firmware */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
-/* enum: reserved value - do not use (bug69716) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED_9 0x9
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_DPDK 0xa
/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
/* Hardware capabilities of NIC */
#define MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_OFST 12
#define MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_LEN 4
@@ -10092,6 +10715,18 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_TSA_BOUND_WIDTH 1
#define MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
#define MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_LBN 19
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_LBN 20
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_LBN 22
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_LBN 24
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_WIDTH 1
/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
* on older firmware (check the length).
*/
@@ -10105,18 +10740,18 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
#define MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff
/* enum: PF does not exist. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe
/* enum: PF does exist but is not assigned to any external port. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED 0xfd
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED 0xfd
/* enum: This value indicates that PF is assigned, but it cannot be expressed
* in this field. It is intended for a possible future situation where a more
* complex scheme of PFs to ports mapping is being used. The future driver
* should look for a new field supporting the new scheme. The current/old
* driver should treat this value as PF_NOT_ASSIGNED.
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
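
The four sentinel values above share one practical consequence: as the comment says, a driver that does not understand INCOMPATIBLE_ASSIGNMENT should fall back to treating it as PF_NOT_ASSIGNED. A minimal sketch of that interpretation, assuming a free-standing helper (the function itself is illustrative, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

/* Decode one byte of PFS_TO_PORTS_ASSIGNMENT; returns true and the port
 * number only when the PF really is mapped to an external port.
 */
static bool pf_port_assignment(uint8_t byte, unsigned int *port)
{
	switch (byte) {
	case MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED:
	case MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT:
	case MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED:
	case MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT:
		return false;	/* current/old drivers: treat all four as unassigned */
	default:
		*port = byte;	/* ordinary external port number */
		return true;
	}
}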
/* One byte per PF containing the number of its VFs, indexed by PF number. A
* special value indicates that a PF is not present.
*/
@@ -10124,9 +10759,9 @@
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_LEN 1
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_NUM 16
/* enum: The caller is not permitted to access information on this PF. */
-/* MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff */
+/* MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff */
/* enum: PF does not exist. */
-/* MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe */
+/* MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe */
/* Number of VIs available for each external port */
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_OFST 58
#define MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -10157,11 +10792,11 @@
/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
* CTPIO is not mapped.
*/
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K 0x0
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K 0x0
/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K 0x1
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K 0x1
/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K 0x2
+#define MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K 0x2
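
The three window modes above fix the per-VI register stride, with PIO always at offset 4k and CTPIO at 12k where it is mapped at all. A hedged sketch of turning the reported mode into a stride (only the enum names come from this header; the helper is illustrative):

#include <stdint.h>

static uint32_t vi_window_stride(uint32_t mode)
{
	switch (mode) {
	case MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K:
		return 8 * 1024;	/* PIO at +4k, no CTPIO */
	case MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K:
		return 16 * 1024;	/* PIO at +4k, CTPIO at +12k */
	case MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K:
		return 64 * 1024;	/* PIO at +4k, CTPIO at +12k */
	default:
		return 0;		/* unknown mode: caller should bail out */
	}
}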
/* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
* (SF-115995-SW) in the present configuration of firmware and port mode.
*/
@@ -10201,7 +10836,16 @@
#define MC_CMD_V2_EXTN_IN_ACTUAL_LEN_LBN 16
#define MC_CMD_V2_EXTN_IN_ACTUAL_LEN_WIDTH 10
#define MC_CMD_V2_EXTN_IN_UNUSED2_LBN 26
-#define MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 6
+#define MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 2
+/* Type of command/response */
+#define MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_LBN 28
+#define MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_WIDTH 4
+/* enum: MCDI command directed to or response originating from the MC. */
+#define MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_MC 0x0
+/* enum: MCDI command directed to a TSA controller. MCDI responses of this type
+ * are not defined.
+ */
+#define MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_TSA 0x1
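
The new MESSAGE_TYPE field follows the usual _LBN/_WIDTH bit-field convention for the v2 extension header dword. A minimal sketch of inserting it into a header word, assuming the caller already builds the rest of the header (the helper itself is hypothetical):

#include <stdint.h>

static uint32_t mcdi_v2_set_message_type(uint32_t hdr, uint32_t type)
{
	const uint32_t lbn  = MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_LBN;
	const uint32_t mask = (1u << MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_WIDTH) - 1;

	hdr &= ~(mask << lbn);		/* clear the 4-bit field */
	hdr |= (type & mask) << lbn;	/* e.g. MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_TSA */
	return hdr;
}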
/***********************************/
@@ -10412,15 +11056,15 @@
#define MC_CMD_VSWITCH_ALLOC_IN_TYPE_OFST 4
#define MC_CMD_VSWITCH_ALLOC_IN_TYPE_LEN 4
/* enum: VLAN */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN 0x1
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN 0x1
/* enum: VEB */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB 0x2
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB 0x2
/* enum: VEPA (obsolete) */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA 0x3
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA 0x3
/* enum: MUX */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX 0x4
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX 0x4
/* enum: Snapper specific; semantics TBD */
-#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST 0x5
+#define MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST 0x5
/* Flags controlling v-port creation */
#define MC_CMD_VSWITCH_ALLOC_IN_FLAGS_OFST 8
#define MC_CMD_VSWITCH_ALLOC_IN_FLAGS_LEN 4
@@ -10495,23 +11139,23 @@
#define MC_CMD_VPORT_ALLOC_IN_TYPE_OFST 4
#define MC_CMD_VPORT_ALLOC_IN_TYPE_LEN 4
/* enum: VLAN (obsolete) */
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN 0x1
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN 0x1
/* enum: VEB (obsolete) */
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB 0x2
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB 0x2
/* enum: VEPA (obsolete) */
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA 0x3
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA 0x3
/* enum: A normal v-port receives packets which match a specified MAC and/or
* VLAN.
*/
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL 0x4
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL 0x4
/* enum: An expansion v-port receives packets which don't match any other
* v-port.
*/
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION 0x5
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION 0x5
/* enum: A test v-port receives packets which match any filters installed by
* its downstream components.
*/
-#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST 0x6
+#define MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST 0x6
/* Flags controlling v-port creation */
#define MC_CMD_VPORT_ALLOC_IN_FLAGS_OFST 8
#define MC_CMD_VPORT_ALLOC_IN_FLAGS_LEN 4
@@ -10595,7 +11239,7 @@
#define MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_OFST 24
#define MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_LEN 6
/* enum: Derive the MAC address from the upstream port */
-#define MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0
+#define MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0
/* MC_CMD_VADAPTOR_ALLOC_OUT msgresponse */
#define MC_CMD_VADAPTOR_ALLOC_OUT_LEN 0
@@ -10809,12 +11453,12 @@
/* enum: Allocate a context for exclusive use. The key and indirection table
* must be explicitly configured.
*/
-#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE 0x0
+#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE 0x0
/* enum: Allocate a context for shared use; this will spread across a range of
* queues, but the key and indirection table are pre-configured and may not be
 * changed. For this mode, NUM_QUEUES must be 2, 4, 8, 16, 32 or 64.
*/
-#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED 0x1
+#define MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED 0x1
/* Number of queues spanned by this context, in the range 1-64; valid offsets
* in the indirection table will be in the range 0 to NUM_QUEUES-1.
*/
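
A shared RSS context therefore accepts only a power-of-two queue count between 2 and 64. A one-line validation sketch (assumption: the caller checks this before issuing MC_CMD_RSS_CONTEXT_ALLOC):

#include <stdbool.h>
#include <stdint.h>

static bool rss_shared_num_queues_valid(uint32_t n)
{
	return n >= 2 && n <= 64 && (n & (n - 1)) == 0;	/* 2, 4, 8, 16, 32 or 64 */
}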
@@ -10830,7 +11474,7 @@
#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_OFST 0
#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_LEN 4
/* enum: guaranteed invalid RSS context handle value */
-#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID 0xffffffff
+#define MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID 0xffffffff
/***********************************/
@@ -11073,7 +11717,7 @@
#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_OFST 0
#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_LEN 4
/* enum: guaranteed invalid .1p mapping handle value */
-#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID 0xffffffff
+#define MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID 0xffffffff
/***********************************/
@@ -11385,11 +12029,11 @@
#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_LBN 1
#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_WIDTH 2
/* enum: pad to 64 bytes */
-#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64 0x0
+#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64 0x0
/* enum: pad to 128 bytes (Medford only) */
-#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128 0x1
+#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128 0x1
/* enum: pad to 256 bytes (Medford only) */
-#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256 0x2
+#define MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256 0x2
/* MC_CMD_SET_RXDP_CONFIG_OUT msgresponse */
#define MC_CMD_SET_RXDP_CONFIG_OUT_LEN 0
@@ -11453,37 +12097,37 @@
#define MC_CMD_SET_CLOCK_IN_SYS_FREQ_OFST 0
#define MC_CMD_SET_CLOCK_IN_SYS_FREQ_LEN 4
/* enum: Leave the system clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for inter-core clock domain */
#define MC_CMD_SET_CLOCK_IN_ICORE_FREQ_OFST 4
#define MC_CMD_SET_CLOCK_IN_ICORE_FREQ_LEN 4
/* enum: Leave the inter-core clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for DPCPU clock domain */
#define MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_OFST 8
#define MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_LEN 4
/* enum: Leave the DPCPU clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for PCS clock domain */
#define MC_CMD_SET_CLOCK_IN_PCS_FREQ_OFST 12
#define MC_CMD_SET_CLOCK_IN_PCS_FREQ_LEN 4
/* enum: Leave the PCS clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for MC clock domain */
#define MC_CMD_SET_CLOCK_IN_MC_FREQ_OFST 16
#define MC_CMD_SET_CLOCK_IN_MC_FREQ_LEN 4
/* enum: Leave the MC clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for rmon clock domain */
#define MC_CMD_SET_CLOCK_IN_RMON_FREQ_OFST 20
#define MC_CMD_SET_CLOCK_IN_RMON_FREQ_LEN 4
/* enum: Leave the rmon clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE 0x0
/* Requested frequency in MHz for vswitch clock domain */
#define MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_OFST 24
#define MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_LEN 4
/* enum: Leave the vswitch clock domain frequency unchanged */
-#define MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE 0x0
+#define MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE 0x0
/* MC_CMD_SET_CLOCK_OUT msgresponse */
#define MC_CMD_SET_CLOCK_OUT_LEN 28
@@ -11491,37 +12135,37 @@
#define MC_CMD_SET_CLOCK_OUT_SYS_FREQ_OFST 0
#define MC_CMD_SET_CLOCK_OUT_SYS_FREQ_LEN 4
/* enum: The system clock domain doesn't exist */
-#define MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED 0x0
/* Resulting inter-core frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_OFST 4
#define MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_LEN 4
/* enum: The inter-core clock domain doesn't exist / isn't used */
-#define MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED 0x0
/* Resulting DPCPU frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_OFST 8
#define MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_LEN 4
/* enum: The dpcpu clock domain doesn't exist */
-#define MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED 0x0
/* Resulting PCS frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_PCS_FREQ_OFST 12
#define MC_CMD_SET_CLOCK_OUT_PCS_FREQ_LEN 4
/* enum: The PCS clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED 0x0
/* Resulting MC frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_MC_FREQ_OFST 16
#define MC_CMD_SET_CLOCK_OUT_MC_FREQ_LEN 4
/* enum: The MC clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED 0x0
/* Resulting rmon frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_RMON_FREQ_OFST 20
#define MC_CMD_SET_CLOCK_OUT_RMON_FREQ_LEN 4
/* enum: The rmon clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED 0x0
/* Resulting vswitch frequency in MHz */
#define MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_OFST 24
#define MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_LEN 4
/* enum: The vswitch clock domain doesn't exist / isn't controlled */
-#define MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED 0x0
+#define MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED 0x0
/***********************************/
@@ -11537,21 +12181,21 @@
#define MC_CMD_DPCPU_RPC_IN_CPU_OFST 0
#define MC_CMD_DPCPU_RPC_IN_CPU_LEN 4
/* enum: RxDPCPU0 */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX0 0x0
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX0 0x0
/* enum: TxDPCPU0 */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX0 0x1
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX0 0x1
/* enum: TxDPCPU1 */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX1 0x2
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX1 0x2
/* enum: RxDPCPU1 (Medford only) */
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX1 0x3
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX1 0x3
/* enum: RxDPCPU (will be for the calling function; for now, just an alias of
* DPCPU_RX0)
*/
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX 0x80
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_RX 0x80
/* enum: TxDPCPU (will be for the calling function; for now, just an alias of
* DPCPU_TX0)
*/
-#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX 0x81
+#define MC_CMD_DPCPU_RPC_IN_DPCPU_TX 0x81
/* First 8 bits [39:32] of DATA are consumed by MC-DPCPU protocol and must be
* initialised to zero
*/
@@ -11559,15 +12203,15 @@
#define MC_CMD_DPCPU_RPC_IN_DATA_LEN 32
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_LBN 8
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_WIDTH 8
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ 0x6 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE 0x7 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST 0xc /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS 0xe /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ 0x46 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE 0x47 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST 0x4a /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS 0x4c /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT 0x4d /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ 0x6 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE 0x7 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST 0xc /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS 0xe /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ 0x46 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE 0x47 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST 0x4a /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS 0x4c /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT 0x4d /* enum */
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_LBN 16
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_ADDR_LBN 16
@@ -11578,11 +12222,11 @@
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_INFO_WIDTH 240
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_LBN 16
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_WIDTH 16
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT 0x0 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ 0x1 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE 0x2 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ 0x3 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ 0x4 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT 0x0 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ 0x1 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE 0x2 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ 0x3 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ 0x4 /* enum */
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_LBN 48
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_RPT_COUNT_LBN 64
@@ -11591,9 +12235,9 @@
#define MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_GAP_DELAY_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_LBN 16
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_WIDTH 16
-#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH 0x1 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD 0x2 /* enum */
-#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST 0x3 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH 0x1 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD 0x2 /* enum */
+#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST 0x3 /* enum */
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_LBN 64
#define MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_WIDTH 16
#define MC_CMD_DPCPU_RPC_IN_WDATA_OFST 12
@@ -11660,7 +12304,7 @@
#define MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_OFST 0
#define MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_LEN 4
/* enum: Copy slave_data section to the slave core. (Greenport only) */
-#define MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA 0x0
+#define MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA 0x0
/* MC_CMD_SHMBOOT_OP_OUT msgresponse */
#define MC_CMD_SHMBOOT_OP_OUT_LEN 0
@@ -11709,14 +12353,14 @@
#define MC_CMD_DUMP_DO_IN_PADDING_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_OFST 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_LEN 4
-#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM 0x0 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT 0x1 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM 0x0 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT 0x1 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_OFST 8
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_LEN 4
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM 0x1 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY 0x2 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI 0x3 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART 0x4 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM 0x1 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY 0x2 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI 0x3 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART 0x4 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_OFST 12
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_OFFSET_OFST 16
@@ -11727,24 +12371,24 @@
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_ADDR_HI_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_OFST 12
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_LEN 4
-#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE 0x1000 /* enum */
+#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE 0x1000 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_OFST 16
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_OFST 20
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_LEN 4
-#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH 0x2 /* enum */
+#define MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH 0x2 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_OFST 12
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_LEN 4
/* enum: The uart port this command was received over (if using a uart
* transport)
*/
-#define MC_CMD_DUMP_DO_IN_UART_PORT_SRC 0xff
+#define MC_CMD_DUMP_DO_IN_UART_PORT_SRC 0xff
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_OFST 24
#define MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_LEN 4
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_OFST 28
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_LEN 4
-#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM 0x0 /* enum */
-#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION 0x1 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM 0x0 /* enum */
+#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION 0x1 /* enum */
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_OFST 32
#define MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_LEN 4
/* Enum values, see field(s): */
@@ -11854,11 +12498,11 @@
#define MC_CMD_SET_PSU_IN_LEN 12
#define MC_CMD_SET_PSU_IN_PARAM_OFST 0
#define MC_CMD_SET_PSU_IN_PARAM_LEN 4
-#define MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE 0x0 /* enum */
+#define MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE 0x0 /* enum */
#define MC_CMD_SET_PSU_IN_RAIL_OFST 4
#define MC_CMD_SET_PSU_IN_RAIL_LEN 4
-#define MC_CMD_SET_PSU_IN_RAIL_0V9 0x0 /* enum */
-#define MC_CMD_SET_PSU_IN_RAIL_1V2 0x1 /* enum */
+#define MC_CMD_SET_PSU_IN_RAIL_0V9 0x0 /* enum */
+#define MC_CMD_SET_PSU_IN_RAIL_1V2 0x1 /* enum */
/* desired value, eg voltage in mV */
#define MC_CMD_SET_PSU_IN_VALUE_OFST 8
#define MC_CMD_SET_PSU_IN_VALUE_LEN 4
@@ -12031,26 +12675,30 @@
#define MC_CMD_KR_TUNE_IN_KR_TUNE_OP_OFST 0
#define MC_CMD_KR_TUNE_IN_KR_TUNE_OP_LEN 1
/* enum: Get current RXEQ settings */
-#define MC_CMD_KR_TUNE_IN_RXEQ_GET 0x0
+#define MC_CMD_KR_TUNE_IN_RXEQ_GET 0x0
/* enum: Override RXEQ settings */
-#define MC_CMD_KR_TUNE_IN_RXEQ_SET 0x1
+#define MC_CMD_KR_TUNE_IN_RXEQ_SET 0x1
/* enum: Get current TX Driver settings */
-#define MC_CMD_KR_TUNE_IN_TXEQ_GET 0x2
+#define MC_CMD_KR_TUNE_IN_TXEQ_GET 0x2
/* enum: Override TX Driver settings */
-#define MC_CMD_KR_TUNE_IN_TXEQ_SET 0x3
+#define MC_CMD_KR_TUNE_IN_TXEQ_SET 0x3
/* enum: Force KR Serdes reset / recalibration */
-#define MC_CMD_KR_TUNE_IN_RECAL 0x4
+#define MC_CMD_KR_TUNE_IN_RECAL 0x4
/* enum: Start KR Serdes Eye diagram plot on a given lane. Lane must have valid
* signal.
*/
-#define MC_CMD_KR_TUNE_IN_START_EYE_PLOT 0x5
+#define MC_CMD_KR_TUNE_IN_START_EYE_PLOT 0x5
/* enum: Poll KR Serdes Eye diagram plot. Returns one row of BER data. The
* caller should call this command repeatedly after starting eye plot, until no
* more data is returned.
*/
-#define MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT 0x6
+#define MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT 0x6
/* enum: Read Figure Of Merit (eye quality, higher is better). */
-#define MC_CMD_KR_TUNE_IN_READ_FOM 0x7
+#define MC_CMD_KR_TUNE_IN_READ_FOM 0x7
+/* enum: Start/stop link training frames */
+#define MC_CMD_KR_TUNE_IN_LINK_TRAIN_RUN 0x8
+/* enum: Issue KR link training command (control training coefficients) */
+#define MC_CMD_KR_TUNE_IN_LINK_TRAIN_CMD 0x9
/* Align the arguments to 32 bits */
#define MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_OFST 1
#define MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_LEN 3
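
The POLL_EYE_PLOT comment above implies a simple poll-until-empty loop after START_EYE_PLOT. A sketch of that pattern; the mcdi_rpc() transport helper and its signature are assumptions for illustration, not a real sfc API:

#include <stddef.h>
#include <stdint.h>

extern int mcdi_rpc(unsigned int cmd, const uint8_t *in, size_t inlen,
		    uint8_t *out, size_t outlen, size_t *outlen_actual);

static int kr_tune_poll_eye_plot(unsigned int cmd_kr_tune,
				 uint8_t *row, size_t rowlen)
{
	uint8_t req[4] = { MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT, 0, 0, 0 };
	size_t got;
	int rc;

	for (;;) {
		rc = mcdi_rpc(cmd_kr_tune, req, sizeof(req), row, rowlen, &got);
		if (rc != 0 || got == 0)
			break;	/* error, or no more rows of BER data */
		/* ...consume one row of BER data from row[0..got)... */
	}
	return rc;
}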
@@ -12084,98 +12732,98 @@
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: Attenuation (0-15, Huntington) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT 0x0
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT 0x0
/* enum: CTLE Boost (0-15, Huntington) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST 0x1
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST 0x1
/* enum: Edge DFE Tap1 (Huntington - 0 - max negative, 64 - zero, 127 - max
* positive, Medford - 0-31)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1 0x2
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1 0x2
/* enum: Edge DFE Tap2 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-31)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2 0x3
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2 0x3
/* enum: Edge DFE Tap3 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-16)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3 0x4
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3 0x4
/* enum: Edge DFE Tap4 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-16)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4 0x5
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4 0x5
/* enum: Edge DFE Tap5 (Huntington - 0 - max negative, 32 - zero, 63 - max
* positive, Medford - 0-16)
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5 0x6
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5 0x6
/* enum: Edge DFE DLEV (0-128 for Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV 0x7
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV 0x7
/* enum: Variable Gain Amplifier (0-15, Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA 0x8
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA 0x8
/* enum: CTLE EQ Capacitor (0-15, Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
/* enum: CTLE EQ Resistor (0-7, Medford) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
/* enum: CTLE gain (0-31, Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN 0xb
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN 0xb
/* enum: CTLE pole (0-31, Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE 0xc
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE 0xc
/* enum: CTLE peaking (0-31, Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK 0xd
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK 0xd
/* enum: DFE Tap1 - even path (Medford2 - 6 bit signed (-29 - +29)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN 0xe
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN 0xe
/* enum: DFE Tap1 - odd path (Medford2 - 6 bit signed (-29 - +29)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD 0xf
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD 0xf
/* enum: DFE Tap2 (Medford2 - 6 bit signed (-20 - +20)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x10
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x10
/* enum: DFE Tap3 (Medford2 - 6 bit signed (-20 - +20)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x11
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x11
/* enum: DFE Tap4 (Medford2 - 6 bit signed (-20 - +20)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x12
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x12
/* enum: DFE Tap5 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x13
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x13
/* enum: DFE Tap6 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6 0x14
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6 0x14
/* enum: DFE Tap7 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7 0x15
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7 0x15
/* enum: DFE Tap8 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8 0x16
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8 0x16
/* enum: DFE Tap9 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9 0x17
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9 0x17
/* enum: DFE Tap10 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10 0x18
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10 0x18
/* enum: DFE Tap11 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11 0x19
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11 0x19
/* enum: DFE Tap12 (Medford2 - 6 bit signed (-24 - +24)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12 0x1a
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12 0x1a
/* enum: I/Q clk offset (Medford2 - 4 bit signed (-5 - +5)) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF 0x1b
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF 0x1b
/* enum: Negative h1 polarity data sampler offset calibration code, even path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN 0x1c
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN 0x1c
/* enum: Negative h1 polarity data sampler offset calibration code, odd path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD 0x1d
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD 0x1d
/* enum: Positive h1 polarity data sampler offset calibration code, even path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN 0x1e
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN 0x1e
/* enum: Positive h1 polarity data sampler offset calibration code, odd path
 * (Medford2 - 6 bit signed (-29 - +29))
*/
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD 0x1f
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD 0x1f
/* enum: CDR calibration loop code (Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT 0x20
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT 0x20
/* enum: CDR integral loop code (Medford2) */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG 0x21
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG 0x21
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
-#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 11
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
#define MC_CMD_KR_TUNE_RXEQ_GET_OUT_RESERVED_LBN 12
@@ -12241,38 +12889,38 @@
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: TX Amplitude (Huntington, Medford, Medford2) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV 0x0
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV 0x0
/* enum: De-Emphasis Tap1 Magnitude (0-7) (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE 0x1
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE 0x1
/* enum: De-Emphasis Tap1 Fine */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV 0x2
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV 0x2
/* enum: De-Emphasis Tap2 Magnitude (0-6) (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2 0x3
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2 0x3
/* enum: De-Emphasis Tap2 Fine (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV 0x4
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV 0x4
/* enum: Pre-Emphasis Magnitude (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E 0x5
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E 0x5
/* enum: Pre-Emphasis Fine (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV 0x6
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV 0x6
/* enum: TX Slew Rate Coarse control (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY 0x7
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY 0x7
/* enum: TX Slew Rate Fine control (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET 0x8
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET 0x8
/* enum: TX Termination Impedance control (Huntington) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET 0x9
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET 0x9
/* enum: TX Amplitude Fine control (Medford) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE 0xa
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE 0xa
/* enum: Pre-shoot Tap (Medford, Medford2) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV 0xb
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV 0xb
/* enum: De-emphasis Tap (Medford, Medford2) */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY 0xc
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY 0xc
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0 0x0 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1 0x1 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2 0x2 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3 0x3 /* enum */
-#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_LBN 11
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_WIDTH 5
#define MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_INITIAL_LBN 16
@@ -12345,9 +12993,12 @@
/* Align the arguments to 32 bits */
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_OFST 1
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_LEN 3
-/* Port-relative lane to scan eye on */
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_OFST 4
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_LEN 4
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_LBN 0
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_WIDTH 8
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_LBN 31
+#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_WIDTH 1
/* Scan duration / cycle count */
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_OFST 8
#define MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_LEN 4
@@ -12383,12 +13034,91 @@
#define MC_CMD_KR_TUNE_READ_FOM_IN_KR_TUNE_RSVD_LEN 3
#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_OFST 4
#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_LEN 4
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_LBN 0
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_WIDTH 8
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_LBN 31
+#define MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_WIDTH 1
/* MC_CMD_KR_TUNE_READ_FOM_OUT msgresponse */
#define MC_CMD_KR_TUNE_READ_FOM_OUT_LEN 4
#define MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_OFST 0
#define MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_LEN 4
+/* MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN msgrequest */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_LEN 8
+/* Requested operation */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_OFST 0
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_OFST 1
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_LEN 3
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_OFST 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_LEN 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_STOP 0x0 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_START 0x1 /* enum */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN msgrequest */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN 28
+/* Requested operation */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_OFST 0
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_OFST 1
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_LEN 3
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_OFST 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_LEN 4
+/* Set INITIALIZE state */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_OFST 8
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_LEN 4
+/* Set PRESET state */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_OFST 12
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_LEN 4
+/* C(-1) request */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_OFST 16
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_LEN 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD 0x0 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_INCREMENT 0x1 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_DECREMENT 0x2 /* enum */
+/* C(0) request */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_OFST 20
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) request */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_OFST 24
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT msgresponse */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_LEN 24
+/* C(-1) status */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_OFST 0
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_LEN 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_NOT_UPDATED 0x0 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_UPDATED 0x1 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MINIMUM 0x2 /* enum */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MAXIMUM 0x3 /* enum */
+/* C(0) status */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_OFST 4
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) status */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_OFST 8
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_LEN 4
+/* Enum values, see field(s): */
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(-1) value */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_OFST 12
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_LEN 4
+/* C(0) value */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_OFST 16
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_LEN 4
+/* C(+1) value */
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_OFST 20
+#define MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_LEN 4
+
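
The new LINK_TRAIN_CMD request carries one HOLD/INCREMENT/DECREMENT request per coefficient, and the response reports the resulting status and values. A hedged sketch of filling the 28-byte request using the offsets above (put_le32() and the surrounding code are illustrative assumptions):

#include <stdint.h>
#include <string.h>

static void put_le32(uint8_t *buf, unsigned int ofst, uint32_t v)
{
	buf[ofst + 0] = (uint8_t)v;
	buf[ofst + 1] = (uint8_t)(v >> 8);
	buf[ofst + 2] = (uint8_t)(v >> 16);
	buf[ofst + 3] = (uint8_t)(v >> 24);
}

static void build_link_train_cmd(uint8_t req[MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN],
				 uint32_t lane)
{
	memset(req, 0, MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN);
	req[MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_OFST] =
		MC_CMD_KR_TUNE_IN_LINK_TRAIN_CMD;
	put_le32(req, MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_OFST, lane);
	/* Ask the link partner to bump C(+1) and hold the other two taps. */
	put_le32(req, MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_OFST,
		 MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD);
	put_le32(req, MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_OFST,
		 MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD);
	put_le32(req, MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_OFST,
		 MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_INCREMENT);
}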
/***********************************/
/* MC_CMD_PCIE_TUNE
@@ -12406,22 +13136,22 @@
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_OFST 0
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_LEN 1
/* enum: Get current RXEQ settings */
-#define MC_CMD_PCIE_TUNE_IN_RXEQ_GET 0x0
+#define MC_CMD_PCIE_TUNE_IN_RXEQ_GET 0x0
/* enum: Override RXEQ settings */
-#define MC_CMD_PCIE_TUNE_IN_RXEQ_SET 0x1
+#define MC_CMD_PCIE_TUNE_IN_RXEQ_SET 0x1
/* enum: Get current TX Driver settings */
-#define MC_CMD_PCIE_TUNE_IN_TXEQ_GET 0x2
+#define MC_CMD_PCIE_TUNE_IN_TXEQ_GET 0x2
/* enum: Override TX Driver settings */
-#define MC_CMD_PCIE_TUNE_IN_TXEQ_SET 0x3
+#define MC_CMD_PCIE_TUNE_IN_TXEQ_SET 0x3
/* enum: Start PCIe Serdes Eye diagram plot on a given lane. */
-#define MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT 0x5
+#define MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT 0x5
/* enum: Poll PCIe Serdes Eye diagram plot. Returns one row of BER data. The
* caller should call this command repeatedly after starting eye plot, until no
* more data is returned.
*/
-#define MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6
+#define MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6
/* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */
-#define MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7
+#define MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7
/* Align the arguments to 32 bits */
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1
#define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3
@@ -12455,46 +13185,46 @@
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: Attenuation (0-15) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT 0x0
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT 0x0
/* enum: CTLE Boost (0-15) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST 0x1
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST 0x1
/* enum: DFE Tap1 (0 - max negative, 64 - zero, 127 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1 0x2
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1 0x2
/* enum: DFE Tap2 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x3
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x3
/* enum: DFE Tap3 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x4
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x4
/* enum: DFE Tap4 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x5
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x5
/* enum: DFE Tap5 (0 - max negative, 32 - zero, 63 - max positive) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x6
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x6
/* enum: DFE DLev */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV 0x7
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV 0x7
/* enum: Figure of Merit */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM 0x8
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM 0x8
/* enum: CTLE EQ Capacitor (HF Gain) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
/* enum: CTLE EQ Resistor (DC Gain) */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 5
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4 0x4 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5 0x5 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6 0x6 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7 0x7 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8 0x8 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9 0x9 /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10 0xa /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11 0xb /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12 0xc /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13 0xd /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14 0xe /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15 0xf /* enum */
-#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL 0x10 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4 0x4 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5 0x5 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6 0x6 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7 0x7 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8 0x8 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9 0x9 /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10 0xa /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11 0xb /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12 0xc /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13 0xd /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14 0xe /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15 0xf /* enum */
+#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL 0x10 /* enum */
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 13
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
#define MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_RESERVED_LBN 14
@@ -12558,15 +13288,15 @@
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
/* enum: TxMargin (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN 0x0
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN 0x0
/* enum: TxSwing (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING 0x1
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING 0x1
/* enum: De-emphasis coefficient C(-1) (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1 0x2
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1 0x2
/* enum: De-emphasis coefficient C(0) (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0 0x3
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0 0x3
/* enum: De-emphasis coefficient C(+1) (PIPE) */
-#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1 0x4
+#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1 0x4
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
#define MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 4
/* Enum values, see field(s): */
@@ -12632,9 +13362,9 @@
/* enum: re-read and apply licenses after a license key partition update; note
* that this operation returns a zero-length response
*/
-#define MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE 0x0
+#define MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE 0x0
/* enum: report counts of installed licenses */
-#define MC_CMD_LICENSING_IN_OP_GET_KEY_STATS 0x1
+#define MC_CMD_LICENSING_IN_OP_GET_KEY_STATS 0x1
/* MC_CMD_LICENSING_OUT msgresponse */
#define MC_CMD_LICENSING_OUT_LEN 28
@@ -12665,9 +13395,9 @@
#define MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_OFST 24
#define MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_LEN 4
/* enum: licensing subsystem self-test failed */
-#define MC_CMD_LICENSING_OUT_SELF_TEST_FAIL 0x0
+#define MC_CMD_LICENSING_OUT_SELF_TEST_FAIL 0x0
/* enum: licensing subsystem self-test passed */
-#define MC_CMD_LICENSING_OUT_SELF_TEST_PASS 0x1
+#define MC_CMD_LICENSING_OUT_SELF_TEST_PASS 0x1
/***********************************/
@@ -12687,11 +13417,11 @@
/* enum: re-read and apply licenses after a license key partition update; note
* that this operation returns a zero-length response
*/
-#define MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0
+#define MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0
/* enum: report counts of installed licenses. Returns EAGAIN if license
* processing (updating) has been started but not yet completed.
*/
-#define MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1
+#define MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1
/* MC_CMD_LICENSING_V3_OUT msgresponse */
#define MC_CMD_LICENSING_V3_OUT_LEN 88
@@ -12718,9 +13448,9 @@
#define MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_OFST 20
#define MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_LEN 4
/* enum: licensing subsystem self-test failed */
-#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL 0x0
+#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL 0x0
/* enum: licensing subsystem self-test passed */
-#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS 0x1
+#define MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS 0x1
/* bitmask of licensed applications */
#define MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_OFST 24
#define MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_LEN 8
@@ -12806,9 +13536,9 @@
#define MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_OFST 0
#define MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_LEN 4
/* enum: no (or invalid) license is present for the application */
-#define MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED 0x0
+#define MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED 0x0
/* enum: a valid license is present for the application */
-#define MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED 0x1
+#define MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED 0x1
/***********************************/
@@ -12837,9 +13567,9 @@
#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_OFST 0
#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_LEN 4
/* enum: no (or invalid) license is present for the application */
-#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED 0x0
+#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED 0x0
/* enum: a valid license is present for the application */
-#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED 0x1
+#define MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED 0x1
/***********************************/
@@ -12891,9 +13621,9 @@
#define MC_CMD_LICENSED_APP_OP_IN_OP_OFST 4
#define MC_CMD_LICENSED_APP_OP_IN_OP_LEN 4
/* enum: validate application */
-#define MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE 0x0
+#define MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE 0x0
/* enum: mask application */
-#define MC_CMD_LICENSED_APP_OP_IN_OP_MASK 0x1
+#define MC_CMD_LICENSED_APP_OP_IN_OP_MASK 0x1
/* arguments specific to this particular operation */
#define MC_CMD_LICENSED_APP_OP_IN_ARGS_OFST 8
#define MC_CMD_LICENSED_APP_OP_IN_ARGS_LEN 4
@@ -12984,9 +13714,9 @@
#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100
#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_LEN 4
/* enum: expiry units are accounting units */
-#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0
+#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0
/* enum: expiry units are calendar days */
-#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1
+#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1
/* base MAC address of the NIC stored in NVRAM (note that this is a constant
* value for a given NIC regardless which function is calling, effectively this
* is PF0 base MAC address)
@@ -13019,9 +13749,9 @@
#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_OFST 8
#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_LEN 4
/* enum: turn the features off */
-#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF 0x0
+#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF 0x0
/* enum: turn the features back on */
-#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON 0x1
+#define MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON 0x1
/* MC_CMD_LICENSED_V3_MASK_FEATURES_OUT msgresponse */
#define MC_CMD_LICENSED_V3_MASK_FEATURES_OUT_LEN 0
@@ -13048,15 +13778,15 @@
* This is an asynchronous operation owing to the time taken to validate an
* ECDSA license
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0
+#define MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0
/* enum: clear the license immediately rather than waiting for the next power
* cycle
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1
+#define MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1
/* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET
* operation
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2
/* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */
#define MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164
@@ -13082,13 +13812,13 @@
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_LEN 4
/* enum: finished validating and installing license */
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0
/* enum: license validation and installation in progress */
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1
/* enum: licensing error. More specific error messages are not provided to
* avoid exposing details of the licensing system to the client
*/
-#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2
+#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2
/* bitmask of licensed features */
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4
#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8
@@ -13124,9 +13854,9 @@
#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
+#define MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
* that these handles should be considered opaque to the host, although a value
* of 0xFFFFFFFF is guaranteed never to be a valid handle.
@@ -13146,7 +13876,7 @@
*/
#define MC_CMD_GET_PORT_SNIFF_CONFIG 0xf8
-#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_GET_PORT_SNIFF_CONFIG_IN msgrequest */
#define MC_CMD_GET_PORT_SNIFF_CONFIG_IN_LEN 0
@@ -13167,9 +13897,9 @@
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
/* enum: receiving to just the specified queue */
-#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
+#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
/* enum: receiving to multiple queues using RSS context */
-#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
+#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) */
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
#define MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
@@ -13193,12 +13923,12 @@
/* enum: Per-TXQ enable for multicast UDP destination lookup for possible
* internal loopback. (ENTITY is a queue handle, VALUE is a single boolean.)
*/
-#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN 0x0
+#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN 0x0
/* enum: Per-v-adaptor enable for suppression of self-transmissions on the
* internal loopback path. (ENTITY is an EVB_PORT_ID, VALUE is a single
* boolean.)
*/
-#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX 0x1
+#define MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX 0x1
/* handle for the entity to update: queue handle, EVB port ID, etc. depending
* on the type of configuration setting being changed
*/
@@ -13278,9 +14008,9 @@
#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
/* enum: receive to just the specified queue */
-#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
+#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
/* enum: receive to multiple queues using RSS context */
-#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
+#define MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
* that these handles should be considered opaque to the host, although a value
* of 0xFFFFFFFF is guaranteed never to be a valid handle.
@@ -13300,7 +14030,7 @@
*/
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG 0xfc
-#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN msgrequest */
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN_LEN 0
@@ -13319,9 +14049,9 @@
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
/* enum: receiving to just the specified queue */
-#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
+#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
/* enum: receiving to multiple queues using RSS context */
-#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
+#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
/* RSS context (for RX_MODE_RSS) */
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
#define MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
@@ -13431,9 +14161,9 @@
#define MC_CMD_READ_ATB_IN_LEN 16
#define MC_CMD_READ_ATB_IN_SIGNAL_BUS_OFST 0
#define MC_CMD_READ_ATB_IN_SIGNAL_BUS_LEN 4
-#define MC_CMD_READ_ATB_IN_BUS_CCOM 0x0 /* enum */
-#define MC_CMD_READ_ATB_IN_BUS_CKR 0x1 /* enum */
-#define MC_CMD_READ_ATB_IN_BUS_CPCIE 0x8 /* enum */
+#define MC_CMD_READ_ATB_IN_BUS_CCOM 0x0 /* enum */
+#define MC_CMD_READ_ATB_IN_BUS_CKR 0x1 /* enum */
+#define MC_CMD_READ_ATB_IN_BUS_CPCIE 0x8 /* enum */
#define MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_OFST 4
#define MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_LEN 4
#define MC_CMD_READ_ATB_IN_SIGNAL_SEL_OFST 8
@@ -13503,46 +14233,46 @@
#define MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_PF_WIDTH 16
#define MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_LBN 16
#define MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_WIDTH 16
-#define MC_CMD_PRIVILEGE_MASK_IN_VF_NULL 0xffff /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_VF_NULL 0xffff /* enum */
/* New privilege mask to be set. The mask will only be changed if the MSB is
* set to 1.
*/
#define MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_OFST 4
#define MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_LEN 4
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN 0x1 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK 0x2 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD 0x4 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP 0x8 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS 0x10 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN 0x1 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK 0x2 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD 0x4 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP 0x8 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS 0x10 /* enum */
/* enum: Deprecated. Equivalent to MAC_SPOOFING_TX combined with CHANGE_MAC. */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING 0x20
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST 0x40 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST 0x80 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST 0x100 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST 0x200 /* enum */
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS 0x400 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING 0x20
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST 0x40 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST 0x80 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST 0x100 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST 0x200 /* enum */
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS 0x400 /* enum */
/* enum: Allows to set the TX packets' source MAC address to any arbitrary MAC
 * address.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX 0x800
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX 0x800
/* enum: Privilege that allows a Function to change the MAC address configured
* in its associated vAdapter/vPort.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC 0x1000
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC 0x1000
/* enum: Privilege that allows a Function to install filters that specify VLANs
* that are not in the permit list for the associated vPort. This privilege is
* primarily to support ESX where vPorts are created that restrict traffic to
* only a set of permitted VLANs. See the vPort flag FLAG_VLAN_RESTRICT.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN 0x2000
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN 0x2000
/* enum: Privilege for insecure commands. Commands that belong to this group
* are not permitted on secure adapters regardless of the privilege mask.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE 0x4000
+#define MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE 0x4000
/* enum: Set this bit to indicate that a new privilege mask is to be set,
* otherwise the command will only read the existing mask.
*/
-#define MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE 0x80000000
+#define MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE 0x80000000
/* MC_CMD_PRIVILEGE_MASK_OUT msgresponse */
#define MC_CMD_PRIVILEGE_MASK_OUT_LEN 4
@@ -13573,12 +14303,12 @@
/* New link state mode to be set */
#define MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_OFST 4
#define MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_LEN 4
-#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */
-#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */
-#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */
+#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */
+#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */
+#define MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */
/* enum: Use this value to just read the existing setting without modifying it.
*/
-#define MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff
+#define MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff
/* MC_CMD_LINK_STATE_MODE_OUT msgresponse */
#define MC_CMD_LINK_STATE_MODE_OUT_LEN 4
@@ -13674,12 +14404,12 @@
/* The groups of functions to have their privilege masks modified. */
#define MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_OFST 0
#define MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_LEN 4
-#define MC_CMD_PRIVILEGE_MODIFY_IN_NONE 0x0 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_ALL 0x1 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY 0x2 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY 0x3 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF 0x4 /* enum */
-#define MC_CMD_PRIVILEGE_MODIFY_IN_ONE 0x5 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_NONE 0x0 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_ALL 0x1 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY 0x2 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY 0x3 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF 0x4 /* enum */
+#define MC_CMD_PRIVILEGE_MODIFY_IN_ONE 0x5 /* enum */
/* For VFS_OF_PF specify the PF, for ONE specify the target function */
#define MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_OFST 4
#define MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_LEN 4
@@ -13782,11 +14512,11 @@
/* Sector type */
#define MC_CMD_XPM_READ_SECTOR_OUT_TYPE_OFST 0
#define MC_CMD_XPM_READ_SECTOR_OUT_TYPE_LEN 4
-#define MC_CMD_XPM_READ_SECTOR_OUT_BLANK 0x0 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128 0x1 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256 0x2 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA 0x3 /* enum */
-#define MC_CMD_XPM_READ_SECTOR_OUT_INVALID 0xff /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_BLANK 0x0 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128 0x1 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256 0x2 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA 0x3 /* enum */
+#define MC_CMD_XPM_READ_SECTOR_OUT_INVALID 0xff /* enum */
/* Sector data */
#define MC_CMD_XPM_READ_SECTOR_OUT_DATA_OFST 4
#define MC_CMD_XPM_READ_SECTOR_OUT_DATA_LEN 1
@@ -14001,18 +14731,18 @@
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_OFST 0
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LEN 2
/* enum: the IANA allocated UDP port for VXLAN */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
/* enum: the IANA allocated UDP port for Geneve */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN 0
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_WIDTH 16
/* tunnel encapsulation protocol (only those named below are supported) */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_OFST 2
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LEN 2
/* enum: This port will be used for VXLAN on both IPv4 and IPv6 */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
/* enum: This port will be used for Geneve on both IPv4 and IPv6 */
-#define TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN 16
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_WIDTH 16
@@ -14180,10 +14910,10 @@
/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
#define MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
#define MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_LEN 4
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */
-#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */
+#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */
/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
#define MC_CMD_SET_EVQ_TMR_OUT_LEN 8
@@ -14269,7 +14999,7 @@
*/
#define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d
-#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20
@@ -14281,9 +15011,9 @@
/* Will the common pool be used as TX_vFIFO_ULL (1) */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_LEN 4
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */
/* enum: Using this interface without TX_vFIFO_ULL is not supported for now */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0
/* Number of buffers to reserve for the common pool */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_LEN 4
@@ -14291,20 +15021,20 @@
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_LEN 4
/* enum: Extracts information from function */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1
/* Network port or RX Engine to which the common pool connects. */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_LEN 4
/* enum: Extracts information from function */
-/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */
/* enum: To enable Switch loopback with Rx engine 0 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4
/* enum: To enable Switch loopback with Rx engine 1 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5
/* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */
#define MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4
@@ -14320,7 +15050,7 @@
*/
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e
-#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20
@@ -14332,20 +15062,20 @@
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_LEN 4
/* enum: Extracts information from common pool */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */
/* enum: To enable Switch loopback with Rx engine 0 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4
/* enum: To enable Switch loopback with Rx engine 1 */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5
/* Minimum number of buffers that the pool must have */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_LEN 4
/* enum: Do not check the space available */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0
/* Will the vFIFO be used as TX_vFIFO_ULL */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_LEN 4
@@ -14353,7 +15083,7 @@
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_LEN 4
/* enum: Search for the lowest unused priority */
-#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1
/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */
#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8
@@ -14372,7 +15102,7 @@
*/
#define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f
-#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */
#define MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4
@@ -14391,7 +15121,7 @@
*/
#define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121
-#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */
#define MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4
@@ -14410,7 +15140,7 @@
*/
#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124
-#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_GENERAL
/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */
#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index ce8aabf9091e..9382bb0b4d5a 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -352,6 +352,64 @@ static void efx_mcdi_phy_decode_link(struct efx_nic *efx,
link_state->speed = speed;
}
+/* The semantics of the ethtool FEC mode bitmask are not well defined,
+ * particularly the meaning of combinations of bits. Which means we get to
+ * define our own semantics, as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if AN
+ * partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
+{
+ u32 ret = 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_OFF)
+ return 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_AUTO)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_RS)
+ ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_BASER)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+ return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that the
+ * function above can never produce: (baser xor rs) with neither req bit set;
+ * the implementation below maps both of those to AUTO. This should never
+ * matter, and it's not clear what a better mapping would be anyway.
+ */
+static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+ bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+ rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+ baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+ baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+ if (!baser && !rs)
+ return ETHTOOL_FEC_OFF;
+ return (rs_req ? ETHTOOL_FEC_RS : 0) |
+ (baser_req ? ETHTOOL_FEC_BASER : 0) |
+ (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
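
To make the mapping rules above easier to sanity-check outside the driver, here is a
minimal standalone sketch that mirrors ethtool_fec_caps_to_mcdi(). The FAKE_*_LBN bit
positions are placeholders invented for this sketch (the real MC_CMD_PHY_CAP_* values
live in mcdi_pcol.h), and the ETHTOOL_FEC_* masks are assumed to come from a
<linux/ethtool.h> new enough to define them (v4.14 or later).

/* Illustrative only: FAKE_*_LBN values are placeholders, not the real
 * MC_CMD_PHY_CAP_* definitions from mcdi_pcol.h.
 */
#include <stdio.h>
#include <linux/ethtool.h>

#define FAKE_BASER_FEC_LBN               0
#define FAKE_BASER_FEC_REQUESTED_LBN     1
#define FAKE_RS_FEC_LBN                  2
#define FAKE_RS_FEC_REQUESTED_LBN        3
#define FAKE_25G_BASER_FEC_LBN           4
#define FAKE_25G_BASER_FEC_REQUESTED_LBN 5

static unsigned int fec_caps_to_mcdi(unsigned int ethtool_cap)
{
	unsigned int ret = 0;

	if (ethtool_cap & ETHTOOL_FEC_OFF)
		return 0;	/* OFF overrides everything else */
	if (ethtool_cap & ETHTOOL_FEC_AUTO)
		ret |= (1 << FAKE_BASER_FEC_LBN) |
		       (1 << FAKE_25G_BASER_FEC_LBN) |
		       (1 << FAKE_RS_FEC_LBN);
	if (ethtool_cap & ETHTOOL_FEC_RS)
		ret |= (1 << FAKE_RS_FEC_LBN) |
		       (1 << FAKE_RS_FEC_REQUESTED_LBN);
	if (ethtool_cap & ETHTOOL_FEC_BASER)
		ret |= (1 << FAKE_BASER_FEC_LBN) |
		       (1 << FAKE_25G_BASER_FEC_LBN) |
		       (1 << FAKE_BASER_FEC_REQUESTED_LBN) |
		       (1 << FAKE_25G_BASER_FEC_REQUESTED_LBN);
	return ret;
}

int main(void)
{
	/* RS alone sets both the RS capability and RS requested bits */
	printf("RS alone     -> %#x\n", fec_caps_to_mcdi(ETHTOOL_FEC_RS));
	/* AUTO + BASER: all capability bits, BASER additionally requested */
	printf("AUTO | BASER -> %#x\n",
	       fec_caps_to_mcdi(ETHTOOL_FEC_AUTO | ETHTOOL_FEC_BASER));
	/* OFF overrides everything else */
	printf("OFF | RS     -> %#x\n",
	       fec_caps_to_mcdi(ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS));
	return 0;
}

For example, ETHTOOL_FEC_RS alone yields both the RS capability and RS "requested"
bits, matching the "RS or BASER alone" rule in the comment above, while anything
combined with ETHTOOL_FEC_OFF collapses to zero.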
+
static int efx_mcdi_phy_probe(struct efx_nic *efx)
{
struct efx_mcdi_phy_data *phy_data;
@@ -438,6 +496,13 @@ static int efx_mcdi_phy_probe(struct efx_nic *efx)
MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
+ /* Record the initial FEC configuration (or nearest approximation
+ * representable in the ethtool configuration space)
+ */
+ efx->fec_config = mcdi_fec_caps_to_ethtool(caps,
+ efx->link_state.speed == 25000 ||
+ efx->link_state.speed == 50000);
+
/* Default to Autonegotiated flow control if the PHY supports it */
efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
@@ -458,6 +523,8 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx)
ethtool_linkset_to_mcdi_cap(efx->link_advertising) :
phy_cfg->forced_cap);
+ caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
efx->loopback_mode, 0);
}
@@ -584,6 +651,8 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
}
}
+ caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
efx->loopback_mode, 0);
if (rc)
@@ -599,6 +668,85 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
return 0;
}
+static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
+ struct ethtool_fecparam *fec)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+ u32 caps, active, speed; /* MCDI format */
+ bool is_25g = false;
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+ return -EOPNOTSUPP;
+
+ /* behaviour for 25G/50G links depends on 25G BASER bit */
+ speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+ is_25g = speed == 25000 || speed == 50000;
+
+ caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+ fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+ /* BASER is never supported on 100G */
+ if (speed == 100000)
+ fec->fec &= ~ETHTOOL_FEC_BASER;
+
+ active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+ switch (active) {
+ case MC_CMD_FEC_NONE:
+ fec->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ case MC_CMD_FEC_BASER:
+ fec->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case MC_CMD_FEC_RS:
+ fec->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ netif_warn(efx, hw, efx->net_dev,
+ "Firmware reports unrecognised FEC_TYPE %u\n",
+ active);
+ /* We don't know what firmware has picked. AUTO is as good a
+ * "can't happen" value as any other.
+ */
+ fec->active_fec = ETHTOOL_FEC_AUTO;
+ break;
+ }
+
+ return 0;
+}
+
+static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
+ const struct ethtool_fecparam *fec)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 caps;
+ int rc;
+
+ /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from
+ * saved advertising bits
+ */
+ if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising))
+ caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) |
+ 1 << MC_CMD_PHY_CAP_AN_LBN);
+ else
+ caps = phy_cfg->forced_cap;
+
+ caps |= ethtool_fec_caps_to_mcdi(fec->fec);
+ rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+ efx->loopback_mode, 0);
+ if (rc)
+ return rc;
+
+ /* Record the new FEC setting for subsequent set_link calls */
+ efx->fec_config = fec->fec;
+ return 0;
+}
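
These hooks are reached through the generic ethtool FEC ioctls. As a sketch of how the
new get_fecparam path is exercised from user space, assuming kernel headers that define
ETHTOOL_GFECPARAM and a hypothetical interface name "eth0":

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_fecparam fp = { .cmd = ETHTOOL_GFECPARAM };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* hypothetical ifname */
	ifr.ifr_data = (void *)&fp;

	/* Query supported and currently active FEC modes */
	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("supported fec mask %#x, active fec %#x\n",
		       fp.fec, fp.active_fec);
	close(fd);
	return 0;
}

Recent ethtool binaries expose the same data via --show-fec and --set-fec, which end up
in these driver callbacks.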
+
static int efx_mcdi_phy_test_alive(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
@@ -977,6 +1125,8 @@ static const struct efx_phy_operations efx_mcdi_phy_ops = {
.remove = efx_mcdi_phy_remove,
.get_link_ksettings = efx_mcdi_phy_get_link_ksettings,
.set_link_ksettings = efx_mcdi_phy_set_link_ksettings,
+ .get_fecparam = efx_mcdi_phy_get_fecparam,
+ .set_fecparam = efx_mcdi_phy_set_fecparam,
.test_alive = efx_mcdi_phy_test_alive,
.run_tests = efx_mcdi_phy_run_tests,
.test_name = efx_mcdi_phy_test_name,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index d20a8660ee48..2453f3849e72 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -627,6 +627,8 @@ static inline bool efx_link_state_equal(const struct efx_link_state *left,
* Serialised by the mac_lock.
* @get_link_ksettings: Get ethtool settings. Serialised by the mac_lock.
* @set_link_ksettings: Set ethtool settings. Serialised by the mac_lock.
+ * @get_fecparam: Get Forward Error Correction settings. Serialised by mac_lock.
+ * @set_fecparam: Set Forward Error Correction settings. Serialised by mac_lock.
* @set_npage_adv: Set abilities advertised in (Extended) Next Page
* (only needed where AN bit is set in mmds)
* @test_alive: Test that PHY is 'alive' (online)
@@ -645,6 +647,9 @@ struct efx_phy_operations {
struct ethtool_link_ksettings *cmd);
int (*set_link_ksettings)(struct efx_nic *efx,
const struct ethtool_link_ksettings *cmd);
+ int (*get_fecparam)(struct efx_nic *efx, struct ethtool_fecparam *fec);
+ int (*set_fecparam)(struct efx_nic *efx,
+ const struct ethtool_fecparam *fec);
void (*set_npage_adv) (struct efx_nic *efx, u32);
int (*test_alive) (struct efx_nic *efx);
const char *(*test_name) (struct efx_nic *efx, unsigned int index);
@@ -704,6 +709,28 @@ union efx_multicast_hash {
struct vfdi_status;
+/* The reserved RSS context value */
+#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
+/**
+ * struct efx_rss_context - A user-defined RSS context for filtering
+ * @list: node of linked list on which this struct is stored
+ * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
+ * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID.
+ * @user_id: the rss_context ID exposed to userspace over ethtool.
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rx_hash_key: Toeplitz hash key for this RSS context
+ * @indir_table: Indirection table for this RSS context
+ */
+struct efx_rss_context {
+ struct list_head list;
+ u32 context_id;
+ u32 user_id;
+ bool rx_hash_udp_4tuple;
+ u8 rx_hash_key[40];
+ u32 rx_indir_table[128];
+};
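
A sketch of how this structure is meant to be used: user-visible context IDs can be
resolved by walking the list anchored at the main context. The helper name and the
absence of locking here are illustrative assumptions, not the driver's actual API.

/* Illustrative sketch only: helper name and (absent) locking are assumptions. */
static struct efx_rss_context *example_find_rss_context(struct efx_nic *efx,
							 u32 user_id)
{
	struct efx_rss_context *ctx;

	/* efx->rss_context.list heads the list of user-created contexts */
	list_for_each_entry(ctx, &efx->rss_context.list, list)
		if (ctx->user_id == user_id)
			return ctx;
	return NULL;
}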
+
/**
* struct efx_nic - an Efx NIC
* @name: Device name (net device name or bus id before net device registered)
@@ -764,11 +791,9 @@ struct vfdi_status;
* (valid only for NICs that set %EFX_RX_PKT_PREFIX_LEN; always negative)
* @rx_packet_ts_offset: Offset of timestamp from start of packet data
* (valid only if channel->sync_timestamps_enabled; always negative)
- * @rx_hash_key: Toeplitz hash key for RSS
- * @rx_indir_table: Indirection table for RSS
* @rx_scatter: Scatter mode enabled for receives
- * @rss_active: RSS enabled on hardware
- * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rss_context: Main RSS context. Its @list member is the head of the list of
+ * RSS contexts created by user requests
* @int_error_count: Number of internal errors seen recently
* @int_error_expire: Time at which error count will be expired
* @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -800,6 +825,8 @@ struct vfdi_status;
* @mdio_bus: PHY MDIO bus ID (only used by Siena)
* @phy_mode: PHY operating mode. Serialised by @mac_lock.
* @link_advertising: Autonegotiation advertising flags
+ * @fec_config: Forward Error Correction configuration flags. For bit positions
+ * see &enum ethtool_fec_config_bits.
* @link_state: Current state of the link
* @n_link_state_changes: Number of times the link has changed state
* @unicast_filter: Flag for Falcon-arch simple unicast filter.
@@ -909,11 +936,8 @@ struct efx_nic {
int rx_packet_hash_offset;
int rx_packet_len_offset;
int rx_packet_ts_offset;
- u8 rx_hash_key[40];
- u32 rx_indir_table[128];
bool rx_scatter;
- bool rss_active;
- bool rx_hash_udp_4tuple;
+ struct efx_rss_context rss_context;
unsigned int_error_count;
unsigned long int_error_expire;
@@ -955,6 +979,7 @@ struct efx_nic {
enum efx_phy_mode phy_mode;
__ETHTOOL_DECLARE_LINK_MODE_MASK(link_advertising);
+ u32 fec_config;
struct efx_link_state link_state;
unsigned int n_link_state_changes;
@@ -1099,6 +1124,10 @@ struct efx_udp_tunnel {
* @tx_write: Write TX descriptors and doorbell
* @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
* @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC
+ * @rx_push_rss_context_config: Write RSS hash key and indirection table for
+ * user RSS context to the NIC
+ * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user
+ * RSS context back from the NIC
* @rx_probe: Allocate resources for RX queue
* @rx_init: Initialise RX queue on the NIC
* @rx_remove: Free resources for RX queue
@@ -1237,6 +1266,13 @@ struct efx_nic_type {
int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
const u32 *rx_indir_table, const u8 *key);
int (*rx_pull_rss_config)(struct efx_nic *efx);
+ int (*rx_push_rss_context_config)(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key);
+ int (*rx_pull_rss_context_config)(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+ void (*rx_restore_rss_contexts)(struct efx_nic *efx);
int (*rx_probe)(struct efx_rx_queue *rx_queue);
void (*rx_init)(struct efx_rx_queue *rx_queue);
void (*rx_remove)(struct efx_rx_queue *rx_queue);
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 6549fc685a48..d080a414e8f2 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -374,7 +374,6 @@ enum {
* @piobuf_size: size of a single PIO buffer
* @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
* reboot
- * @rx_rss_context: Firmware handle for our RSS context
* @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared
* @stats: Hardware statistics
* @workaround_35388: Flag: firmware supports workaround for bug 35388
@@ -415,7 +414,6 @@ struct efx_ef10_nic_data {
unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
u16 piobuf_size;
bool must_restore_piobufs;
- u32 rx_rss_context;
bool rx_rss_context_exclusive;
u64 stats[EF10_STAT_COUNT];
bool workaround_35388;
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index ae8645ae4492..18aab25234ba 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -350,11 +350,11 @@ static int siena_rx_pull_rss_config(struct efx_nic *efx)
* siena_rx_push_rss_config, below)
*/
efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
- memcpy(efx->rx_hash_key, &temp, sizeof(temp));
+ memcpy(efx->rss_context.rx_hash_key, &temp, sizeof(temp));
efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
- memcpy(efx->rx_hash_key + sizeof(temp), &temp, sizeof(temp));
+ memcpy(efx->rss_context.rx_hash_key + sizeof(temp), &temp, sizeof(temp));
efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
- memcpy(efx->rx_hash_key + 2 * sizeof(temp), &temp,
+ memcpy(efx->rss_context.rx_hash_key + 2 * sizeof(temp), &temp,
FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
efx_farch_rx_pull_indir_table(efx);
return 0;
@@ -367,26 +367,26 @@ static int siena_rx_push_rss_config(struct efx_nic *efx, bool user,
/* Set hash key for IPv4 */
if (key)
- memcpy(efx->rx_hash_key, key, sizeof(temp));
- memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+ memcpy(efx->rss_context.rx_hash_key, key, sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
/* Enable IPv6 RSS */
- BUILD_BUG_ON(sizeof(efx->rx_hash_key) <
+ BUILD_BUG_ON(sizeof(efx->rss_context.rx_hash_key) <
2 * sizeof(temp) + FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8 ||
FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN != 0);
- memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
- memcpy(&temp, efx->rx_hash_key + sizeof(temp), sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key + sizeof(temp), sizeof(temp));
efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
EFX_POPULATE_OWORD_2(temp, FRF_CZ_RX_RSS_IPV6_THASH_ENABLE, 1,
FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE, 1);
- memcpy(&temp, efx->rx_hash_key + 2 * sizeof(temp),
+ memcpy(&temp, efx->rss_context.rx_hash_key + 2 * sizeof(temp),
FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
- memcpy(efx->rx_indir_table, rx_indir_table,
- sizeof(efx->rx_indir_table));
+ memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
efx_farch_rx_push_indir_table(efx);
return 0;
@@ -432,8 +432,8 @@ static int siena_init_nic(struct efx_nic *efx)
EFX_RX_USR_BUF_SIZE >> 5);
efx_writeo(efx, &temp, FR_AZ_RX_CFG);
- siena_rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
- efx->rss_active = true;
+ siena_rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL);
+ efx->rss_context.context_id = 0; /* indicates RSS is active */
/* Enable event logging */
rc = efx_mcdi_log_ctrl(efx, true, false, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 5270d26f0bc6..2d5d4aea3bcb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -48,26 +48,18 @@
#define MUX_CLK_NUM_PARENTS 2
struct meson8b_dwmac {
- struct platform_device *pdev;
-
+ struct device *dev;
void __iomem *regs;
-
phy_interface_t phy_mode;
+ struct clk *rgmii_tx_clk;
+ u32 tx_delay_ns;
+};
+struct meson8b_dwmac_clk_configs {
struct clk_mux m250_mux;
- struct clk *m250_mux_clk;
- struct clk *m250_mux_parent[MUX_CLK_NUM_PARENTS];
-
struct clk_divider m250_div;
- struct clk *m250_div_clk;
-
struct clk_fixed_factor fixed_div2;
- struct clk *fixed_div2_clk;
-
struct clk_gate rgmii_tx_en;
- struct clk *rgmii_tx_en_clk;
-
- u32 tx_delay_ns;
};
static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
@@ -82,106 +74,99 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
writel(data, dwmac->regs + reg);
}
-static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
+ const char *name_suffix,
+ const char **parent_names,
+ int num_parents,
+ const struct clk_ops *ops,
+ struct clk_hw *hw)
{
struct clk_init_data init;
- int i, ret;
- struct device *dev = &dwmac->pdev->dev;
char clk_name[32];
- const char *clk_div_parents[1];
- const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
+
+ snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev),
+ name_suffix);
+
+ init.name = clk_name;
+ init.ops = ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+ init.num_parents = num_parents;
+
+ hw->init = &init;
+
+ return devm_clk_register(dwmac->dev, hw);
+}
+
+static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
+{
+ int i, ret;
+ struct clk *clk;
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+ return -ENOMEM;
/* get the mux parents from DT */
for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
char name[16];
snprintf(name, sizeof(name), "clkin%d", i);
- dwmac->m250_mux_parent[i] = devm_clk_get(dev, name);
- if (IS_ERR(dwmac->m250_mux_parent[i])) {
- ret = PTR_ERR(dwmac->m250_mux_parent[i]);
+ clk = devm_clk_get(dev, name);
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
if (ret != -EPROBE_DEFER)
dev_err(dev, "Missing clock %s\n", name);
return ret;
}
- mux_parent_names[i] =
- __clk_get_name(dwmac->m250_mux_parent[i]);
+ mux_parent_names[i] = __clk_get_name(clk);
}
- /* create the m250_mux */
- snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
- init.name = clk_name;
- init.ops = &clk_mux_ops;
- init.flags = CLK_SET_RATE_PARENT;
- init.parent_names = mux_parent_names;
- init.num_parents = MUX_CLK_NUM_PARENTS;
-
- dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0;
- dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
- dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
- dwmac->m250_mux.flags = 0;
- dwmac->m250_mux.table = NULL;
- dwmac->m250_mux.hw.init = &init;
-
- dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw);
- if (WARN_ON(IS_ERR(dwmac->m250_mux_clk)))
- return PTR_ERR(dwmac->m250_mux_clk);
-
- /* create the m250_div */
- snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
- init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
- init.ops = &clk_divider_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
- dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
- dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
- dwmac->m250_div.hw.init = &init;
- dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+ clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
+ clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
+ MUX_CLK_NUM_PARENTS, &clk_mux_ops,
+ &clk_configs->m250_mux.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+ clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
CLK_DIVIDER_ALLOW_ZERO |
CLK_DIVIDER_ROUND_CLOSEST;
-
- dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
- if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))
- return PTR_ERR(dwmac->m250_div_clk);
-
- /* create the fixed_div2 */
- snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev));
- init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
- init.ops = &clk_fixed_factor_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->fixed_div2.mult = 1;
- dwmac->fixed_div2.div = 2;
- dwmac->fixed_div2.hw.init = &init;
-
- dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw);
- if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk)))
- return PTR_ERR(dwmac->fixed_div2_clk);
-
- /* create the rgmii_tx_en */
- init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en",
- dev_name(dev));
- init.ops = &clk_gate_ops;
- init.flags = CLK_SET_RATE_PARENT;
- clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk);
- init.parent_names = clk_div_parents;
- init.num_parents = ARRAY_SIZE(clk_div_parents);
-
- dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
- dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
- dwmac->rgmii_tx_en.hw.init = &init;
-
- dwmac->rgmii_tx_en_clk = devm_clk_register(dev,
- &dwmac->rgmii_tx_en.hw);
- if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk)))
- return PTR_ERR(dwmac->rgmii_tx_en_clk);
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->fixed_div2.mult = 1;
+ clk_configs->fixed_div2.div = 2;
+ clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
+ &clk_fixed_factor_ops,
+ &clk_configs->fixed_div2.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ parent_name = __clk_get_name(clk);
+ clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
+ clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
+ &clk_gate_ops,
+ &clk_configs->rgmii_tx_en.hw);
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+ dwmac->rgmii_tx_clk = clk;
return 0;
}
@@ -219,19 +204,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
* a register) based on the line-speed (125MHz for Gbit speeds,
* 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s).
*/
- ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000);
+ ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
if (ret) {
- dev_err(&dwmac->pdev->dev,
+ dev_err(dwmac->dev,
"failed to set RGMII TX clock\n");
return ret;
}
- ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk);
+ ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
if (ret) {
- dev_err(&dwmac->pdev->dev,
+ dev_err(dwmac->dev,
"failed to enable the RGMII TX clock\n");
return ret;
}
+
+ devm_add_action_or_reset(dwmac->dev,
+ (void(*)(void *))clk_disable_unprepare,
+ dwmac->rgmii_tx_clk);
break;
case PHY_INTERFACE_MODE_RMII:
@@ -251,7 +240,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
break;
default:
- dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n",
+ dev_err(dwmac->dev, "unsupported phy-mode %s\n",
phy_modes(dwmac->phy_mode));
return -EINVAL;
}
@@ -292,7 +281,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
goto err_remove_config_dt;
}
- dwmac->pdev = pdev;
+ dwmac->dev = &pdev->dev;
dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
if (dwmac->phy_mode < 0) {
dev_err(&pdev->dev, "missing phy-mode property\n");
@@ -317,29 +306,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
- goto err_clk_disable;
+ goto err_remove_config_dt;
return 0;
-err_clk_disable:
- if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
- clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
err_remove_config_dt:
stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
-static int meson8b_dwmac_remove(struct platform_device *pdev)
-{
- struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-
- if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
- clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
-
- return stmmac_pltfr_remove(pdev);
-}
-
static const struct of_device_id meson8b_dwmac_match[] = {
{ .compatible = "amlogic,meson8b-dwmac" },
{ .compatible = "amlogic,meson-gxbb-dwmac" },
@@ -349,7 +325,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
static struct platform_driver meson8b_dwmac_driver = {
.probe = meson8b_dwmac_probe,
- .remove = meson8b_dwmac_remove,
+ .remove = stmmac_pltfr_remove,
.driver = {
.name = "meson8b-dwmac",
.pm = &stmmac_pltfr_pm_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 63795ecafc8d..46b9ae20ff6c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -120,7 +120,7 @@ static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
writel(value, ioaddr + base_register);
}
-static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
+static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
u8 packet, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
@@ -713,7 +713,7 @@ static const struct stmmac_ops dwmac4_ops = {
.rx_queue_enable = dwmac4_rx_queue_enable,
.rx_queue_prio = dwmac4_rx_queue_priority,
.tx_queue_prio = dwmac4_tx_queue_priority,
- .rx_queue_routing = dwmac4_tx_queue_routing,
+ .rx_queue_routing = dwmac4_rx_queue_routing,
.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
@@ -744,7 +744,7 @@ static const struct stmmac_ops dwmac410_ops = {
.rx_queue_enable = dwmac4_rx_queue_enable,
.rx_queue_prio = dwmac4_rx_queue_priority,
.tx_queue_prio = dwmac4_tx_queue_priority,
- .rx_queue_routing = dwmac4_tx_queue_routing,
+ .rx_queue_routing = dwmac4_rx_queue_routing,
.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index c728ffa095de..2a6521d33e43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
{
+ p->des0 = 0;
+ p->des1 = 0;
p->des2 = 0;
p->des3 = 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index a916e13624eb..75161e1b7e55 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -58,6 +58,7 @@ struct stmmac_tx_queue {
unsigned int dirty_tx;
dma_addr_t dma_tx_phy;
u32 tx_tail_addr;
+ u32 mss;
};
struct stmmac_rx_queue {
@@ -138,7 +139,6 @@ struct stmmac_priv {
spinlock_t ptp_lock;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
- u32 mss;
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7ad841434ec8..a9856a8bf8ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1355,6 +1355,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
+ tx_q->mss = 0;
netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
}
@@ -1843,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
if (unlikely(status & tx_dma_own))
break;
+ /* Make sure descriptor fields are read after reading
+ * the own bit.
+ */
+ dma_rmb();
+
/* Just consider the last segment and ...*/
if (likely(!(status & tx_not_ls))) {
/* ... verify the status error condition */
@@ -1946,6 +1952,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
(i == DMA_TX_SIZE - 1));
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
+ tx_q->mss = 0;
netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
stmmac_start_tx_dma(priv, chan);
@@ -2430,7 +2437,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
continue;
packet = priv->plat->rx_queues_cfg[queue].pkt_route;
- priv->hw->mac->rx_queue_prio(priv->hw, packet, queue);
+ priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
}
}
@@ -2632,7 +2639,6 @@ static int stmmac_open(struct net_device *dev)
priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
- priv->mss = 0;
ret = alloc_dma_desc_resources(priv);
if (ret < 0) {
@@ -2793,6 +2799,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
while (tmp_len > 0) {
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
desc = tx_q->dma_tx + tx_q->cur_tx;
desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
@@ -2872,11 +2879,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
mss = skb_shinfo(skb)->gso_size;
/* set new MSS value if needed */
- if (mss != priv->mss) {
+ if (mss != tx_q->mss) {
mss_desc = tx_q->dma_tx + tx_q->cur_tx;
priv->hw->desc->set_mss(mss_desc, mss);
- priv->mss = mss;
+ tx_q->mss = mss;
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
}
if (netif_msg_tx_queued(priv)) {
@@ -2887,6 +2895,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
}
first_entry = tx_q->cur_tx;
+ WARN_ON(tx_q->tx_skbuff[first_entry]);
desc = tx_q->dma_tx + first_entry;
first = desc;
@@ -2926,7 +2935,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
- tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
}
@@ -2980,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
/* If context desc is used to change MSS */
- if (mss_desc)
+ if (mss_desc) {
+ /* Make sure that first descriptor has been completely
+ * written, including its own bit. This is because MSS is
+ * actually before first descriptor, so we need to make
+		 * actually before the first descriptor, so we need to make
+ */
+ dma_wmb();
priv->hw->desc->set_tx_owner(mss_desc);
+ }
/* The own bit must be the latest setting done when prepare the
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
if (netif_msg_pktdata(priv)) {
pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3062,6 +3077,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
entry = tx_q->cur_tx;
first_entry = entry;
+ WARN_ON(tx_q->tx_skbuff[first_entry]);
csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
@@ -3090,6 +3106,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
bool last_segment = (i == (nfrags - 1));
entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+ WARN_ON(tx_q->tx_skbuff[entry]);
if (likely(priv->extend_desc))
desc = (struct dma_desc *)(tx_q->dma_etx + entry);
@@ -3101,8 +3118,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
if (dma_mapping_error(priv->device, des))
goto dma_map_err; /* should reuse desc w/o issues */
- tx_q->tx_skbuff[entry] = NULL;
-
tx_q->tx_skbuff_dma[entry].buf = des;
if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
desc->des0 = cpu_to_le32(des);
@@ -3211,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
}
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
@@ -4436,6 +4451,7 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
tx_q->cur_tx = 0;
tx_q->dirty_tx = 0;
+ tx_q->mss = 0;
}
}
@@ -4481,11 +4497,6 @@ int stmmac_resume(struct device *dev)
stmmac_reset_queues_param(priv);
- /* reset private mss value to force mss context settings at
- * next tso xmit (only used for gmac4).
- */
- priv->mss = 0;
-
stmmac_clear_descriptors(priv);
stmmac_hw_setup(ndev, false);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 05f122b8424a..ebd3e5ffa73c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -135,13 +135,14 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
* stmmac_mtl_setup - parse DT parameters for multiple queues configuration
* @pdev: platform device
*/
-static void stmmac_mtl_setup(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat)
+static int stmmac_mtl_setup(struct platform_device *pdev,
+ struct plat_stmmacenet_data *plat)
{
struct device_node *q_node;
struct device_node *rx_node;
struct device_node *tx_node;
u8 queue = 0;
+ int ret = 0;
/* For backwards-compatibility with device trees that don't have any
* snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
@@ -159,12 +160,12 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
if (!rx_node)
- return;
+ return ret;
tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
if (!tx_node) {
of_node_put(rx_node);
- return;
+ return ret;
}
/* Processing RX queues common config */
@@ -220,6 +221,11 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
queue++;
}
+ if (queue != plat->rx_queues_to_use) {
+ ret = -EINVAL;
+ dev_err(&pdev->dev, "Not all RX queues were configured\n");
+ goto out;
+ }
/* Processing TX queues common config */
if (of_property_read_u32(tx_node, "snps,tx-queues-to-use",
@@ -281,10 +287,18 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
queue++;
}
+ if (queue != plat->tx_queues_to_use) {
+ ret = -EINVAL;
+ dev_err(&pdev->dev, "Not all TX queues were configured\n");
+ goto out;
+ }
+out:
of_node_put(rx_node);
of_node_put(tx_node);
of_node_put(q_node);
+
+ return ret;
}
/**
@@ -376,6 +390,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
struct device_node *np = pdev->dev.of_node;
struct plat_stmmacenet_data *plat;
struct stmmac_dma_cfg *dma_cfg;
+ int rc;
plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
if (!plat)
@@ -402,8 +417,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
/* To Configure PHY by using all device-tree supported properties */
- if (stmmac_dt_phy(plat, np, &pdev->dev))
- return ERR_PTR(-ENODEV);
+ rc = stmmac_dt_phy(plat, np, &pdev->dev);
+ if (rc)
+ return ERR_PTR(rc);
of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
@@ -499,7 +515,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
plat->axi = stmmac_axi_setup(pdev);
- stmmac_mtl_setup(pdev, plat);
+ rc = stmmac_mtl_setup(pdev, plat);
+ if (rc) {
+ stmmac_remove_config_dt(pdev, plat);
+ return ERR_PTR(rc);
+ }
/* clock setup */
plat->stmmac_clk = devm_clk_get(&pdev->dev,
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b2b30c9df037..1b4af54a4968 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -120,14 +120,18 @@ do { \
#define CPDMA_RXCP 0x60
#define CPSW_POLL_WEIGHT 64
+#define CPSW_RX_VLAN_ENCAP_HDR_SIZE 4
#define CPSW_MIN_PACKET_SIZE (VLAN_ETH_ZLEN)
-#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN +\
+ ETH_FCS_LEN +\
+ CPSW_RX_VLAN_ENCAP_HDR_SIZE)
#define RX_PRIORITY_MAPPING 0x76543210
#define TX_PRIORITY_MAPPING 0x33221100
#define CPDMA_TX_PRIORITY_MAP 0x01234567
#define CPSW_VLAN_AWARE BIT(1)
+#define CPSW_RX_VLAN_ENCAP BIT(2)
#define CPSW_ALE_VLAN_AWARE 1
#define CPSW_FIFO_NORMAL_MODE (0 << 16)
@@ -148,6 +152,18 @@ do { \
#define CPSW_MAX_QUEUES 8
#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT 29
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK GENMASK(2, 0)
+#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT 16
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT 8
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK GENMASK(1, 0)
+enum {
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
+};
+
static int debug_level;
module_param(debug_level, int, 0);
MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
@@ -718,6 +734,49 @@ static void cpsw_tx_handler(void *token, int len, int status)
dev_kfree_skb_any(skb);
}
+static void cpsw_rx_vlan_encap(struct sk_buff *skb)
+{
+ struct cpsw_priv *priv = netdev_priv(skb->dev);
+ struct cpsw_common *cpsw = priv->cpsw;
+ u32 rx_vlan_encap_hdr = *((u32 *)skb->data);
+ u16 vtag, vid, prio, pkt_type;
+
+ /* Remove VLAN header encapsulation word */
+ skb_pull(skb, CPSW_RX_VLAN_ENCAP_HDR_SIZE);
+
+ pkt_type = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT) &
+ CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK;
+	/* Ignore unknown & Priority-tagged packets */
+ if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV ||
+ pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG)
+ return;
+
+ vid = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT) &
+ VLAN_VID_MASK;
+ /* Ignore vid 0 and pass packet as is */
+ if (!vid)
+ return;
+ /* Ignore default vlans in dual mac mode */
+ if (cpsw->data.dual_emac &&
+ vid == cpsw->slaves[priv->emac_port].port_vlan)
+ return;
+
+ prio = (rx_vlan_encap_hdr >>
+ CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT) &
+ CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK;
+
+ vtag = (prio << VLAN_PRIO_SHIFT) | vid;
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+
+ /* strip vlan tag for VLAN-tagged packet */
+ if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG) {
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+ skb_pull(skb, VLAN_HLEN);
+ }
+}
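
To make the field layout concrete, here is a standalone decode of one made-up
encapsulation word, using the same shifts and masks as above (the sample value
0xA0640000 and the VLAN priority shift of 13 are reproduced purely for illustration):

#include <stdio.h>

#define PRIO_SHIFT	29	/* CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT */
#define PRIO_MSK	0x7
#define VID_SHIFT	16	/* CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT */
#define VID_MSK		0xfff	/* VLAN_VID_MASK */
#define PKT_TYPE_SHIFT	8	/* CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT */
#define PKT_TYPE_MSK	0x3
#define PRIO_TO_VTAG	13	/* VLAN_PRIO_SHIFT */

int main(void)
{
	unsigned int hdr = 0xA0640000;	/* hypothetical sample word */
	unsigned int prio = (hdr >> PRIO_SHIFT) & PRIO_MSK;
	unsigned int vid = (hdr >> VID_SHIFT) & VID_MSK;
	unsigned int type = (hdr >> PKT_TYPE_SHIFT) & PKT_TYPE_MSK;

	printf("prio=%u vid=%u pkt_type=%u vtag=%#x\n",
	       prio, vid, type, (prio << PRIO_TO_VTAG) | vid);
	return 0;
}

This prints prio=5 vid=100 pkt_type=0 vtag=0xa064, i.e. a VLAN-tagged frame on VID 100
with priority 5, which cpsw_rx_vlan_encap() would hand to the stack via
__vlan_hwaccel_put_tag() after stripping the tag.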
+
static void cpsw_rx_handler(void *token, int len, int status)
{
struct cpdma_chan *ch;
@@ -752,6 +811,8 @@ static void cpsw_rx_handler(void *token, int len, int status)
if (new_skb) {
skb_copy_queue_mapping(new_skb, skb);
skb_put(skb, len);
+ if (status & CPDMA_RX_VLAN_ENCAP)
+ cpsw_rx_vlan_encap(skb);
cpts_rx_timestamp(cpsw->cpts, skb);
skb->protocol = eth_type_trans(skb, ndev);
netif_receive_skb(skb);
@@ -1407,7 +1468,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
CPSW_ALE_VLAN_AWARE);
control_reg = readl(&cpsw->regs->control);
- control_reg |= CPSW_VLAN_AWARE;
+ control_reg |= CPSW_VLAN_AWARE | CPSW_RX_VLAN_ENCAP;
writel(control_reg, &cpsw->regs->control);
fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
CPSW_FIFO_NORMAL_MODE;
@@ -3123,7 +3184,7 @@ static int cpsw_probe(struct platform_device *pdev)
cpsw->quirk_irq = true;
}
- ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
ndev->netdev_ops = &cpsw_netdev_ops;
ndev->ethtool_ops = &cpsw_ethtool_ops;
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 6f9173ff9414..31ae04117f0a 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1164,7 +1164,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
outlen -= CPDMA_DESC_CRC_LEN;
status = status & (CPDMA_DESC_EOQ | CPDMA_DESC_TD_COMPLETE |
- CPDMA_DESC_PORT_MASK);
+ CPDMA_DESC_PORT_MASK | CPDMA_RX_VLAN_ENCAP);
chan->head = desc_from_phys(pool, desc_read(desc, hw_next));
chan_write(chan, cp, desc_dma);
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index fd65ce2b83de..d399af5389b8 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -19,6 +19,8 @@
#define CPDMA_RX_SOURCE_PORT(__status__) ((__status__ >> 16) & 0x7)
+#define CPDMA_RX_VLAN_ENCAP BIT(19)
+
#define CPDMA_EOI_RX_THRESH 0x0
#define CPDMA_EOI_RX 0x1
#define CPDMA_EOI_TX 0x2
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89a9b93..516dd59249d7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
.exit_batch = geneve_exit_batch_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
+ .async = true,
};
static int __init geneve_init_module(void)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f38e32a7ec9c..127edd23018f 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
.exit = gtp_net_exit,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
+ .async = true,
};
static int __init gtp_init(void)
diff --git a/drivers/net/hyperv/Makefile b/drivers/net/hyperv/Makefile
index c8a66827100c..3f25b9c8ea59 100644
--- a/drivers/net/hyperv/Makefile
+++ b/drivers/net/hyperv/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
-hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o
+hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 7472172823f3..4123d081b1c7 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -36,6 +36,7 @@
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
+#include "netvsc_trace.h"
/*
* Switch the data path from the synthetic interface to the VF
@@ -57,6 +58,8 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
init_pkt->msg.v4_msg.active_dp.active_datapath =
NVSP_DATAPATH_SYNTHETIC;
+ trace_nvsp_send(ndev, init_pkt);
+
vmbus_sendpacket(dev->channel, init_pkt,
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
@@ -129,6 +132,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
revoke_packet->msg.v1_msg.
revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+ trace_nvsp_send(ndev, revoke_packet);
+
ret = vmbus_sendpacket(device->channel,
revoke_packet,
sizeof(struct nvsp_message),
@@ -169,6 +174,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
revoke_packet->msg.v1_msg.revoke_send_buf.id =
NETVSC_SEND_BUFFER_ID;
+ trace_nvsp_send(ndev, revoke_packet);
+
ret = vmbus_sendpacket(device->channel,
revoke_packet,
sizeof(struct nvsp_message),
@@ -298,6 +305,8 @@ static int netvsc_init_buf(struct hv_device *device,
init_packet->msg.v1_msg.
send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -377,6 +386,8 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_buf_gpadl_handle;
init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -445,6 +456,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -477,6 +490,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
}
+ trace_nvsp_send(ndev, init_packet);
+
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
@@ -489,6 +504,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
struct netvsc_device *net_device,
const struct netvsc_device_info *device_info)
{
+ struct net_device *ndev = hv_get_drvdata(device);
static const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
@@ -529,6 +545,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
send_ndis_ver.ndis_minor_ver =
ndis_version & 0xFFFF;
+ trace_nvsp_send(ndev, init_packet);
+
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
@@ -747,7 +765,7 @@ static inline int netvsc_send_pkt(
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- struct nvsp_1_message_send_rndis_packet * const rpkt =
+ struct nvsp_1_message_send_rndis_packet *rpkt =
&nvmsg.msg.v1_msg.send_rndis_pkt;
struct netvsc_channel * const nvchan =
&net_device->chan_table[packet->q_idx];
@@ -776,6 +794,8 @@ static inline int netvsc_send_pkt(
if (out_channel->rescind)
return -ENODEV;
+ trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
if (packet->page_buf_cnt) {
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;
@@ -1079,6 +1099,8 @@ static int netvsc_receive(struct net_device *ndev,
+ vmxferpage_packet->ranges[i].byte_offset;
u32 buflen = vmxferpage_packet->ranges[i].byte_count;
+ trace_rndis_recv(ndev, q_idx, data);
+
/* Pass it to the upper layer */
status = rndis_filter_receive(ndev, net_device,
channel, data, buflen);
@@ -1143,6 +1165,8 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct nvsp_message *nvmsg = hv_pkt_data(desc);
+ trace_nvsp_recv(ndev, channel, nvmsg);
+
switch (desc->type) {
case VM_PKT_COMP:
netvsc_send_completion(net_device, channel, device,
diff --git a/drivers/net/hyperv/netvsc_trace.c b/drivers/net/hyperv/netvsc_trace.c
new file mode 100644
index 000000000000..bb0ce5a2bcd5
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_trace.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/netdevice.h>
+
+#include "hyperv_net.h"
+
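+/*
+ * Defining CREATE_TRACE_POINTS before including the trace header makes this
+ * the one translation unit that emits the tracepoint definitions; every other
+ * file including netvsc_trace.h only sees the declarations.
+ */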
+#define CREATE_TRACE_POINTS
+#include "netvsc_trace.h"
diff --git a/drivers/net/hyperv/netvsc_trace.h b/drivers/net/hyperv/netvsc_trace.h
new file mode 100644
index 000000000000..f7585563dea5
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_trace.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_NETVSC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _NETVSC_TRACE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM netvsc
+#define TRACE_INCLUDE_FILE netvsc_trace
+
+TRACE_DEFINE_ENUM(RNDIS_MSG_PACKET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INDICATE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_HALT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE_C);
+
+#define show_rndis_type(type) \
+ __print_symbolic(type, \
+ { RNDIS_MSG_PACKET, "PACKET" }, \
+ { RNDIS_MSG_INDICATE, "INDICATE", }, \
+ { RNDIS_MSG_INIT, "INIT", }, \
+ { RNDIS_MSG_INIT_C, "INIT_C", }, \
+ { RNDIS_MSG_HALT, "HALT", }, \
+ { RNDIS_MSG_QUERY, "QUERY", }, \
+ { RNDIS_MSG_QUERY_C, "QUERY_C", }, \
+ { RNDIS_MSG_SET, "SET", }, \
+ { RNDIS_MSG_SET_C, "SET_C", }, \
+ { RNDIS_MSG_RESET, "RESET", }, \
+ { RNDIS_MSG_RESET_C, "RESET_C", }, \
+ { RNDIS_MSG_KEEPALIVE, "KEEPALIVE", }, \
+ { RNDIS_MSG_KEEPALIVE_C, "KEEPALIVE_C", })
+
+DECLARE_EVENT_CLASS(rndis_msg_class,
+ TP_PROTO(const struct net_device *ndev, u16 q,
+ const struct rndis_message *msg),
+ TP_ARGS(ndev, q, msg),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u16, queue )
+ __field( u32, req_id )
+ __field( u32, msg_type )
+ __field( u32, msg_len )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->queue = q;
+ __entry->req_id = msg->msg.init_req.req_id;
+ __entry->msg_type = msg->ndis_msg_type;
+ __entry->msg_len = msg->msg_len;
+ ),
+ TP_printk("dev=%s q=%u req=%#x type=%s msg_len=%u",
+ __get_str(name), __entry->queue, __entry->req_id,
+ show_rndis_type(__entry->msg_type), __entry->msg_len)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_send,
+ TP_PROTO(const struct net_device *ndev, u16 q,
+ const struct rndis_message *msg),
+ TP_ARGS(ndev, q, msg)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_recv,
+ TP_PROTO(const struct net_device *ndev, u16 q,
+ const struct rndis_message *msg),
+ TP_ARGS(ndev, q, msg)
+);
+
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT);
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_NDIS_VER);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG2_TYPE_SEND_NDIS_CONFIG);
+
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION);
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SWITCH_DATA_PATH);
+
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SUBCHANNEL);
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE);
+
+#define show_nvsp_type(type) \
+ __print_symbolic(type, \
+ { NVSP_MSG_TYPE_INIT, "INIT" }, \
+ { NVSP_MSG_TYPE_INIT_COMPLETE, "INIT_COMPLETE" }, \
+ { NVSP_MSG1_TYPE_SEND_NDIS_VER, "SEND_NDIS_VER" }, \
+ { NVSP_MSG1_TYPE_SEND_RECV_BUF, "SEND_RECV_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, "SEND_RECV_BUF_COMPLETE" }, \
+ { NVSP_MSG1_TYPE_REVOKE_RECV_BUF, "REVOKE_RECV_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_SEND_BUF, "SEND_SEND_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, "SEND_SEND_BUF_COMPLETE" }, \
+ { NVSP_MSG1_TYPE_REVOKE_SEND_BUF, "REVOKE_SEND_BUF" }, \
+ { NVSP_MSG1_TYPE_SEND_RNDIS_PKT, "SEND_RNDIS_PKT" }, \
+ { NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, "SEND_RNDIS_PKT_COMPLETE" },\
+ { NVSP_MSG2_TYPE_SEND_NDIS_CONFIG, "SEND_NDIS_CONFIG" }, \
+ { NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION, "SEND_VF_ASSOCIATION" }, \
+ { NVSP_MSG4_TYPE_SWITCH_DATA_PATH, "SWITCH_DATA_PATH" }, \
+ { NVSP_MSG5_TYPE_SUBCHANNEL, "SUBCHANNEL" }, \
+ { NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, "SEND_INDIRECTION_TABLE" })
+
+TRACE_EVENT(nvsp_send,
+ TP_PROTO(const struct net_device *ndev,
+ const struct nvsp_message *msg),
+ TP_ARGS(ndev, msg),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u32, msg_type )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->msg_type = msg->hdr.msg_type;
+ ),
+ TP_printk("dev=%s type=%s",
+ __get_str(name),
+ show_nvsp_type(__entry->msg_type))
+);
+
+TRACE_EVENT(nvsp_send_pkt,
+ TP_PROTO(const struct net_device *ndev,
+ const struct vmbus_channel *chan,
+ const struct nvsp_1_message_send_rndis_packet *rpkt),
+ TP_ARGS(ndev, chan, rpkt),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u16, qid )
+ __field( u32, channel_type )
+ __field( u32, section_index )
+ __field( u32, section_size )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->qid = chan->offermsg.offer.sub_channel_index;
+ __entry->channel_type = rpkt->channel_type;
+ __entry->section_index = rpkt->send_buf_section_index;
+ __entry->section_size = rpkt->send_buf_section_size;
+ ),
+ TP_printk("dev=%s qid=%u type=%s section=%u size=%d",
+ __get_str(name), __entry->qid,
+ __entry->channel_type ? "CONTROL" : "DATA",
+ __entry->section_index, __entry->section_size)
+);
+
+TRACE_EVENT(nvsp_recv,
+ TP_PROTO(const struct net_device *ndev,
+ const struct vmbus_channel *chan,
+ const struct nvsp_message *msg),
+ TP_ARGS(ndev, chan, msg),
+ TP_STRUCT__entry(
+ __string( name, ndev->name )
+ __field( u16, qid )
+ __field( u32, msg_type )
+ ),
+ TP_fast_assign(
+ __assign_str(name, ndev->name);
+ __entry->qid = chan->offermsg.offer.sub_channel_index;
+ __entry->msg_type = msg->hdr.msg_type;
+ ),
+ TP_printk("dev=%s qid=%u type=%s",
+ __get_str(name), __entry->qid,
+ show_nvsp_type(__entry->msg_type))
+);
+
+#endif /* _NETVSC_TRACE_H */
+
+/* This part must be outside protection */
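+/* netvsc_trace.h lives outside include/trace/events/, so point
+ * define_trace.h at the driver directory (the path is resolved relative
+ * to include/trace/).
+ */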
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/net/hyperv
+#include <trace/define_trace.h>
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index a6ec41c399d6..020f8bc54386 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,6 +31,7 @@
#include <linux/rtnetlink.h>
#include "hyperv_net.h"
+#include "netvsc_trace.h"
static void rndis_set_multicast(struct work_struct *w);
@@ -241,6 +242,8 @@ static int rndis_filter_send_request(struct rndis_device *dev,
pb[0].len;
}
+ trace_rndis_send(dev->ndev, 0, &req->request_msg);
+
rcu_read_lock_bh();
ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
rcu_read_unlock_bh();
@@ -942,12 +945,11 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
return true;
}
-static void rndis_filter_halt_device(struct rndis_device *dev)
+static void rndis_filter_halt_device(struct netvsc_device *nvdev,
+ struct rndis_device *dev)
{
struct rndis_request *request;
struct rndis_halt_request *halt;
- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1086,6 +1088,8 @@ void rndis_set_subchannel(struct work_struct *w)
init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
init_packet->msg.v5_msg.subchn_req.num_subchannels =
nvdev->num_chn - 1;
+ trace_nvsp_send(ndev, init_packet);
+
ret = vmbus_sendpacket(hv_dev->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
@@ -1350,7 +1354,7 @@ void rndis_filter_device_remove(struct hv_device *dev,
struct rndis_device *rndis_dev = net_dev->extension;
/* Halt and release the rndis device */
- rndis_filter_halt_device(rndis_dev);
+ rndis_filter_halt_device(net_dev, rndis_dev);
net_dev->extension = NULL;
diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig
index 303ba4133920..8782f5655e3f 100644
--- a/drivers/net/ieee802154/Kconfig
+++ b/drivers/net/ieee802154/Kconfig
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
exposes a debugfs node for each CA8210 instance which allows
direct use of the Cascoda API, exposing the 802.15.4 MAC
management entities.
+
+config IEEE802154_MCR20A
+ tristate "MCR20A transceiver driver"
+ depends on IEEE802154_DRIVERS && MAC802154
+ depends on SPI
+ ---help---
+ Say Y here to enable the MCR20A SPI 802.15.4 wireless
+ controller.
+
+ This driver can also be built as a module. To do so, say M here:
+ the module will be called 'mcr20a'.
diff --git a/drivers/net/ieee802154/Makefile b/drivers/net/ieee802154/Makefile
index bea1de5e726c..104744d5a668 100644
--- a/drivers/net/ieee802154/Makefile
+++ b/drivers/net/ieee802154/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644
index 000000000000..d9eb22a52551
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define SPI_COMMAND_BUFFER 3
+
+#define REGISTER_READ BIT(7)
+#define REGISTER_WRITE (0 << 7)
+#define REGISTER_ACCESS (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS BIT(5)
+
+#define MCR20A_WRITE_REG(x) (x)
+#define MCR20A_READ_REG(x) (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF (0x40)
+
+#define MCR20A_CMD_REG 0x80
+#define MCR20A_CMD_REG_MASK 0x3f
+#define MCR20A_CMD_WRITE 0x40
+#define MCR20A_CMD_FB 0x20
+
+/* Number of Interrupt Request Status Registers */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+ MCR20A_CCA_ED, // energy detect - CCA bit not active,
+ // not to be used for T and CCCA sequences
+ MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE3
+};
+
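+/* Transceiver sequence selector values written to PHY_CTRL1.XCVSEQ */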
+enum {
+ MCR20A_XCVSEQ_IDLE = 0x00,
+ MCR20A_XCVSEQ_RX = 0x01,
+ MCR20A_XCVSEQ_TX = 0x02,
+ MCR20A_XCVSEQ_CCA = 0x03,
+ MCR20A_XCVSEQ_TR = 0x04,
+ MCR20A_XCVSEQ_CCCA = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define MCR20A_MIN_CHANNEL (11)
+#define MCR20A_MAX_CHANNEL (26)
+#define MCR20A_CHANNEL_SPACING (5)
+
+/* MCR20A CCA Threshold constants */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
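+/* One entry per 2.4 GHz channel 11..26; mcr20a_set_channel() indexes both
+ * tables with (channel - 11).
+ */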
+static const u8 PLL_INT[16] = {
+ /* 2405 */ 0x0B, /* 2410 */ 0x0B, /* 2415 */ 0x0B,
+ /* 2420 */ 0x0B, /* 2425 */ 0x0B, /* 2430 */ 0x0B,
+ /* 2435 */ 0x0C, /* 2440 */ 0x0C, /* 2445 */ 0x0C,
+ /* 2450 */ 0x0C, /* 2455 */ 0x0C, /* 2460 */ 0x0C,
+ /* 2465 */ 0x0D, /* 2470 */ 0x0D, /* 2475 */ 0x0D,
+ /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+ /* 2405 */ 0x28, /* 2410 */ 0x50, /* 2415 */ 0x78,
+ /* 2420 */ 0xA0, /* 2425 */ 0xC8, /* 2430 */ 0xF0,
+ /* 2435 */ 0x18, /* 2440 */ 0x40, /* 2445 */ 0x68,
+ /* 2450 */ 0x90, /* 2455 */ 0xB8, /* 2460 */ 0xE0,
+ /* 2465 */ 0x08, /* 2470 */ 0x30, /* 2475 */ 0x58,
+ /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+ { IAR_MISC_PAD_CTRL, 0x02 },
+ { IAR_VCO_CTRL1, 0xB3 },
+ { IAR_VCO_CTRL2, 0x07 },
+ { IAR_PA_TUNING, 0x71 },
+ { IAR_CHF_IBUF, 0x2F },
+ { IAR_CHF_QBUF, 0x2F },
+ { IAR_CHF_IRIN, 0x24 },
+ { IAR_CHF_QRIN, 0x24 },
+ { IAR_CHF_IL, 0x24 },
+ { IAR_CHF_QL, 0x24 },
+ { IAR_CHF_CC1, 0x32 },
+ { IAR_CHF_CCL, 0x1D },
+ { IAR_CHF_CC2, 0x2D },
+ { IAR_CHF_IROUT, 0x24 },
+ { IAR_CHF_QROUT, 0x24 },
+ { IAR_PA_CAL, 0x28 },
+ { IAR_AGC_THR1, 0x55 },
+ { IAR_AGC_THR2, 0x2D },
+ { IAR_ATT_RSSI1, 0x5F },
+ { IAR_ATT_RSSI2, 0x8F },
+ { IAR_RSSI_OFFSET, 0x61 },
+ { IAR_CHF_PMA_GAIN, 0x03 },
+ { IAR_CCA1_THRESH, 0x50 },
+ { IAR_CORR_NVAL, 0x13 },
+ { IAR_ACKDELAY, 0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+ int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE 0x01
+#define MCR20A_DAR_READ 0x00
+#define MCR20A_DAR_NUMREGS 0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS 0x80
+#define MCR20A_IAR_NUMREGS 0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg) ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg) (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Size of the hardware source-address-match (indirect) queue */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ case DAR_PHY_CTRL1:
+ case DAR_PHY_CTRL2:
+ case DAR_PHY_CTRL3:
+ case DAR_PHY_CTRL4:
+ case DAR_SRC_CTRL:
+ case DAR_SRC_ADDRS_SUM_LSB:
+ case DAR_SRC_ADDRS_SUM_MSB:
+ case DAR_T3CMP_LSB:
+ case DAR_T3CMP_MSB:
+ case DAR_T3CMP_USB:
+ case DAR_T2PRIMECMP_LSB:
+ case DAR_T2PRIMECMP_MSB:
+ case DAR_T1CMP_LSB:
+ case DAR_T1CMP_MSB:
+ case DAR_T1CMP_USB:
+ case DAR_T2CMP_LSB:
+ case DAR_T2CMP_MSB:
+ case DAR_T2CMP_USB:
+ case DAR_T4CMP_LSB:
+ case DAR_T4CMP_MSB:
+ case DAR_T4CMP_USB:
+ case DAR_PLL_INT0:
+ case DAR_PLL_FRAC0_LSB:
+ case DAR_PLL_FRAC0_MSB:
+ case DAR_PA_PWR:
+ /* no DAR_ACM */
+ case DAR_OVERWRITE_VER:
+ case DAR_CLK_OUT_CTRL:
+ case DAR_PWR_MODES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_dar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case DAR_RX_FRM_LEN:
+ case DAR_CCA1_ED_FNL:
+ case DAR_EVENT_TMR_LSB:
+ case DAR_EVENT_TMR_MSB:
+ case DAR_EVENT_TMR_USB:
+ case DAR_TIMESTAMP_LSB:
+ case DAR_TIMESTAMP_MSB:
+ case DAR_TIMESTAMP_USB:
+ case DAR_SEQ_STATE:
+ case DAR_LQI_VALUE:
+ case DAR_RSSI_CCA_CONT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+ /* can be changed during runtime */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ /* also accessed directly via spi_async outside regmap, so volatile */
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+ /* don't clear irq line on read */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ return true;
+ default:
+ return false;
+ }
+}
+
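+/* A DAR access is a single SPI command byte: bit 7 selects read (1) or
+ * write (0) and bit 6 selects packet-buffer vs. register access, which is
+ * what the read/write flag masks below encode.
+ */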
+static const struct regmap_config mcr20a_dar_regmap = {
+ .name = "mcr20a_dar",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_dar_writeable,
+ .readable_reg = mcr20a_dar_readable,
+ .volatile_reg = mcr20a_dar_volatile,
+ .precious_reg = mcr20a_dar_precious,
+ .fast_io = true,
+ .can_multi_write = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case IAR_XTAL_TRIM:
+ case IAR_PMC_LP_TRIM:
+ case IAR_MACPANID0_LSB:
+ case IAR_MACPANID0_MSB:
+ case IAR_MACSHORTADDRS0_LSB:
+ case IAR_MACSHORTADDRS0_MSB:
+ case IAR_MACLONGADDRS0_0:
+ case IAR_MACLONGADDRS0_8:
+ case IAR_MACLONGADDRS0_16:
+ case IAR_MACLONGADDRS0_24:
+ case IAR_MACLONGADDRS0_32:
+ case IAR_MACLONGADDRS0_40:
+ case IAR_MACLONGADDRS0_48:
+ case IAR_MACLONGADDRS0_56:
+ case IAR_RX_FRAME_FILTER:
+ case IAR_PLL_INT1:
+ case IAR_PLL_FRAC1_LSB:
+ case IAR_PLL_FRAC1_MSB:
+ case IAR_MACPANID1_LSB:
+ case IAR_MACPANID1_MSB:
+ case IAR_MACSHORTADDRS1_LSB:
+ case IAR_MACSHORTADDRS1_MSB:
+ case IAR_MACLONGADDRS1_0:
+ case IAR_MACLONGADDRS1_8:
+ case IAR_MACLONGADDRS1_16:
+ case IAR_MACLONGADDRS1_24:
+ case IAR_MACLONGADDRS1_32:
+ case IAR_MACLONGADDRS1_40:
+ case IAR_MACLONGADDRS1_48:
+ case IAR_MACLONGADDRS1_56:
+ case IAR_DUAL_PAN_CTRL:
+ case IAR_DUAL_PAN_DWELL:
+ case IAR_CCA1_THRESH:
+ case IAR_CCA1_ED_OFFSET_COMP:
+ case IAR_LQI_OFFSET_COMP:
+ case IAR_CCA_CTRL:
+ case IAR_CCA2_CORR_PEAKS:
+ case IAR_CCA2_CORR_THRESH:
+ case IAR_TMR_PRESCALE:
+ case IAR_ANT_PAD_CTRL:
+ case IAR_MISC_PAD_CTRL:
+ case IAR_BSM_CTRL:
+ case IAR_RNG:
+ case IAR_RX_WTR_MARK:
+ case IAR_SOFT_RESET:
+ case IAR_TXDELAY:
+ case IAR_ACKDELAY:
+ case IAR_CORR_NVAL:
+ case IAR_ANT_AGC_CTRL:
+ case IAR_AGC_THR1:
+ case IAR_AGC_THR2:
+ case IAR_PA_CAL:
+ case IAR_ATT_RSSI1:
+ case IAR_ATT_RSSI2:
+ case IAR_RSSI_OFFSET:
+ case IAR_XTAL_CTRL:
+ case IAR_CHF_PMA_GAIN:
+ case IAR_CHF_IBUF:
+ case IAR_CHF_QBUF:
+ case IAR_CHF_IRIN:
+ case IAR_CHF_QRIN:
+ case IAR_CHF_IL:
+ case IAR_CHF_QL:
+ case IAR_CHF_CC1:
+ case IAR_CHF_CCL:
+ case IAR_CHF_CC2:
+ case IAR_CHF_IROUT:
+ case IAR_CHF_QROUT:
+ case IAR_PA_TUNING:
+ case IAR_VCO_CTRL1:
+ case IAR_VCO_CTRL2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_iar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case IAR_PART_ID:
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+/* can be changed during runtime */
+ switch (reg) {
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
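+/* IAR registers are reached through the IAR_INDEX/IAR_DATA window in the
+ * DAR space, hence the 16-bit register field and the IAR_INDEX byte in the
+ * SPI command masks below.
+ */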
+static const struct regmap_config mcr20a_iar_regmap = {
+ .name = "mcr20a_iar",
+ .reg_bits = 16,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_iar_writeable,
+ .readable_reg = mcr20a_iar_readable,
+ .volatile_reg = mcr20a_iar_volatile,
+ .fast_io = true,
+};
+
+struct mcr20a_local {
+ struct spi_device *spi;
+
+ struct ieee802154_hw *hw;
+ struct mcr20a_platform_data *pdata;
+ struct regmap *regmap_dar;
+ struct regmap *regmap_iar;
+
+ u8 *buf;
+
+ bool is_tx;
+
+ /* for writing tx buffer */
+ struct spi_message tx_buf_msg;
+ u8 tx_header[1];
+ /* burst buffer write command */
+ struct spi_transfer tx_xfer_header;
+ u8 tx_len[1];
+ /* len of tx packet */
+ struct spi_transfer tx_xfer_len;
+ /* data of tx packet */
+ struct spi_transfer tx_xfer_buf;
+ struct sk_buff *tx_skb;
+
+ /* for read length rxfifo */
+ struct spi_message reg_msg;
+ u8 reg_cmd[1];
+ u8 reg_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer reg_xfer_cmd;
+ struct spi_transfer reg_xfer_data;
+
+ /* receive handling */
+ struct spi_message rx_buf_msg;
+ u8 rx_header[1];
+ struct spi_transfer rx_xfer_header;
+ u8 rx_lqi[1];
+ struct spi_transfer rx_xfer_lqi;
+ u8 rx_buf[MCR20A_MAX_BUF];
+ struct spi_transfer rx_xfer_buf;
+
+ /* isr handling for reading intstat */
+ struct spi_message irq_msg;
+ u8 irq_header[1];
+ u8 irq_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer irq_xfer_data;
+ struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+ lp->reg_xfer_data.len = 1;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
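+/* Transmit is fully asynchronous: mcr20a_xmit() only forces the sequencer
+ * to IDLE; once the SEQ interrupt reports the idle state, the frame is
+ * written to the packet buffer (mcr20a_handle_tx) and
+ * mcr20a_write_tx_buf_complete() above then starts the TX sequence.
+ */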
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->tx_skb = skb;
+
+ print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, 0);
+
+ lp->is_tx = 1;
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_IDLE;
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+ WARN_ON(!level);
+ *level = 0xbe;
+ return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* frequency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+ PLL_FRAC[channel - 11]);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* No slotted operation */
+ dev_dbg(printdev(lp), "no slotted operation\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_SLOTTED, 0x0);
+
+ /* enable irq */
+ enable_irq(lp->spi->irq);
+
+ /* Unmask SEQ interrupt */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+ DAR_PHY_CTRL2_SEQMSK, 0x0);
+
+ /* Start the RX sequence */
+ dev_dbg(printdev(lp), "start the RX sequence\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* stop all running sequence */
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ /* disable irq */
+ disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+ struct ieee802154_hw_addr_filt *filt,
+ unsigned long changed)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+ u16 addr = le16_to_cpu(filt->short_addr);
+
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+ u16 pan = le16_to_cpu(filt->pan_id);
+
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+ u8 addr[8], i;
+
+ memcpy(addr, &filt->ieee_addr, 8);
+ for (i = 0; i < 8; i++)
+ regmap_write(lp->regmap_iar,
+ IAR_MACLONGADDRS0_0 + i, addr[i]);
+ }
+
+ if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+ if (filt->pan_coord) {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+ } else {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+ }
+ }
+
+ return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+ -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+ -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+ for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+ if (lp->hw->phy->supported.tx_powers[i] == mbm)
+ return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+ ((i + 8) & 0x1F));
+ }
+
+ return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+ const struct wpan_phy_cca *cca)
+{
+ struct mcr20a_local *lp = hw->priv;
+ unsigned int cca_mode = 0xff;
+ bool cca_mode_and = false;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* mapping 802.15.4 to driver spec */
+ switch (cca->mode) {
+ case NL802154_CCA_ENERGY:
+ cca_mode = MCR20A_CCA_MODE1;
+ break;
+ case NL802154_CCA_CARRIER:
+ cca_mode = MCR20A_CCA_MODE2;
+ break;
+ case NL802154_CCA_ENERGY_CARRIER:
+ switch (cca->opt) {
+ case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = true;
+ break;
+ case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_CCATYPE_MASK,
+ cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+ if (ret < 0)
+ return ret;
+
+ if (cca_mode == MCR20A_CCA_MODE3) {
+ if (cca_mode_and) {
+ ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x08);
+ } else {
+ ret = regmap_update_bits(lp->regmap_iar,
+ IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x00);
+ }
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+ if (hw->phy->supported.cca_ed_levels[i] == mbm)
+ return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+ }
+
+ return 0;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+ u8 rx_frame_filter_reg = 0x0;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+ if (on) {
+ /* All frame types accepted */
+ rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+ rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+ IAR_RX_FRAME_FLT_NS_FT);
+
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS,
+ DAR_PHY_CTRL4_PROMISCUOUS);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ rx_frame_filter_reg);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+ .owner = THIS_MODULE,
+ .xmit_async = mcr20a_xmit,
+ .ed = mcr20a_ed,
+ .set_channel = mcr20a_set_channel,
+ .start = mcr20a_start,
+ .stop = mcr20a_stop,
+ .set_hw_addr_filt = mcr20a_set_hw_addr_filt,
+ .set_txpower = mcr20a_set_txpower,
+ .set_cca_mode = mcr20a_set_cca_mode,
+ .set_cca_ed_level = mcr20a_set_cca_ed_level,
+ .set_promiscuous_mode = mcr20a_set_promiscuous_mode,
+};
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Start the RX sequence */
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
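+/* Receive runs as a chain of async SPI messages: the interrupt path first
+ * reads DAR_RX_FRM_LEN (mcr20a_handle_rx), its completion burst-reads the
+ * packet buffer plus the LQI byte, and the completion below hands the frame
+ * to mac802154 and re-arms the RX sequence.
+ */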
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ struct sk_buff *skb;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ dev_dbg(printdev(lp), "RX is done\n");
+
+ if (!ieee802154_is_valid_psdu_len(len)) {
+ dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+ len = IEEE802154_MTU;
+ }
+
+ len = len - 2; /* get rid of frame check field */
+
+ skb = dev_alloc_skb(len);
+ if (!skb)
+ return;
+
+ memcpy(skb_put(skb, len), lp->rx_buf, len);
+ ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+ print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ lp->rx_buf, len, 0);
+ pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+ /* start RX sequence */
+ mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* get the length of received frame */
+ len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+ /* prepare to read the rx buf */
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+ lp->rx_xfer_buf.len = len;
+
+ ret = spi_async(lp->spi, &lp->rx_buf_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to read rx buffer length\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+ lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+ return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* write tx buffer */
+ lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF;
+ /* add 2 bytes of FCS */
+ lp->tx_len[0] = lp->tx_skb->len + 2;
+ lp->tx_xfer_buf.tx_buf = lp->tx_skb->data;
+ /* add 1 byte psduLength */
+ lp->tx_xfer_buf.len = lp->tx_skb->len + 1;
+
+ ret = spi_async(lp->spi, &lp->tx_buf_msg);
+ if (ret) {
+ dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ enable_irq(lp->spi->irq);
+
+ dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+ lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
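+ /* seq_state holds the low three IRQSTS1 bits: SEQIRQ (bit 0), TXIRQ
+ * (bit 1) and RXIRQ (bit 2), so the cases below encode which interrupts
+ * fired together with the sequence-complete one.
+ */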
+ switch (seq_state) {
+ /* TX IRQ, RX IRQ and SEQ IRQ */
+ case (0x03):
+ if (lp->is_tx) {
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. No ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ }
+ break;
+ case (0x05):
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ break;
+ case (0x07):
+ if (lp->is_tx) {
+ /* tx is done */
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ } else {
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ }
+ break;
+ case (0x01):
+ if (lp->is_tx) {
+ dev_dbg(printdev(lp), "TX is starting\n");
+ mcr20a_handle_tx(lp);
+ } else {
+ dev_dbg(printdev(lp), "MCR20A is stop\n");
+ }
+ break;
+ }
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+ int ret;
+ struct mcr20a_local *lp = context;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ lp->reg_msg.complete = mcr20a_irq_clean_complete;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+ memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+ lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+
+ if (ret)
+ dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+ struct mcr20a_local *lp = data;
+ int ret;
+
+ disable_irq_nosync(irq);
+
+ lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+ /* read IRQSTSx */
+ ret = spi_async(lp->spi, &lp->irq_msg);
+ if (ret) {
+ enable_irq(irq);
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+ struct mcr20a_platform_data *pdata)
+{
+ int ret = 0;
+
+ if (!spi->dev.of_node)
+ return -EINVAL;
+
+ pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+ dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+ return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+ u8 i;
+ struct ieee802154_hw *hw = lp->hw;
+ struct wpan_phy *phy = lp->hw->phy;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ phy->symbol_duration = 16;
+ phy->lifs_period = 40;
+ phy->sifs_period = 12;
+
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+ IEEE802154_HW_AFILT |
+ IEEE802154_HW_PROMISCUOUS;
+
+ phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+ WPAN_PHY_FLAG_CCA_MODE;
+
+ phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+ BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+ phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+ BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+ /* initialize cca_ed_levels: entry i is -i * 100 mbm (0 to -110 dBm) */
+ for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+ ++i) {
+ mcr20a_ed_levels[i] = -i * 100;
+ }
+
+ phy->supported.cca_ed_levels = mcr20a_ed_levels;
+ phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+ phy->cca.mode = NL802154_CCA_ENERGY;
+
+ phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+ phy->current_page = 0;
+ /* MCR20A default reset value */
+ phy->current_channel = 20;
+ phy->symbol_duration = 16;
+ phy->supported.tx_powers = mcr20a_powers;
+ phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
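+ /* defaults: ED level index 75 is -7500 mbm (-75 dBm), matching the
+ * CCA threshold programmed in mcr20a_phy_init(); TX power index 0x0F
+ * is 0 dBm.
+ */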
+ phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+ phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->tx_buf_msg);
+ lp->tx_buf_msg.context = lp;
+ lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+ lp->tx_xfer_header.len = 1;
+ lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+ lp->tx_xfer_len.len = 1;
+ lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+ spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->reg_msg);
+ lp->reg_msg.context = lp;
+
+ lp->reg_xfer_cmd.len = 1;
+ lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+ lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+ lp->reg_xfer_data.rx_buf = lp->reg_data;
+ lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+ spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+ spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+ spi_message_init(&lp->rx_buf_msg);
+ lp->rx_buf_msg.context = lp;
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_xfer_header.len = 1;
+ lp->rx_xfer_header.tx_buf = lp->rx_header;
+ lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+ lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+ lp->rx_xfer_lqi.len = 1;
+ lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+ spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->irq_msg);
+ lp->irq_msg.context = lp;
+ lp->irq_msg.complete = mcr20a_irq_status_complete;
+ lp->irq_xfer_header.len = 1;
+ lp->irq_xfer_header.tx_buf = lp->irq_header;
+ lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+ lp->irq_xfer_data.len = MCR20A_IRQSTS_NUM;
+ lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+ spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+ spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+ u8 index;
+ unsigned int phy_reg = 0;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Disable Tristate on COCO MISO for SPI reads */
+ ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+ * immediately after init
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS2 */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+ DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+ DAR_IRQSTS2_WAKE_IRQ);
+ if (ret)
+ goto err_ret;
+
+ /* Disable all timer interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL1 : default HW settings + AUTOACK enabled */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+
+ /* PHY_CTRL2 : disable all interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL3 : disable all timers and remaining interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+ DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+ DAR_PHY_CTRL3_WAKE_MSK);
+ if (ret)
+ goto err_ret;
+
+ /* SRC_CTRL : enable Acknowledge Frame Pending and
+ * Source Address Matching Enable
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+ DAR_SRC_CTRL_ACK_FRM_PND |
+ (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+ if (ret)
+ goto err_ret;
+
+ /* RX_FRAME_FILTER */
+ /* FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret)
+ goto err_ret;
+
+ dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+ MCR20A_OVERWRITE_VERSION);
+
+ /* Overwrites direct registers */
+ ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+ MCR20A_OVERWRITE_VERSION);
+ if (ret)
+ goto err_ret;
+
+ /* Overwrites indirect registers */
+ ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+ ARRAY_SIZE(mar20a_iar_overwrites));
+ if (ret)
+ goto err_ret;
+
+ /* Clear HW indirect queue */
+ dev_dbg(printdev(lp), "clear HW indirect queue\n");
+ for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+ phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+ DAR_SRC_CTRL_INDEX_SHIFT)
+ | (DAR_SRC_CTRL_SRCADDR_EN)
+ | (DAR_SRC_CTRL_INDEX_DISABLE));
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+ phy_reg = 0;
+ }
+
+ /* Assign HW Indirect hash table to PAN0 */
+ ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Clear current lvl */
+ phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+ /* Set new lvl */
+ phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+ IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+ ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Set CCA threshold to -75 dBm */
+ ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+ if (ret)
+ goto err_ret;
+
+ /* Set prescaler to obtain 1 symbol (16us) timebase */
+ ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+ if (ret)
+ goto err_ret;
+
+ /* Enable autodoze mode. */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+ DAR_PWR_MODES_AUTODOZE,
+ DAR_PWR_MODES_AUTODOZE);
+ if (ret)
+ goto err_ret;
+
+ /* Disable clk_out */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+ DAR_CLK_OUT_CTRL_EN, 0x0);
+ if (ret)
+ goto err_ret;
+
+ return 0;
+
+err_ret:
+ return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+ struct ieee802154_hw *hw;
+ struct mcr20a_local *lp;
+ struct mcr20a_platform_data *pdata;
+ int irq_type;
+ int ret = -ENOMEM;
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ if (!spi->irq) {
+ dev_err(&spi->dev, "no IRQ specified\n");
+ return -EINVAL;
+ }
+
+ pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ /* set mcr20a platform data */
+ ret = mcr20a_get_platform_data(spi, pdata);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+ return ret;
+ }
+
+ /* init reset gpio */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+ GPIOF_OUT_INIT_HIGH, "reset");
+ if (ret)
+ return ret;
+ }
+
+ /* reset mcr20a */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 0);
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 1);
+ usleep_range(120, 240);
+ }
+
+ /* allocate ieee802154_hw and private data */
+ hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+ if (!hw) {
+ dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+ return -ENOMEM;
+ }
+
+ /* init mcr20a local data */
+ lp = hw->priv;
+ lp->hw = hw;
+ lp->spi = spi;
+ lp->spi->dev.platform_data = pdata;
+ lp->pdata = pdata;
+
+ /* init ieee802154_hw */
+ hw->parent = &spi->dev;
+ ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+ /* init buf */
+ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+ if (!lp->buf)
+ return -ENOMEM;
+
+ mcr20a_setup_tx_spi_messages(lp);
+ mcr20a_setup_rx_spi_messages(lp);
+ mcr20a_setup_irq_spi_messages(lp);
+
+ /* setup regmap */
+ lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+ if (IS_ERR(lp->regmap_dar)) {
+ ret = PTR_ERR(lp->regmap_dar);
+ dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+ ret);
+ goto free_dev;
+ }
+
+ lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+ if (IS_ERR(lp->regmap_iar)) {
+ ret = PTR_ERR(lp->regmap_iar);
+ dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+ goto free_dev;
+ }
+
+ mcr20a_hw_setup(lp);
+
+ spi_set_drvdata(spi, lp);
+
+ ret = mcr20a_phy_init(lp);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+ goto free_dev;
+ }
+
+ irq_type = irq_get_trigger_type(spi->irq);
+ if (!irq_type)
+ irq_type = IRQF_TRIGGER_FALLING;
+
+ ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+ irq_type, dev_name(&spi->dev), lp);
+ if (ret) {
+ dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+ ret = -ENODEV;
+ goto free_dev;
+ }
+
+ /* keep the IRQ disabled until the hardware is started */
+ disable_irq(spi->irq);
+
+ ret = ieee802154_register_hw(hw);
+ if (ret) {
+ dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+ goto free_dev;
+ }
+
+ return ret;
+
+free_dev:
+ ieee802154_free_hw(lp->hw);
+
+ return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+ struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ ieee802154_unregister_hw(lp->hw);
+ ieee802154_free_hw(lp->hw);
+
+ return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+ { .compatible = "nxp,mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+ { .name = "mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+ .id_table = mcr20a_device_id,
+ .driver = {
+ .of_match_table = of_match_ptr(mcr20a_of_match),
+ .name = "mcr20a",
+ },
+ .probe = mcr20a_probe,
+ .remove = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644
index 000000000000..6da4fd00b3c5
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.h
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Access Register */
+#define DAR_IRQ_STS1 0x00
+#define DAR_IRQ_STS2 0x01
+#define DAR_IRQ_STS3 0x02
+#define DAR_PHY_CTRL1 0x03
+#define DAR_PHY_CTRL2 0x04
+#define DAR_PHY_CTRL3 0x05
+#define DAR_RX_FRM_LEN 0x06
+#define DAR_PHY_CTRL4 0x07
+#define DAR_SRC_CTRL 0x08
+#define DAR_SRC_ADDRS_SUM_LSB 0x09
+#define DAR_SRC_ADDRS_SUM_MSB 0x0A
+#define DAR_CCA1_ED_FNL 0x0B
+#define DAR_EVENT_TMR_LSB 0x0C
+#define DAR_EVENT_TMR_MSB 0x0D
+#define DAR_EVENT_TMR_USB 0x0E
+#define DAR_TIMESTAMP_LSB 0x0F
+#define DAR_TIMESTAMP_MSB 0x10
+#define DAR_TIMESTAMP_USB 0x11
+#define DAR_T3CMP_LSB 0x12
+#define DAR_T3CMP_MSB 0x13
+#define DAR_T3CMP_USB 0x14
+#define DAR_T2PRIMECMP_LSB 0x15
+#define DAR_T2PRIMECMP_MSB 0x16
+#define DAR_T1CMP_LSB 0x17
+#define DAR_T1CMP_MSB 0x18
+#define DAR_T1CMP_USB 0x19
+#define DAR_T2CMP_LSB 0x1A
+#define DAR_T2CMP_MSB 0x1B
+#define DAR_T2CMP_USB 0x1C
+#define DAR_T4CMP_LSB 0x1D
+#define DAR_T4CMP_MSB 0x1E
+#define DAR_T4CMP_USB 0x1F
+#define DAR_PLL_INT0 0x20
+#define DAR_PLL_FRAC0_LSB 0x21
+#define DAR_PLL_FRAC0_MSB 0x22
+#define DAR_PA_PWR 0x23
+#define DAR_SEQ_STATE 0x24
+#define DAR_LQI_VALUE 0x25
+#define DAR_RSSI_CCA_CONT 0x26
+/*------------------ 0x27 */
+#define DAR_ASM_CTRL1 0x28
+#define DAR_ASM_CTRL2 0x29
+#define DAR_ASM_DATA_0 0x2A
+#define DAR_ASM_DATA_1 0x2B
+#define DAR_ASM_DATA_2 0x2C
+#define DAR_ASM_DATA_3 0x2D
+#define DAR_ASM_DATA_4 0x2E
+#define DAR_ASM_DATA_5 0x2F
+#define DAR_ASM_DATA_6 0x30
+#define DAR_ASM_DATA_7 0x31
+#define DAR_ASM_DATA_8 0x32
+#define DAR_ASM_DATA_9 0x33
+#define DAR_ASM_DATA_A 0x34
+#define DAR_ASM_DATA_B 0x35
+#define DAR_ASM_DATA_C 0x36
+#define DAR_ASM_DATA_D 0x37
+#define DAR_ASM_DATA_E 0x38
+#define DAR_ASM_DATA_F 0x39
+/*----------------------- 0x3A */
+#define DAR_OVERWRITE_VER 0x3B
+#define DAR_CLK_OUT_CTRL 0x3C
+#define DAR_PWR_MODES 0x3D
+#define IAR_INDEX 0x3E
+#define IAR_DATA 0x3F
+
+/* Indirect Register Memory */
+#define IAR_PART_ID 0x00
+#define IAR_XTAL_TRIM 0x01
+#define IAR_PMC_LP_TRIM 0x02
+#define IAR_MACPANID0_LSB 0x03
+#define IAR_MACPANID0_MSB 0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0 0x07
+#define IAR_MACLONGADDRS0_8 0x08
+#define IAR_MACLONGADDRS0_16 0x09
+#define IAR_MACLONGADDRS0_24 0x0A
+#define IAR_MACLONGADDRS0_32 0x0B
+#define IAR_MACLONGADDRS0_40 0x0C
+#define IAR_MACLONGADDRS0_48 0x0D
+#define IAR_MACLONGADDRS0_56 0x0E
+#define IAR_RX_FRAME_FILTER 0x0F
+#define IAR_PLL_INT1 0x10
+#define IAR_PLL_FRAC1_LSB 0x11
+#define IAR_PLL_FRAC1_MSB 0x12
+#define IAR_MACPANID1_LSB 0x13
+#define IAR_MACPANID1_MSB 0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0 0x17
+#define IAR_MACLONGADDRS1_8 0x18
+#define IAR_MACLONGADDRS1_16 0x19
+#define IAR_MACLONGADDRS1_24 0x1A
+#define IAR_MACLONGADDRS1_32 0x1B
+#define IAR_MACLONGADDRS1_40 0x1C
+#define IAR_MACLONGADDRS1_48 0x1D
+#define IAR_MACLONGADDRS1_56 0x1E
+#define IAR_DUAL_PAN_CTRL 0x1F
+#define IAR_DUAL_PAN_DWELL 0x20
+#define IAR_DUAL_PAN_STS 0x21
+#define IAR_CCA1_THRESH 0x22
+#define IAR_CCA1_ED_OFFSET_COMP 0x23
+#define IAR_LQI_OFFSET_COMP 0x24
+#define IAR_CCA_CTRL 0x25
+#define IAR_CCA2_CORR_PEAKS 0x26
+#define IAR_CCA2_CORR_THRESH 0x27
+#define IAR_TMR_PRESCALE 0x28
+/*-------------------- 0x29 */
+#define IAR_GPIO_DATA 0x2A
+#define IAR_GPIO_DIR 0x2B
+#define IAR_GPIO_PUL_EN 0x2C
+#define IAR_GPIO_PUL_SEL 0x2D
+#define IAR_GPIO_DS 0x2E
+/*------------------ 0x2F */
+#define IAR_ANT_PAD_CTRL 0x30
+#define IAR_MISC_PAD_CTRL 0x31
+#define IAR_BSM_CTRL 0x32
+/*------------------- 0x33 */
+#define IAR_RNG 0x34
+#define IAR_RX_BYTE_COUNT 0x35
+#define IAR_RX_WTR_MARK 0x36
+#define IAR_SOFT_RESET 0x37
+#define IAR_TXDELAY 0x38
+#define IAR_ACKDELAY 0x39
+#define IAR_SEQ_MGR_CTRL 0x3A
+#define IAR_SEQ_MGR_STS 0x3B
+#define IAR_SEQ_T_STS 0x3C
+#define IAR_ABORT_STS 0x3D
+#define IAR_CCCA_BUSY_CNT 0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1 0x41
+#define IAR_SRC_TBL_VALID2 0x42
+#define IAR_FILTERFAIL_CODE1 0x43
+#define IAR_FILTERFAIL_CODE2 0x44
+#define IAR_SLOT_PRELOAD 0x45
+/*-------------------- 0x46 */
+#define IAR_CORR_VT 0x47
+#define IAR_SYNC_CTRL 0x48
+#define IAR_PN_LSB_0 0x49
+#define IAR_PN_LSB_1 0x4A
+#define IAR_PN_MSB_0 0x4B
+#define IAR_PN_MSB_1 0x4C
+#define IAR_CORR_NVAL 0x4D
+#define IAR_TX_MODE_CTRL 0x4E
+#define IAR_SNF_THR 0x4F
+#define IAR_FAD_THR 0x50
+#define IAR_ANT_AGC_CTRL 0x51
+#define IAR_AGC_THR1 0x52
+#define IAR_AGC_THR2 0x53
+#define IAR_AGC_HYS 0x54
+#define IAR_AFC 0x55
+/*------------------- 0x56 */
+/*------------------- 0x57 */
+#define IAR_PHY_STS 0x58
+#define IAR_RX_MAX_CORR 0x59
+#define IAR_RX_MAX_PREAMBLE 0x5A
+#define IAR_RSSI 0x5B
+/*------------------- 0x5C */
+/*------------------- 0x5D */
+#define IAR_PLL_DIG_CTRL 0x5E
+#define IAR_VCO_CAL 0x5F
+#define IAR_VCO_BEST_DIFF 0x60
+#define IAR_VCO_BIAS 0x61
+#define IAR_KMOD_CTRL 0x62
+#define IAR_KMOD_CAL 0x63
+#define IAR_PA_CAL 0x64
+#define IAR_PA_PWRCAL 0x65
+#define IAR_ATT_RSSI1 0x66
+#define IAR_ATT_RSSI2 0x67
+#define IAR_RSSI_OFFSET 0x68
+#define IAR_RSSI_SLOPE 0x69
+#define IAR_RSSI_CAL1 0x6A
+#define IAR_RSSI_CAL2 0x6B
+/*------------------- 0x6C */
+/*------------------- 0x6D */
+#define IAR_XTAL_CTRL 0x6E
+#define IAR_XTAL_COMP_MIN 0x6F
+#define IAR_XTAL_COMP_MAX 0x70
+#define IAR_XTAL_GM 0x71
+/*------------------- 0x72 */
+/*------------------- 0x73 */
+#define IAR_LNA_TUNE 0x74
+#define IAR_LNA_AGCGAIN 0x75
+/*------------------- 0x76 */
+/*------------------- 0x77 */
+#define IAR_CHF_PMA_GAIN 0x78
+#define IAR_CHF_IBUF 0x79
+#define IAR_CHF_QBUF 0x7A
+#define IAR_CHF_IRIN 0x7B
+#define IAR_CHF_QRIN 0x7C
+#define IAR_CHF_IL 0x7D
+#define IAR_CHF_QL 0x7E
+#define IAR_CHF_CC1 0x7F
+#define IAR_CHF_CCL 0x80
+#define IAR_CHF_CC2 0x81
+#define IAR_CHF_IROUT 0x82
+#define IAR_CHF_QROUT 0x83
+/*------------------- 0x84 */
+/*------------------- 0x85 */
+#define IAR_RSSI_CTRL 0x86
+/*------------------- 0x87 */
+/*------------------- 0x88 */
+#define IAR_PA_BIAS 0x89
+#define IAR_PA_TUNING 0x8A
+/*------------------- 0x8B */
+/*------------------- 0x8C */
+#define IAR_PMC_HP_TRIM 0x8D
+#define IAR_VREGA_TRIM 0x8E
+/*------------------- 0x8F */
+/*------------------- 0x90 */
+#define IAR_VCO_CTRL1 0x91
+#define IAR_VCO_CTRL2 0x92
+/*------------------- 0x93 */
+/*------------------- 0x94 */
+#define IAR_ANA_SPARE_OUT1 0x95
+#define IAR_ANA_SPARE_OUT2 0x96
+#define IAR_ANA_SPARE_IN 0x97
+#define IAR_MISCELLANEOUS 0x98
+/*------------------- 0x99 */
+#define IAR_SEQ_MGR_OVRD0 0x9A
+#define IAR_SEQ_MGR_OVRD1 0x9B
+#define IAR_SEQ_MGR_OVRD2 0x9C
+#define IAR_SEQ_MGR_OVRD3 0x9D
+#define IAR_SEQ_MGR_OVRD4 0x9E
+#define IAR_SEQ_MGR_OVRD5 0x9F
+#define IAR_SEQ_MGR_OVRD6 0xA0
+#define IAR_SEQ_MGR_OVRD7 0xA1
+/*------------------- 0xA2 */
+#define IAR_TESTMODE_CTRL 0xA3
+#define IAR_DTM_CTRL1 0xA4
+#define IAR_DTM_CTRL2 0xA5
+#define IAR_ATM_CTRL1 0xA6
+#define IAR_ATM_CTRL2 0xA7
+#define IAR_ATM_CTRL3 0xA8
+/*------------------- 0xA9 */
+#define IAR_LIM_FE_TEST_CTRL 0xAA
+#define IAR_CHF_TEST_CTRL 0xAB
+#define IAR_VCO_TEST_CTRL 0xAC
+#define IAR_PLL_TEST_CTRL 0xAD
+#define IAR_PA_TEST_CTRL 0xAE
+#define IAR_PMC_TEST_CTRL 0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ BIT(4)
+#define DAR_IRQSTS1_CCAIRQ BIT(3)
+#define DAR_IRQSTS1_RXIRQ BIT(2)
+#define DAR_IRQSTS1_TXIRQ BIT(1)
+#define DAR_IRQSTS1_SEQIRQ BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID BIT(7)
+#define DAR_IRQSTS2_CCA BIT(6)
+#define DAR_IRQSTS2_SRCADDR BIT(5)
+#define DAR_IRQSTS2_PI BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK BIT(7)
+#define DAR_IRQSTS3_TMR3MSK BIT(6)
+#define DAR_IRQSTS3_TMR2MSK BIT(5)
+#define DAR_IRQSTS3_TMR1MSK BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT 5
+#define DAR_PHY_CTRL1_RXACKRQD BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK 0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK BIT(3)
+#define DAR_PHY_CTRL2_RXMSK BIT(2)
+#define DAR_PHY_CTRL2_TXMSK BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0 BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR BIT(7)
+#define DAR_ASM_CTRL1_START BIT(6)
+#define DAR_ASM_CTRL1_SELFTST BIT(5)
+#define DAR_ASM_CTRL1_CTR BIT(4)
+#define DAR_ASM_CTRL1_CBC BIT(3)
+#define DAR_ASM_CTRL1_AES BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT (5)
+#define DAR_ASM_CTRL2_TSTPAS BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ BIT(6)
+#define DAR_CLK_OUT_CTRL_SR BIT(5)
+#define DAR_CLK_OUT_CTRL_DS BIT(4)
+#define DAR_CLK_OUT_CTRL_EN BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY BIT(5)
+#define DAR_PWR_MODES_XTALEN BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN BIT(3)
+#define DAR_PWR_MODES_AUTODOZE BIT(1)
+#define DAR_PWR_MODES_PMC_MODE BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1 BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1 BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0 BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1 BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK BIT(IAR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST BIT(7)
+#define IAR_SOFT_RESET_REGS_RST BIT(4)
+#define IAR_SOFT_RESET_PLL_RST BIT(3)
+#define IAR_SOFT_RESET_TX_RST BIT(2)
+#define IAR_SOFT_RESET_RX_RST BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8 (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR BIT(6)
+#define IAR_PHY_STS_PLL_LOCK BIT(5)
+#define IAR_PHY_STS_CRCVALID BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL BIT(2)
+#define IAR_PHY_STS_SFD_DET BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN BIT(6)
+#define IAR_DTM_CTRL1_PAGE5 BIT(5)
+#define IAR_DTM_CTRL1_PAGE4 BIT(4)
+#define IAR_DTM_CTRL1_PAGE3 BIT(3)
+#define IAR_DTM_CTRL1_PAGE2 BIT(2)
+#define IAR_DTM_CTRL1_PAGE1 BIT(1)
+#define IAR_DTM_CTRL1_PAGE0 BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2 BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1 BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0 BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK (7)
+
+#endif /* _MCR20A_H */
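
The IRQSTS1 masks defined above are what an interrupt handler tests after reading the status byte back from the chip. A minimal sketch of how a driver might dispatch on them; the per-event handlers and the mcr20a_dispatch_irq() name are hypothetical placeholders, not part of this header:

static void mcr20a_dispatch_irq(struct mcr20a_local *lp, u8 irqsts1)
{
	if (irqsts1 & DAR_IRQSTS1_RXIRQ)
		mcr20a_handle_rx(lp);		/* hypothetical RX path */
	if (irqsts1 & DAR_IRQSTS1_TXIRQ)
		mcr20a_handle_tx_done(lp);	/* hypothetical TX completion */
	if (irqsts1 & DAR_IRQSTS1_SEQIRQ)
		mcr20a_seq_idle(lp);		/* hypothetical sequencer hook */
}
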
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 5166575a164d..adb826f55e60 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -74,6 +74,7 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
+ spinlock_t addrs_lock;
};
struct ipvl_addr {
@@ -176,4 +177,10 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
void ipvlan_link_setup(struct net_device *dev);
int ipvlan_link_register(struct rtnl_link_ops *ops);
+
+static inline bool netif_is_ipvlan_port(const struct net_device *dev)
+{
+ return rcu_access_pointer(dev->rx_handler) == ipvlan_handle_frame;
+}
+
#endif /* __IPVLAN_H */
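
netif_is_ipvlan_port() identifies an ipvlan master by the rx_handler installed on it rather than by a priv_flag (IFF_IPVLAN_MASTER is dropped in ipvlan_main.c below). A usage sketch, assuming, as ipvlan does, that the port structure is stored as the device's rx_handler_data:

static struct ipvl_port *example_port_get_rcu(const struct net_device *d)
{
	if (!netif_is_ipvlan_port(d))
		return NULL;

	/* valid only under rcu_read_lock() or RTNL */
	return rcu_dereference(d->rx_handler_data);
}
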
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index c1f008fe4e1d..1a8132eb2a3e 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -35,6 +35,7 @@ void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);
+#if IS_ENABLED(CONFIG_IPV6)
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
const struct in6_addr *ip6_addr = iaddr;
@@ -42,6 +43,12 @@ static u8 ipvlan_get_v6_hash(const void *iaddr)
return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
IPVLAN_HASH_MASK;
}
+#else
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+ return 0;
+}
+#endif
static u8 ipvlan_get_v4_hash(const void *iaddr)
{
@@ -51,6 +58,23 @@ static u8 ipvlan_get_v4_hash(const void *iaddr)
IPVLAN_HASH_MASK;
}
+static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
+{
+ if (!is_v6 && addr->atype == IPVL_IPV4) {
+ struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+ return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (is_v6 && addr->atype == IPVL_IPV6) {
+ struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+ return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+ }
+
+ return false;
+}
+
static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
const void *iaddr, bool is_v6)
{
@@ -59,15 +83,9 @@ static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
ipvlan_get_v4_hash(iaddr);
- hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
- if (is_v6 && addr->atype == IPVL_IPV6 &&
- ipv6_addr_equal(&addr->ip6addr, iaddr))
- return addr;
- else if (!is_v6 && addr->atype == IPVL_IPV4 &&
- addr->ip4addr.s_addr ==
- ((struct in_addr *)iaddr)->s_addr)
+ hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
+ if (addr_equal(is_v6, addr, iaddr))
return addr;
- }
return NULL;
}
@@ -91,29 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
- struct ipvl_addr *addr;
+ struct ipvl_addr *addr, *ret = NULL;
- list_for_each_entry(addr, &ipvlan->addrs, anode) {
- if ((is_v6 && addr->atype == IPVL_IPV6 &&
- ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
- (!is_v6 && addr->atype == IPVL_IPV4 &&
- addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
- return addr;
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+ if (addr_equal(is_v6, addr, iaddr)) {
+ ret = addr;
+ break;
+ }
}
- return NULL;
+ rcu_read_unlock();
+ return ret;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
struct ipvl_dev *ipvlan;
+ bool ret = false;
- ASSERT_RTNL();
-
- list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+ if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+ ret = true;
+ break;
+ }
}
- return false;
+ rcu_read_unlock();
+ return ret;
}
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -150,6 +172,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
lyr3h = ip4h;
break;
}
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6): {
struct ipv6hdr *ip6h;
@@ -188,6 +211,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
}
break;
}
+#endif
default:
return NULL;
}
@@ -337,14 +361,18 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
{
struct ipvl_addr *addr = NULL;
- if (addr_type == IPVL_IPV6) {
+ switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPVL_IPV6: {
struct ipv6hdr *ip6h;
struct in6_addr *i6addr;
ip6h = (struct ipv6hdr *)lyr3h;
i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
addr = ipvlan_ht_addr_lookup(port, i6addr, true);
- } else if (addr_type == IPVL_ICMPV6) {
+ break;
+ }
+ case IPVL_ICMPV6: {
struct nd_msg *ndmh;
struct in6_addr *i6addr;
@@ -356,14 +384,19 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
i6addr = &ndmh->target;
addr = ipvlan_ht_addr_lookup(port, i6addr, true);
}
- } else if (addr_type == IPVL_IPV4) {
+ break;
+ }
+#endif
+ case IPVL_IPV4: {
struct iphdr *ip4h;
__be32 *i4addr;
ip4h = (struct iphdr *)lyr3h;
i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
addr = ipvlan_ht_addr_lookup(port, i4addr, false);
- } else if (addr_type == IPVL_ARP) {
+ break;
+ }
+ case IPVL_ARP: {
struct arphdr *arph;
unsigned char *arp_ptr;
__be32 dip;
@@ -377,6 +410,8 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
memcpy(&dip, arp_ptr, 4);
addr = ipvlan_ht_addr_lookup(port, &dip, false);
+ break;
+ }
}
return addr;
@@ -420,6 +455,7 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -456,6 +492,12 @@ err:
out:
return ret;
}
+#else
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+ return NET_XMIT_DROP;
+}
+#endif
static int ipvlan_process_outbound(struct sk_buff *skb)
{
@@ -464,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
/* In this mode we dont care about multicast and broadcast traffic */
if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
- ntohs(skb->protocol));
+ pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
kfree_skb(skb);
goto out;
}
@@ -759,6 +801,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
goto out;
break;
}
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
{
struct dst_entry *dst;
@@ -774,10 +817,12 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
};
skb_dst_drop(skb);
- dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+ dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+ skb, flags);
skb_dst_set(skb, dst);
break;
}
+#endif
default:
break;
}
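
With addr_equal() factored out and the list/hash walks converted to their _rcu variants, address lookups no longer depend on RTNL; an RCU read-side critical section is enough. A sketch of a read-side caller (the function name is illustrative):

static bool example_v4_addr_is_local(struct ipvl_port *port,
				     const struct in_addr *a4)
{
	struct ipvl_addr *addr;
	bool local;

	rcu_read_lock();
	addr = ipvlan_ht_addr_lookup(port, a4, false);
	local = addr != NULL;
	rcu_read_unlock();

	return local;
}
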
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 2469df118fbf..743d37fb034a 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -22,12 +22,14 @@ static const struct nf_hook_ops ipvl_nfops[] = {
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
+#if IS_ENABLED(CONFIG_IPV6)
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
+#endif
};
static const struct l3mdev_ops ipvl_l3mdev_ops = {
@@ -127,7 +129,6 @@ static int ipvlan_port_create(struct net_device *dev)
if (err)
goto err;
- dev->priv_flags |= IFF_IPVLAN_MASTER;
return 0;
err:
@@ -140,7 +141,6 @@ static void ipvlan_port_destroy(struct net_device *dev)
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
struct sk_buff *skb;
- dev->priv_flags &= ~IFF_IPVLAN_MASTER;
if (port->mode == IPVLAN_MODE_L3S) {
dev->priv_flags &= ~IFF_L3MDEV_MASTER;
ipvlan_unregister_nf_hook(dev_net(dev));
@@ -176,7 +176,7 @@ static int ipvlan_init(struct net_device *dev)
dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
(phy_dev->state & IPVLAN_STATE_MASK);
dev->features = phy_dev->features & IPVLAN_FEATURES;
- dev->features |= NETIF_F_LLTX;
+ dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
dev->gso_max_size = phy_dev->gso_max_size;
dev->gso_max_segs = phy_dev->gso_max_segs;
dev->hard_header_len = phy_dev->hard_header_len;
@@ -225,8 +225,10 @@ static int ipvlan_open(struct net_device *dev)
else
dev->flags &= ~IFF_NOARP;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
+ rcu_read_unlock();
return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
@@ -242,8 +244,10 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_del(phy_dev, phy_dev->dev_addr);
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
+ rcu_read_unlock();
return 0;
}
@@ -417,6 +421,12 @@ static const struct header_ops ipvlan_header_ops = {
.cache_update = eth_header_cache_update,
};
+static bool netif_is_ipvlan(const struct net_device *dev)
+{
+ /* both ipvlan and ipvtap devices use the same netdev_ops */
+ return dev->netdev_ops == &ipvlan_netdev_ops;
+}
+
static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
@@ -586,6 +596,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
ipvlan->sfeatures = IPVLAN_FEATURES;
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
+ spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
@@ -593,7 +604,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
*/
memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
- dev->priv_flags |= IFF_IPVLAN_SLAVE;
+ dev->priv_flags |= IFF_NO_RX_HANDLER;
err = register_netdevice(dev);
if (err < 0)
@@ -663,11 +674,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
+ spin_lock_bh(&ipvlan->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
+ spin_unlock_bh(&ipvlan->addrs_lock);
ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
@@ -758,8 +771,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
- pnode)
+ list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
&lst_kill);
unregister_netdevice_many(&lst_kill);
@@ -791,6 +803,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+/* the caller must hold the addrs lock */
static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
@@ -800,14 +813,17 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
return -ENOMEM;
addr->master = ipvlan;
- if (is_v6) {
- memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
- addr->atype = IPVL_IPV6;
- } else {
+ if (!is_v6) {
memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
addr->atype = IPVL_IPV4;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+ addr->atype = IPVL_IPV6;
+#endif
}
- list_add_tail(&addr->anode, &ipvlan->addrs);
+
+ list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
/* If the interface is not up, the address will be added to the hash
* list by ipvlan_open.
@@ -822,32 +838,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
+ spin_lock_bh(&ipvlan->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
- if (!addr)
+ if (!addr) {
+ spin_unlock_bh(&ipvlan->addrs_lock);
return;
+ }
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
+ spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);
-
- return;
-}
-
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
- if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
- netif_err(ipvlan, ifup, ipvlan->dev,
- "Failed to add IPv6=%pI6c addr for %s intf\n",
- ip6_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip6_addr, true);
-}
-
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
-{
- return ipvlan_del_addr(ipvlan, ip6_addr, true);
}
static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -863,6 +864,27 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
return true;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
+ netif_err(ipvlan, ifup, ipvlan->dev,
+ "Failed to add IPv6=%pI6c addr for %s intf\n",
+ ip6_addr, ipvlan->dev->name);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+ return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
static int ipvlan_addr6_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -894,10 +916,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
@@ -913,17 +931,21 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
return NOTIFY_OK;
}
+#endif
static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip4_addr, false);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -993,6 +1015,7 @@ static struct notifier_block ipvlan_notifier_block __read_mostly = {
.notifier_call = ipvlan_device_event,
};
+#if IS_ENABLED(CONFIG_IPV6)
static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
.notifier_call = ipvlan_addr6_event,
};
@@ -1000,6 +1023,7 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
.notifier_call = ipvlan_addr6_validator_event,
};
+#endif
static void ipvlan_ns_exit(struct net *net)
{
@@ -1016,6 +1040,7 @@ static struct pernet_operations ipvlan_net_ops = {
.id = &ipvlan_netid,
.size = sizeof(struct ipvlan_netns),
.exit = ipvlan_ns_exit,
+ .async = true,
};
static int __init ipvlan_init_module(void)
@@ -1024,9 +1049,11 @@ static int __init ipvlan_init_module(void)
ipvlan_init_secret();
register_netdevice_notifier(&ipvlan_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
register_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
@@ -1045,9 +1072,11 @@ error:
unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
unregister_inetaddr_validator_notifier(
&ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
unregister_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
unregister_netdevice_notifier(&ipvlan_notifier_block);
return err;
}
@@ -1060,9 +1089,11 @@ static void __exit ipvlan_cleanup_module(void)
unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
unregister_inetaddr_validator_notifier(
&ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
unregister_inet6addr_validator_notifier(
&ipvlan_addr6_vtor_notifier_block);
+#endif
}
module_init(ipvlan_init_module);
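
The address list is now an RCU-protected list guarded on the write side by the new per-device addrs_lock, while readers only take rcu_read_lock(). Restating the write-side pattern established above as a standalone sketch (function names are illustrative):

static void example_publish_addr(struct ipvl_dev *ipvlan,
				 struct ipvl_addr *addr)
{
	spin_lock_bh(&ipvlan->addrs_lock);
	list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
	spin_unlock_bh(&ipvlan->addrs_lock);
}

static void example_retire_addr(struct ipvl_dev *ipvlan,
				struct ipvl_addr *addr)
{
	spin_lock_bh(&ipvlan->addrs_lock);
	list_del_rcu(&addr->anode);
	spin_unlock_bh(&ipvlan->addrs_lock);
	kfree_rcu(addr, rcu);	/* readers may still be walking the list */
}
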
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 30612497643c..b97a907ea5aa 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -230,4 +230,5 @@ out:
/* Registered in net/core/dev.c */
struct pernet_operations __net_initdata loopback_net_ops = {
.init = loopback_net_init,
+ .async = true,
};
diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c
index e8ae50e1255e..319edc9c8ec7 100644
--- a/drivers/net/phy/aquantia.c
+++ b/drivers/net/phy/aquantia.c
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
return 0;
}
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
- int reg;
-
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
static int aquantia_config_intr(struct phy_device *phydev)
{
int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ1202",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ2104",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR105",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR106",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR107",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR405",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
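
The per-driver aneg_done helper removed above read MDIO_STAT1 from the AN MMD and tested the completion bit; genphy_c45_aneg_done does the equivalent, so the table entries can simply point at the generic helper. For reference, a sketch mirroring the removed code:

static int example_c45_aneg_done(struct phy_device *phydev)
{
	int reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);

	return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
}
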
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 421feb8f92fe..29b1c88b55cc 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -565,7 +565,7 @@ static int bcm7xxx_28nm_set_tunable(struct phy_device *phydev,
if (ret)
return ret;
- /* Disable EEE advertisment since this prevents the PHY
+ /* Disable EEE advertisement since this prevents the PHY
* from successfully linking up, trigger auto-negotiation restart
* to let the MAC decide what to do.
*/
diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index 9442db221834..8022cd317f62 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
MII_ADDR_C45 | regnum);
}
-static int cortina_config_aneg(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
static int cortina_read_status(struct phy_device *phydev)
{
int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
return ret;
}
-static int cortina_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int cortina_probe(struct phy_device *phydev)
{
u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
.phy_id = PHY_ID_CS4340,
.phy_id_mask = 0xffffffff,
.name = "Cortina CS4340",
- .config_aneg = cortina_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = cortina_read_status,
- .soft_reset = cortina_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.probe = cortina_probe,
},
};
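
With the gen10g_* stubs exported from phy-c45.c (see that file further down), a Clause 45 driver that needs only the default behaviour can be wired entirely from generic helpers instead of carrying private copies. A hypothetical skeleton; the phy_id and name are made up:

static struct phy_driver example_10g_driver[] = {
{
	.phy_id		= 0x00112233,		/* hypothetical ID */
	.phy_id_mask	= 0xffffffff,
	.name		= "Example 10G PHY",
	.soft_reset	= gen10g_no_soft_reset,
	.config_init	= gen10g_config_init,
	.config_aneg	= gen10g_config_aneg,
	.read_status	= gen10g_read_status,
	.aneg_done	= genphy_c45_aneg_done,
},
};
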
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index ab58224f897f..b3935778b19f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -75,6 +75,8 @@
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8)
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8
/* CFG4 bits */
#define DP83867_CFG4_PORT_MIRROR_EN BIT(0)
@@ -92,6 +94,7 @@ struct dp83867_private {
int io_impedance;
int port_mirroring;
bool rxctrl_strap_quirk;
+ int clk_output_sel;
};
static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev)
dp83867->io_impedance = -EINVAL;
/* Optional configuration */
+ ret = of_property_read_u32(of_node, "ti,clk-output-sel",
+ &dp83867->clk_output_sel);
+ if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
+ /* Keep the default value if ti,clk-output-sel is not set
+ * or too high
+ */
+ dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
+
if (of_property_read_bool(of_node, "ti,max-output-impedance"))
dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
@@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev)
if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
dp83867_config_port_mirroring(phydev);
+ /* Clock output selection if muxing property is set */
+ if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) {
+ val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG);
+ val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+ val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+ phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val);
+ }
+
return 0;
}
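
The clock-output update above is a read-modify-write confined to the CLK_O_SEL field: with a device-tree value of, say, 0x0B (hypothetical), the driver computes val = (val & ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK) | (0x0B << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT), so only bits 12:8 of IO_MUX_CFG change and the impedance trim bits in the same register are preserved. A missing ti,clk-output-sel property, or a value above DP83867_CLK_O_SEL_REF_CLK, leaves the strap default untouched because config_init then skips the register write.
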
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 0e0978d8a0eb..a75c511950c3 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -860,7 +860,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
return err;
/* There appears to be a bug in the 88e1512 when used in
- * SGMII to copper mode, where the AN advertisment register
+ * SGMII to copper mode, where the AN advertisement register
* clears the pause bits each time a negotiation occurs.
* This means we can never be truely sure what was advertised,
* so disable Pause support.
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 8a0bd98fdec7..9564916d2d7b 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
return 0;
}
-/*
- * Resetting the MV88X3310 causes it to become non-responsive. Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int mv3310_config_init(struct phy_device *phydev)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -317,7 +308,7 @@ static int mv3310_read_status(struct phy_device *phydev)
if (val < 0)
return val;
- /* Read the link partner's 1G advertisment */
+ /* Read the link partner's 1G advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MV_AN_STAT1000);
if (val < 0)
return val;
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
SUPPORTED_10000baseT_Full |
SUPPORTED_Backplane,
.probe = mv3310_probe,
- .soft_reset = mv3310_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = mv3310_config_init,
.config_aneg = mv3310_config_aneg,
.aneg_done = mv3310_aneg_done,
diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c
index 2573ab012f16..70f6115530af 100644
--- a/drivers/net/phy/mdio-mux-mmioreg.c
+++ b/drivers/net/phy/mdio-mux-mmioreg.c
@@ -163,8 +163,9 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
mdio_mux_mmioreg_switch_fn,
&s->mux_handle, s, NULL);
if (ret) {
- dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n",
- np);
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "failed to register mdio-mux bus %pOF\n", np);
return ret;
}
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a4576859afae..e1225545362d 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -163,11 +163,11 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
EXPORT_SYMBOL_GPL(genphy_c45_read_link);
/**
- * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * genphy_c45_read_lpa - read the link partner advertisement and pause
* @phydev: target phy_device struct
*
* Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
- * filling in the link partner advertisment, pause and asym_pause members
+ * filling in the link partner advertisement, pause and asym_pause members
* in @phydev. This assumes that the auto-negotiation MMD is present, and
* the backplane bit (7.48.0) is clear. Clause 45 PHY drivers are expected
* to fill in the remainder of the link partner advert from vendor registers.
@@ -176,7 +176,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
{
int val;
- /* Read the link partner's base page advertisment */
+ /* Read the link partner's base page advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
if (val < 0)
return val;
@@ -185,7 +185,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
- /* Read the link partner's 10G advertisment */
+ /* Read the link partner's 10G advertisement */
val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
if (val < 0)
return val;
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
/* The gen10g_* functions are the old Clause 45 stub */
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
{
u32 mmd_mask = phydev->c45_ids.devices_in_package;
int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_read_status);
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
{
/* Do nothing for now */
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
{
/* Temporarily just say we support everything */
phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_init);
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_suspend);
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_resume);
struct phy_driver genphy_10g_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic 10G PHY",
- .soft_reset = gen10g_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = gen10g_config_init,
.features = 0,
.config_aneg = gen10g_config_aneg,
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 4083f00c97a5..c7da4cbb1103 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -190,10 +190,10 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
}
/**
- * phy_resolve_aneg_linkmode - resolve the advertisments into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
* @phydev: The phy_device struct
*
- * Resolve our and the link partner advertisments into their corresponding
+ * Resolve our and the link partner advertisements into their corresponding
* speed and duplex. If full duplex was negotiated, extract the pause mode
* from the link partner mask.
*/
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 9aabfa1a455a..05c1e8ef15e6 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -628,19 +628,10 @@ static int phy_disable_interrupts(struct phy_device *phydev)
/* Disable PHY interrupts */
err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
if (err)
- goto phy_err;
+ return err;
/* Clear the interrupt */
- err = phy_clear_interrupt(phydev);
- if (err)
- goto phy_err;
-
- return 0;
-
-phy_err:
- phy_error(phydev);
-
- return err;
+ return phy_clear_interrupt(phydev);
}
/**
@@ -773,13 +764,8 @@ void phy_stop(struct phy_device *phydev)
if (PHY_HALTED == phydev->state)
goto out_unlock;
- if (phy_interrupt_is_valid(phydev)) {
- /* Disable PHY Interrupts */
- phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
-
- /* Clear any pending interrupts */
- phy_clear_interrupt(phydev);
- }
+ if (phy_interrupt_is_valid(phydev))
+ phy_disable_interrupts(phydev);
phydev->state = PHY_HALTED;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 74664a6c0cdc..ac23322a32e1 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -374,7 +374,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
dev->duplex = -1;
dev->pause = 0;
dev->asym_pause = 0;
- dev->link = 1;
+ dev->link = 0;
dev->interface = PHY_INTERFACE_MODE_GMII;
dev->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6ac8b29b2dc3..51a011a349fe 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -364,7 +364,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat
}
/* Flow control is resolved according to our and the link partners
- * advertisments using the following drawn from the 802.3 specs:
+ * advertisements using the following drawn from the 802.3 specs:
* Local device Link partner
* Pause AsymDir Pause AsymDir Result
* 1 X 1 X TX+RX
@@ -679,12 +679,11 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = phy;
pl->phydev = phy;
linkmode_copy(pl->supported, supported);
linkmode_copy(pl->link_config.advertising, config.advertising);
- /* Restrict the phy advertisment according to the MAC support. */
+ /* Restrict the phy advertisement according to the MAC support. */
ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
phy->advertising = advertising;
mutex_unlock(&pl->state_mutex);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
if (phy) {
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = NULL;
pl->phydev = NULL;
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
@@ -889,7 +887,7 @@ void phylink_start(struct phylink *pl)
/* Apply the link configuration to the MAC when starting. This allows
* a fixed-link to start with the correct parameters, and also
- * ensures that we set the appropriate advertisment for Serdes links.
+ * ensures that we set the appropriate advertisement for Serdes links.
*/
phylink_resolve_flow(pl, &pl->link_config);
phylink_mac_config(pl, &pl->link_config);
@@ -1076,7 +1074,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
config = pl->link_config;
- /* Mask out unsupported advertisments */
+ /* Mask out unsupported advertisements */
linkmode_and(config.advertising, kset->link_modes.advertising,
pl->supported);
@@ -1121,7 +1119,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
if (phylink_validate(pl, pl->supported, &config))
return -EINVAL;
- /* If autonegotiation is enabled, we must have an advertisment */
+ /* If autonegotiation is enabled, we must have an advertisement */
if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
return -EINVAL;
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
bool changed;
u8 port;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
- iface = sfp_parse_interface(pl->sfp_bus, id);
-
ASSERT_RTNL();
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- case PHY_INTERFACE_MODE_10GKR:
- break;
- default:
- return -EINVAL;
- }
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
memset(&config, 0, sizeof(config));
linkmode_copy(config.advertising, support);
- config.interface = iface;
+ config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
config.pause = MLO_PAUSE_AN;
@@ -1611,6 +1598,22 @@ static int phylink_sfp_module_insert(void *upstream,
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
if (ret) {
+ netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ if (iface == PHY_INTERFACE_MODE_NA) {
+ netdev_err(pl->netdev,
+ "selection of interface failed, advertisement %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ return -EINVAL;
+ }
+
+ config.interface = iface;
+ ret = phylink_validate(pl, support, &config);
+ if (ret) {
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
phylink_an_mode_str(MLO_AN_INBAND),
phy_modes(config.interface),
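
The module-insert path now validates twice: once with PHY_INTERFACE_MODE_NA to prune the advertisement mask down to what the MAC supports, then again after sfp_select_interface() has picked a concrete interface from that pruned mask. Condensed as a sketch, error handling elided, using only the helpers from the hunk above:

	sfp_parse_support(pl->sfp_bus, id, support);
	linkmode_copy(config.advertising, support);
	config.interface = PHY_INTERFACE_MODE_NA;
	ret = phylink_validate(pl, support, &config);	/* prune by MAC caps */

	iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
	config.interface = iface;
	ret = phylink_validate(pl, support, &config);	/* re-check with iface */
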
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8961209ee949..3d4ff5d0d2a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
EXPORT_SYMBOL_GPL(sfp_parse_port);
/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- phy_interface_t iface;
-
- /* Setting the serdes link mode is guesswork: there's no field in
- * the EEPROM which indicates what mode should be used.
- *
- * If the module wants 64b66b, then it must be >= 10G.
- *
- * If it's a gigabit-only fiber module, it probably does not have
- * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
- * to SGMII mode (which is required to support non-gigabit speeds).
- */
- switch (id->base.encoding) {
- case SFP_ENCODING_8472_64B66B:
- iface = PHY_INTERFACE_MODE_10GKR;
- break;
-
- case SFP_ENCODING_8B10B:
- if (!id->base.e1000_base_t &&
- !id->base.e100_base_lx &&
- !id->base.e100_base_fx)
- iface = PHY_INTERFACE_MODE_1000BASEX;
- else
- iface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- if (id->base.e1000_base_cx) {
- iface = PHY_INTERFACE_MODE_1000BASEX;
- break;
- }
-
- iface = PHY_INTERFACE_MODE_NA;
- dev_err(bus->sfp_dev,
- "SFP module encoding does not support 8b10b nor 64b66b\n");
- break;
- }
-
- return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
-/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
* @id: a pointer to the module's &struct sfp_eeprom_id
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
unsigned int br_min, br_nom, br_max;
-
- phylink_set(support, Autoneg);
- phylink_set(support, Pause);
- phylink_set(support, Asym_Pause);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
/* Decode the bitrate information to MBd */
br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
/* Set ethtool support from the compliance fields. */
if (id->base.e10g_base_sr)
- phylink_set(support, 10000baseSR_Full);
+ phylink_set(modes, 10000baseSR_Full);
if (id->base.e10g_base_lr)
- phylink_set(support, 10000baseLR_Full);
+ phylink_set(modes, 10000baseLR_Full);
if (id->base.e10g_base_lrm)
- phylink_set(support, 10000baseLRM_Full);
+ phylink_set(modes, 10000baseLRM_Full);
if (id->base.e10g_base_er)
- phylink_set(support, 10000baseER_Full);
+ phylink_set(modes, 10000baseER_Full);
if (id->base.e1000_base_sx ||
id->base.e1000_base_lx ||
id->base.e1000_base_cx)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
if (id->base.e1000_base_t) {
- phylink_set(support, 1000baseT_Half);
- phylink_set(support, 1000baseT_Full);
+ phylink_set(modes, 1000baseT_Half);
+ phylink_set(modes, 1000baseT_Full);
}
/* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
/* This may look odd, but some manufacturers use 12000MBd */
if (br_min <= 12000 && br_max >= 10300)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
if (br_min <= 3200 && br_max >= 3100)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (br_min <= 1300 && br_max >= 1200)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
if (id->base.sfp_ct_passive) {
if (id->base.passive.sff8431_app_e)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
if (id->base.sfp_ct_active) {
if (id->base.active.sff8431_app_e ||
id->base.active.sff8431_lim) {
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
}
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
case 0x00: /* Unspecified */
break;
case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
- phylink_set(support, 100000baseSR4_Full);
- phylink_set(support, 25000baseSR_Full);
+ phylink_set(modes, 100000baseSR4_Full);
+ phylink_set(modes, 25000baseSR_Full);
break;
case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
- phylink_set(support, 100000baseLR4_ER4_Full);
+ phylink_set(modes, 100000baseLR4_ER4_Full);
break;
case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
case 0x0c: /* 25Gbase-CR CA-S */
case 0x0d: /* 25Gbase-CR CA-N */
- phylink_set(support, 100000baseCR4_Full);
- phylink_set(support, 25000baseCR_Full);
+ phylink_set(modes, 100000baseCR4_Full);
+ phylink_set(modes, 25000baseCR_Full);
break;
default:
dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
id->base.fc_speed_200 ||
id->base.fc_speed_400) {
if (id->base.br_nominal >= 31)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (id->base.br_nominal >= 12)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
+
+ /* If we haven't discovered any modes that this module supports, try
+ * the encoding and bitrate to determine supported modes. Some BiDi
+ * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+ * the differing wavelengths, so do not set any transceiver bits.
+ */
+ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* If the encoding and bit rate allows 1000baseX */
+ if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ br_min <= 1300 && br_max >= 1200)
+ phylink_set(modes, 1000baseX_Full);
+ }
+
+ bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
}
EXPORT_SYMBOL_GPL(sfp_parse_support);
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ if (phylink_test(link_modes, 10000baseCR_Full) ||
+ phylink_test(link_modes, 10000baseSR_Full) ||
+ phylink_test(link_modes, 10000baseLR_Full) ||
+ phylink_test(link_modes, 10000baseLRM_Full) ||
+ phylink_test(link_modes, 10000baseER_Full))
+ return PHY_INTERFACE_MODE_10GKR;
+
+ if (phylink_test(link_modes, 2500baseX_Full))
+ return PHY_INTERFACE_MODE_2500BASEX;
+
+ if (id->base.e1000_base_t ||
+ id->base.e100_base_lx ||
+ id->base.e100_base_fx)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (phylink_test(link_modes, 1000baseX_Full))
+ return PHY_INTERFACE_MODE_1000BASEX;
+
+ dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+ return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
static LIST_HEAD(sfp_buses);
static DEFINE_MUTEX(sfp_mutex);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6c7d9289078d..83bf4959b043 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -42,6 +42,7 @@ enum {
SFP_MOD_EMPTY = 0,
SFP_MOD_PROBE,
+ SFP_MOD_HPOWER,
SFP_MOD_PRESENT,
SFP_MOD_ERROR,
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
* access the I2C EEPROM. However, Avago modules require 300ms.
*/
#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
#define T_PROBE_RETRY msecs_to_jiffies(100)
/* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
const struct sff_data *type;
+ u32 max_power_mW;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
int (*read)(struct sfp *, bool, u8, void *, size_t);
+ int (*write)(struct sfp *, bool, u8, void *, size_t);
struct gpio_desc *gpio[GPIO_MAX];
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
}
}
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
- void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
struct i2c_msg msgs[2];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
int ret;
msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
msgs[1].len = len;
msgs[1].buf = buf;
- ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
if (ret < 0)
return ret;
return ret == ARRAY_SIZE(msgs) ? len : 0;
}
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
- size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
- return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+ struct i2c_msg msgs[1];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1 + len;
+ msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+ if (!msgs[0].buf)
+ return -ENOMEM;
+
+ msgs[0].buf[0] = dev_addr;
+ memcpy(&msgs[0].buf[1], buf, len);
+
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+ kfree(msgs[0].buf);
+
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
}
static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
sfp->i2c = i2c;
sfp->read = sfp_i2c_read;
+ sfp->write = sfp_i2c_write;
i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
return sfp->read(sfp, a2, addr, buf, len);
}
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->write(sfp, a2, addr, buf, len);
+}
+
static unsigned int sfp_check(void *buf, size_t len)
{
u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
sfp_sm_probe_phy(sfp);
}
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+ u32 power;
+ u8 val;
+ int err;
+
+ power = 1000;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power = 2000;
+
+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+ (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+ SFP_DIAGMON_DDM) {
+ /* The module appears not to implement bus address 0xa2,
+ * or requires an address change sequence, so assume that
+ * the module powers up in the indicated power mode.
+ */
+ if (power > sfp->max_power_mW) {
+ dev_err(sfp->dev,
+ "Host does not support %u.%uW modules\n",
+ power / 1000, (power / 100) % 10);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (power > sfp->max_power_mW) {
+ dev_warn(sfp->dev,
+ "Host does not support %u.%uW modules, module left in power mode 1\n",
+ power / 1000, (power / 100) % 10);
+ return 0;
+ }
+
+ if (power <= 1000)
+ return 0;
+
+ err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ val |= BIT(0);
+
+ err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+ power / 1000, (power / 100) % 10);
+ return T_HPOWER_LEVEL;
+
+err:
+ return err;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp)
{
/* SFP module inserted - read I2C data */
struct sfp_eeprom_id id;
u8 check;
- int err;
+ int ret;
- err = sfp_read(sfp, false, 0, &id, sizeof(id));
- if (err < 0) {
- dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (ret < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
return -EAGAIN;
}
- if (err != sizeof(id)) {
- dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ if (ret != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
return -EAGAIN;
}
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
dev_warn(sfp->dev,
"module address swap to access page 0xA2 is not supported.\n");
- return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ if (ret < 0)
+ return ret;
+
+ return sfp_sm_mod_hpower(sfp);
}
static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
if (event == SFP_E_REMOVE) {
sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
} else if (event == SFP_E_TIMEOUT) {
- int err = sfp_sm_mod_probe(sfp);
+ int val = sfp_sm_mod_probe(sfp);
- if (err == 0)
+ if (val == 0)
sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
- else if (err == -EAGAIN)
- sfp_sm_set_timer(sfp, T_PROBE_RETRY);
- else
+ else if (val > 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+ else if (val != -EAGAIN)
sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ else
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
}
break;
+ case SFP_MOD_HPOWER:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ break;
+ }
+ /* fallthrough */
case SFP_MOD_PRESENT:
case SFP_MOD_ERROR:
if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
if (!(sfp->gpio[GPIO_MODDEF0]))
sfp->get_state = sff_gpio_get_state;
+ device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+ &sfp->max_power_mW);
+ if (!sfp->max_power_mW)
+ sfp->max_power_mW = 1000;
+
+ dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+ sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
return -ENOMEM;
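
The power-level handling above first classifies the module from its option bits, then compares that against the host's maximum-power-milliwatt property (1.0 W when absent) before deciding whether to set the high-power bit in SFP_EXT_STATUS. As a standalone sketch of the classification step, taken directly from the logic in sfp_sm_mod_hpower():

static u32 example_module_power_mW(const struct sfp_eeprom_id *id)
{
	u32 power = 1000;				/* power level 1 */

	if (id->ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
		power = 1500;				/* power level 2 */
	if (id->ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
		power = 2000;				/* power level 3 */

	return power;
}
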
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index fb2cef764e9a..22f3bdd8206c 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
MDIO_PHYXS_LNSTAT_SYNC3 | \
MDIO_PHYXS_LNSTAT_ALIGN)
-static int teranetics_config_init(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_aneg_done(struct phy_device *phydev)
{
- int reg;
-
/* auto negotiation state can only be checked when using copper
* port, if using fiber port, just lie it's done.
*/
- if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
- }
+ if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+ return genphy_c45_aneg_done(phydev);
return 1;
}
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_read_status(struct phy_device *phydev)
{
int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
.phy_id = PHY_ID_TN2020,
.phy_id_mask = 0xffffffff,
.name = "Teranetics TN2020",
- .soft_reset = teranetics_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.aneg_done = teranetics_aneg_done,
- .config_init = teranetics_config_init,
- .config_aneg = teranetics_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = teranetics_read_status,
.match_phy_device = teranetics_match_phy_device,
},
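
The teranetics driver drops its do-nothing soft_reset/config_init/config_aneg stubs and its open-coded autoneg check in favour of the generic clause-45 helpers. A rough sketch of what the delegated aneg_done check amounts to (assumption: genphy_c45_aneg_done() polls MDIO_STAT1 in the AN MMD, matching the removed open-coded version):

    /* Sketch of the check the driver now delegates to the phylib C45 helper. */
    static int aneg_done_sketch(struct phy_device *phydev)
    {
            int reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);

            return (reg < 0) ? reg : !!(reg & MDIO_AN_STAT1_COMPLETE);
    }
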
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index da1937832c99..926c2c322d43 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -970,6 +970,7 @@ static struct pernet_operations ppp_net_ops = {
.exit = ppp_exit_net,
.id = &ppp_net_id,
.size = sizeof(struct ppp_net),
+ .async = true,
};
static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5aa59f41bf8c..c10e6181a2f0 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -714,7 +714,7 @@ err_put:
}
static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = sizeof(struct sockaddr_pppox);
struct sockaddr_pppox sp;
@@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
- *usockaddr_len = len;
-
- return 0;
+ return len;
}
static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
@@ -1163,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
.exit = pppoe_exit_net,
.id = &pppoe_net_id,
.size = sizeof(struct pppoe_net),
+ .async = true,
};
static int __init pppoe_init(void)
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 6dde9a0cfe76..8249d46a7844 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
}
static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = sizeof(struct sockaddr_pppox);
struct sockaddr_pppox sp;
@@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
- *usockaddr_len = len;
-
- return 0;
+ return len;
}
static int pptp_release(struct socket *sock)
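
The pppoe and pptp getname hunks track the tree-wide proto_ops change: getname() now returns the filled-in address length (or a negative errno) instead of writing it through a separate int pointer, removing one unchecked output parameter. A minimal sketch of a caller under the new convention (helper name hypothetical):

    /* Sketch: callers now take the address length from the return value. */
    static int getname_len_sketch(struct socket *sock, struct sockaddr_storage *ss)
    {
            int len = sock->ops->getname(sock, (struct sockaddr *)ss, 0);

            return len;     /* >= 0: bytes written into *ss, < 0: -errno */
    }
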
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 56c701b73c12..222093e878a8 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
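
team_port_add() and team_upper_dev_link() now receive the netlink_ext_ack from the ndo_add_slave path, so each enslave failure carries a machine-readable reason back to the requester in addition to the existing netdev_err() log line. The pattern, reduced to a sketch (function name hypothetical):

    /* Sketch: pairing the kernel log message with an extack string. */
    static int check_port_sketch(struct net_device *port_dev,
                                 struct netlink_ext_ack *extack)
    {
            if (port_dev->flags & IFF_UP) {
                    NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
                    return -EBUSY;
            }
            return 0;
    }
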
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 28cfa642e39a..a1ba262f40ad 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -78,6 +78,7 @@
#include <linux/mutex.h>
#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */
@@ -1613,7 +1614,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
unsigned int delta = 0;
char *buf;
size_t copied;
- bool xdp_xmit = false;
int err, pad = TUN_RX_PAD;
rcu_read_lock();
@@ -1671,8 +1671,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
preempt_enable();
return NULL;
case XDP_TX:
- xdp_xmit = true;
- /* fall through */
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+ if (tun_xdp_xmit(tun->dev, &xdp))
+ goto err_redirect;
+ tun_xdp_flush(tun->dev);
+ rcu_read_unlock();
+ preempt_enable();
+ return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
break;
@@ -1699,14 +1705,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- if (xdp_xmit) {
- skb->dev = tun->dev;
- generic_xdp_tx(skb, xdp_prog);
- rcu_read_unlock();
- preempt_enable();
- return NULL;
- }
-
rcu_read_unlock();
preempt_enable();
@@ -2287,11 +2285,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
return -EINVAL;
}
+static size_t tun_get_size(const struct net_device *dev)
+{
+ BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
+ BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
+
+ return nla_total_size(sizeof(uid_t)) + /* OWNER */
+ nla_total_size(sizeof(gid_t)) + /* GROUP */
+ nla_total_size(sizeof(u8)) + /* TYPE */
+ nla_total_size(sizeof(u8)) + /* PI */
+ nla_total_size(sizeof(u8)) + /* VNET_HDR */
+ nla_total_size(sizeof(u8)) + /* PERSIST */
+ nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
+ nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
+ nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
+ 0;
+}
+
+static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
+ goto nla_put_failure;
+ if (uid_valid(tun->owner) &&
+ nla_put_u32(skb, IFLA_TUN_OWNER,
+ from_kuid_munged(current_user_ns(), tun->owner)))
+ goto nla_put_failure;
+ if (gid_valid(tun->group) &&
+ nla_put_u32(skb, IFLA_TUN_GROUP,
+ from_kgid_munged(current_user_ns(), tun->group)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
+ !!(tun->flags & IFF_MULTI_QUEUE)))
+ goto nla_put_failure;
+ if (tun->flags & IFF_MULTI_QUEUE) {
+ if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
+ tun->numdisabled))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static struct rtnl_link_ops tun_link_ops __read_mostly = {
.kind = DRV_NAME,
.priv_size = sizeof(struct tun_struct),
.setup = tun_setup,
.validate = tun_validate,
+ .get_size = tun_get_size,
+ .fill_info = tun_fill_info,
};
static void tun_sock_write_space(struct sock *sk)
@@ -2783,6 +2837,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
+ struct net *net;
kuid_t owner;
kgid_t group;
int sndbuf;
@@ -2791,7 +2846,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
int le;
int ret;
- if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
+ if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
+ (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
} else {
@@ -2811,6 +2867,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
rtnl_lock();
tun = tun_get(tfile);
+ net = sock_net(&tfile->sk);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
@@ -2818,7 +2875,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ifr.ifr_name[IFNAMSIZ-1] = '\0';
- ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
+ ret = tun_set_iff(net, file, &ifr);
if (ret)
goto unlock;
@@ -2840,6 +2897,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tfile->ifindex = ifindex;
goto unlock;
}
+ if (cmd == SIOCGSKNS) {
+ ret = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto unlock;
+
+ ret = open_related_ns(&net->ns, get_net_ns);
+ goto unlock;
+ }
ret = -EBADFD;
if (!tun)
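
The tun hunks do three independent things: XDP_TX frames are now transmitted directly through tun_xdp_xmit()/tun_xdp_flush() instead of being rebuilt as skbs, the device gains rtnl_link get_size/fill_info callbacks so netlink dumps can expose the tun-specific attributes (owner, group, queue counts), and the char device learns the SIOCGSKNS ioctl to hand out a file descriptor for the network namespace it lives in, gated by CAP_NET_ADMIN in that namespace. A hedged userspace sketch of the last part (assumed usage; the branch added above answers this ioctl even before TUNSETIFF):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/sockios.h>      /* SIOCGSKNS */

    int main(void)
    {
            int tunfd = open("/dev/net/tun", O_RDWR);
            int nsfd;

            if (tunfd < 0)
                    return 1;
            nsfd = ioctl(tunfd, SIOCGSKNS); /* new fd referring to the netns, or -1 */
            if (nsfd < 0)
                    perror("SIOCGSKNS");
            else
                    printf("netns fd: %d\n", nsfd);
            return 0;
    }
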
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index f32261ecd215..fb1b78d4b9ef 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev)
return 0;
}
-static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
{
u8 buf[5];
u16 *tmp16;
@@ -1231,12 +1231,11 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
struct ethtool_eee eee_data;
- usbnet_get_endpoints(dev, intf);
-
tmp16 = (u16 *)buf;
tmp = (u8 *)buf;
- memset(ax179_data, 0, sizeof(*ax179_data));
+ if (!do_reset)
+ memset(ax179_data, 0, sizeof(*ax179_data));
/* Power up ethernet PHY */
*tmp16 = 0;
@@ -1249,9 +1248,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
msleep(100);
+ if (do_reset)
+ ax88179_auto_detach(dev, 0);
+
ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
ETH_ALEN, dev->net->dev_addr);
- memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
+ if (!do_reset)
+ memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
/* RX bulk configuration */
memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
@@ -1266,19 +1269,21 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH,
1, 1, tmp);
- dev->net->netdev_ops = &ax88179_netdev_ops;
- dev->net->ethtool_ops = &ax88179_ethtool_ops;
- dev->net->needed_headroom = 8;
- dev->net->max_mtu = 4088;
-
- /* Initialize MII structure */
- dev->mii.dev = dev->net;
- dev->mii.mdio_read = ax88179_mdio_read;
- dev->mii.mdio_write = ax88179_mdio_write;
- dev->mii.phy_id_mask = 0xff;
- dev->mii.reg_num_mask = 0xff;
- dev->mii.phy_id = 0x03;
- dev->mii.supports_gmii = 1;
+ if (!do_reset) {
+ dev->net->netdev_ops = &ax88179_netdev_ops;
+ dev->net->ethtool_ops = &ax88179_ethtool_ops;
+ dev->net->needed_headroom = 8;
+ dev->net->max_mtu = 4088;
+
+ /* Initialize MII structure */
+ dev->mii.dev = dev->net;
+ dev->mii.mdio_read = ax88179_mdio_read;
+ dev->mii.mdio_write = ax88179_mdio_write;
+ dev->mii.phy_id_mask = 0xff;
+ dev->mii.reg_num_mask = 0xff;
+ dev->mii.phy_id = 0x03;
+ dev->mii.supports_gmii = 1;
+ }
dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM;
@@ -1330,6 +1335,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
return 0;
}
+static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+ usbnet_get_endpoints(dev, intf);
+
+ return ax88179_link_bind_or_reset(dev, false);
+}
+
static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf)
{
u16 tmp16;
@@ -1458,74 +1470,7 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
static int ax88179_link_reset(struct usbnet *dev)
{
- struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
- u8 tmp[5], link_sts;
- u16 mode, tmp16, delay = HZ / 10;
- u32 tmp32 = 0x40000000;
- unsigned long jtimeout;
-
- jtimeout = jiffies + delay;
- while (tmp32 & 0x40000000) {
- mode = 0;
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &mode);
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2,
- &ax179_data->rxctl);
-
- /*link up, check the usb device control TX FIFO full or empty*/
- ax88179_read_cmd(dev, 0x81, 0x8c, 0, 4, &tmp32);
-
- if (time_after(jiffies, jtimeout))
- return 0;
- }
-
- mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
- AX_MEDIUM_RXFLOW_CTRLEN;
-
- ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS,
- 1, 1, &link_sts);
-
- ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID,
- GMII_PHY_PHYSR, 2, &tmp16);
-
- if (!(tmp16 & GMII_PHY_PHYSR_LINK)) {
- return 0;
- } else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
- mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ;
- if (dev->net->mtu > 1500)
- mode |= AX_MEDIUM_JUMBO_EN;
-
- if (link_sts & AX_USB_SS)
- memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
- else if (link_sts & AX_USB_HS)
- memcpy(tmp, &AX88179_BULKIN_SIZE[1], 5);
- else
- memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
- } else if (GMII_PHY_PHYSR_100 == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
- mode |= AX_MEDIUM_PS;
-
- if (link_sts & (AX_USB_SS | AX_USB_HS))
- memcpy(tmp, &AX88179_BULKIN_SIZE[2], 5);
- else
- memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
- } else {
- memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
- }
-
- /* RX bulk configuration */
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp);
-
- dev->rx_urb_size = (1024 * (tmp[3] + 2));
-
- if (tmp16 & GMII_PHY_PHYSR_FULL)
- mode |= AX_MEDIUM_FULL_DUPLEX;
- ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
- 2, 2, &mode);
-
- ax179_data->eee_enabled = ax88179_chk_eee(dev);
-
- netif_carrier_on(dev->net);
-
- return 0;
+ return ax88179_link_bind_or_reset(dev, true);
}
static int ax88179_reset(struct usbnet *dev)
@@ -1556,7 +1501,6 @@ static int ax88179_reset(struct usbnet *dev)
ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
dev->net->dev_addr);
- memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
/* RX bulk configuration */
memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index f7180f8db39e..61ea4eaace5d 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -83,11 +83,8 @@ static int eem_bind(struct usbnet *dev, struct usb_interface *intf)
int status = 0;
status = usbnet_get_endpoints(dev, intf);
- if (status < 0) {
- usb_set_intfdata(intf, NULL);
- usb_driver_release_interface(driver_of(intf), intf);
+ if (status < 0)
return status;
- }
/* no jumbogram (16K) support for now */
diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index ce0b0b4e3a57..bd2ba3659028 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -114,14 +114,14 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr)
return -ENOMEM;
memcpy(usb_buf, init_msg_1, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
- / sizeof(init_msg_1[0]), usb_buf, 24);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+ usb_buf, 24);
if (status != 0)
return status;
memcpy(usb_buf, init_msg_2, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
- / sizeof(init_msg_2[0]), usb_buf, 28);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+ usb_buf, 28);
if (status != 0)
return status;
@@ -150,12 +150,8 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf)
dev->rx_urb_size = dev->hard_mtu * 10; // Found as optimal after testing
status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr);
-
- if (status) {
- usb_set_intfdata(intf, NULL);
- usb_driver_release_interface(driver_of(intf), intf);
+ if (status)
return status;
- }
memcpy(dev->net->dev_addr, ethernet_addr, ETH_ALEN);
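
The kalmia change is purely cosmetic: the open-coded sizeof(x)/sizeof(x[0]) element count becomes ARRAY_SIZE(), which reads better and, in the kernel's definition, refuses to compile when handed a pointer instead of an array. Standalone illustration with hypothetical data:

    #include <stdio.h>

    /* Simplified form of the kernel macro, without the __must_be_array() check. */
    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    int main(void)
    {
            static const unsigned char msg[12] = { 0 }; /* stand-in for init_msg_1 */

            printf("%zu elements\n", ARRAY_SIZE(msg));  /* prints 12 */
            return 0;
    }
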
diff --git a/drivers/net/usb/lg-vl600.c b/drivers/net/usb/lg-vl600.c
index dbabd7ca5268..257916f172cd 100644
--- a/drivers/net/usb/lg-vl600.c
+++ b/drivers/net/usb/lg-vl600.c
@@ -157,12 +157,8 @@ static int vl600_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
s->current_rx_buf = skb_copy_expand(skb, 0,
le32_to_cpup(&frame->len), GFP_ATOMIC);
- if (!s->current_rx_buf) {
- netif_err(dev, ifup, dev->net, "Reserving %i bytes "
- "for packet assembly failed.\n",
- le32_to_cpup(&frame->len));
+ if (!s->current_rx_buf)
dev->net->stats.rx_errors++;
- }
return 0;
}
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..c6be49d3a9eb 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev)
return -ENOMEM;
rth->dst.output = vrf_output;
- rth->rt_table_id = vrf->tb_id;
rcu_assign_pointer(vrf->rth, rth);
@@ -942,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
const struct net_device *dev,
struct flowi6 *fl6,
int ifindex,
+ const struct sk_buff *skb,
int flags)
{
struct net_vrf *vrf = netdev_priv(dev);
@@ -960,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
if (!table)
return NULL;
- return ip6_pol_route(net, table, ifindex, fl6, flags);
+ return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
}
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -978,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
struct net *net = dev_net(vrf_dev);
struct rt6_info *rt6;
- rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+ rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
if (unlikely(!rt6))
return;
@@ -1111,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
if (!ipv6_addr_any(&fl6->saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
if (rt)
dst = &rt->dst;
@@ -1146,6 +1146,7 @@ static inline size_t vrf_fib_rule_nl_size(void)
sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */
sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */
+ sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */
return sz;
}
@@ -1176,6 +1177,9 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
frh->family = family;
frh->action = FR_ACT_TO_TBL;
+ if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, FRA_L3MDEV, 1))
goto nla_put_failure;
@@ -1431,6 +1435,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
.init = vrf_netns_init,
.id = &vrf_net_id,
.size = sizeof(bool),
+ .async = true,
};
static int __init vrf_init_module(void)
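
The vrf hunks have two parts: the IPv6 route lookups now thread the skb through to ip6_pol_route(), so the lookup can factor packet contents into multipath hashing, and the driver's auto-installed l3mdev FIB rule is tagged FRA_PROTOCOL = RTPROT_KERNEL, with vrf_fib_rule_nl_size() growing by one u8 attribute to match. The sizing rule of thumb, as a sketch:

    /* Sketch: every nla_put_*() in the message builder needs a matching
     * nla_total_size() term in the size estimate, or the allocated skb may
     * come up short (attribute set here mirrors the vrf rule message).
     */
    static size_t fib_rule_msg_size_sketch(void)
    {
            return NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) +
                   nla_total_size(sizeof(u8)) +    /* FRA_L3MDEV */
                   nla_total_size(sizeof(u32)) +   /* FRA_PRIORITY */
                   nla_total_size(sizeof(u8));     /* FRA_PROTOCOL */
    }
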
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fab7a4db249e..aa5f034d6ad1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
.exit_batch = vxlan_exit_batch_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
+ .async = true,
};
static int __init vxlan_init_module(void)
diff --git a/drivers/net/wimax/i2400m/usb-rx.c b/drivers/net/wimax/i2400m/usb-rx.c
index b78ee676e102..5b64bda7d9e7 100644
--- a/drivers/net/wimax/i2400m/usb-rx.c
+++ b/drivers/net/wimax/i2400m/usb-rx.c
@@ -263,9 +263,6 @@ retry:
new_skb = skb_copy_expand(rx_skb, 0, rx_size - rx_skb->len,
GFP_KERNEL);
if (new_skb == NULL) {
- if (printk_ratelimit())
- dev_err(dev, "RX: Can't reallocate skb to %d; "
- "RX dropped\n", rx_size);
kfree_skb(rx_skb);
rx_skb = NULL;
goto out; /* drop it...*/
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 768f63f38341..b799a5384abb 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -1599,7 +1599,8 @@ static void wil_probe_client_handle(struct wil6210_priv *wil,
*/
bool alive = (sta->status == wil_sta_connected);
- cfg80211_probe_status(ndev, sta->addr, req->cookie, alive, GFP_KERNEL);
+ cfg80211_probe_status(ndev, sta->addr, req->cookie, alive,
+ 0, false, GFP_KERNEL);
}
static struct list_head *next_probe_client(struct wil6210_priv *wil)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 35b21f8152bb..100cf42db65d 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
static unsigned int hwsim_net_id;
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
struct hwsim_net {
int netgroup;
@@ -267,11 +267,13 @@ static inline int hwsim_net_get_netgroup(struct net *net)
return hwsim_net->netgroup;
}
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
{
struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
- hwsim_net->netgroup = hwsim_netgroup++;
+ hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+ 0, 0, GFP_KERNEL);
+ return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
}
static inline u32 hwsim_net_get_wmediumd(struct net *net)
@@ -493,6 +495,7 @@ static LIST_HEAD(hwsim_radios);
static struct workqueue_struct *hwsim_wq;
static struct rhashtable hwsim_radios_rht;
static int hwsim_radio_idx;
+static int hwsim_radios_generation = 1;
static struct platform_driver mac80211_hwsim_driver = {
.driver = {
@@ -637,6 +640,7 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = {
[HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING },
[HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG },
[HWSIM_ATTR_FREQ] = { .type = NLA_U32 },
+ [HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN },
};
static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
@@ -2408,6 +2412,7 @@ struct hwsim_new_radio_params {
bool destroy_on_close;
const char *hwname;
bool no_vif;
+ const u8 *perm_addr;
};
static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb,
@@ -2572,15 +2577,25 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
skb_queue_head_init(&data->pending);
SET_IEEE80211_DEV(hw, data->dev);
- eth_zero_addr(addr);
- addr[0] = 0x02;
- addr[3] = idx >> 8;
- addr[4] = idx;
- memcpy(data->addresses[0].addr, addr, ETH_ALEN);
- memcpy(data->addresses[1].addr, addr, ETH_ALEN);
- data->addresses[1].addr[0] |= 0x40;
- hw->wiphy->n_addresses = 2;
- hw->wiphy->addresses = data->addresses;
+ if (!param->perm_addr) {
+ eth_zero_addr(addr);
+ addr[0] = 0x02;
+ addr[3] = idx >> 8;
+ addr[4] = idx;
+ memcpy(data->addresses[0].addr, addr, ETH_ALEN);
+ /* Why is a second address needed here? */
+ memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+ data->addresses[1].addr[0] |= 0x40;
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ /* possible address clash is checked at hash table insertion */
+ } else {
+ memcpy(data->addresses[0].addr, param->perm_addr, ETH_ALEN);
+ /* compatibility with automatically generated mac addr */
+ memcpy(data->addresses[1].addr, param->perm_addr, ETH_ALEN);
+ hw->wiphy->n_addresses = 2;
+ hw->wiphy->addresses = data->addresses;
+ }
data->channels = param->channels;
data->use_chanctx = param->use_chanctx;
@@ -2786,13 +2801,17 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
if (err < 0) {
- pr_debug("mac80211_hwsim: radio index %d already present\n",
- idx);
+ if (info) {
+ GENL_SET_ERR_MSG(info, "perm addr already present");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
spin_unlock_bh(&hwsim_radio_lock);
goto failed_final_insert;
}
list_add_tail(&data->list, &hwsim_radios);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
if (idx > 0)
@@ -3211,6 +3230,19 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
param.regd = hwsim_world_regdom_custom[idx];
}
+ if (info->attrs[HWSIM_ATTR_PERM_ADDR]) {
+ if (!is_valid_ether_addr(
+ nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]))) {
+ GENL_SET_ERR_MSG(info, "MAC is not a valid source address");
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ return -EINVAL;
+ }
+
+ param.perm_addr = nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ }
+
ret = mac80211_hwsim_new_radio(info, &param);
kfree(hwname);
return ret;
@@ -3250,6 +3282,7 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
list_del(&data->list);
rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
hwsim_rht_params);
+ hwsim_radios_generation++;
spin_unlock_bh(&hwsim_radio_lock);
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
info);
@@ -3306,17 +3339,19 @@ out_err:
static int hwsim_dump_radio_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- int idx = cb->args[0];
+ int last_idx = cb->args[0];
struct mac80211_hwsim_data *data = NULL;
- int res;
+ int res = 0;
+ void *hdr;
spin_lock_bh(&hwsim_radio_lock);
+ cb->seq = hwsim_radios_generation;
- if (idx == hwsim_radio_idx)
+ if (last_idx >= hwsim_radio_idx-1)
goto done;
list_for_each_entry(data, &hwsim_radios, list) {
- if (data->idx < idx)
+ if (data->idx <= last_idx)
continue;
if (!net_eq(wiphy_net(data->hw->wiphy), sock_net(skb->sk)))
@@ -3329,14 +3364,25 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb,
if (res < 0)
break;
- idx = data->idx + 1;
+ last_idx = data->idx;
}
- cb->args[0] = idx;
+ cb->args[0] = last_idx;
+
+ /* list changed, but no new element sent, set interrupted flag */
+ if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &hwsim_genl_family,
+ NLM_F_MULTI, HWSIM_CMD_GET_RADIO);
+ if (!hdr)
+ res = -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr);
+ genlmsg_end(skb, hdr);
+ }
done:
spin_unlock_bh(&hwsim_radio_lock);
- return skb->len;
+ return res ?: skb->len;
}
/* Generic Netlink operations array */
@@ -3394,6 +3440,7 @@ static void destroy_radio(struct work_struct *work)
struct mac80211_hwsim_data *data =
container_of(work, struct mac80211_hwsim_data, destroy_work);
+ hwsim_radios_generation++;
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL);
}
@@ -3463,9 +3510,7 @@ failure:
static __net_init int hwsim_init_net(struct net *net)
{
- hwsim_net_set_netgroup(net);
-
- return 0;
+ return hwsim_net_set_netgroup(net);
}
static void __net_exit hwsim_exit_net(struct net *net)
@@ -3488,6 +3533,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
queue_work(hwsim_wq, &data->destroy_work);
}
spin_unlock_bh(&hwsim_radio_lock);
+
+ ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
}
static struct pernet_operations hwsim_net_ops = {
@@ -3495,6 +3542,7 @@ static struct pernet_operations hwsim_net_ops = {
.exit = hwsim_exit_net,
.id = &hwsim_net_id,
.size = sizeof(struct hwsim_net),
+ .async = true,
};
static void hwsim_exit_netlink(void)
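
Several hwsim changes land together here: radios can be created with a user-supplied permanent MAC (HWSIM_ATTR_PERM_ADDR, validated and reported through extack on clashes), the per-netns group id moves from a bare increment counter to an IDA so it can be returned in hwsim_exit_net(), and radio dumps keep a generation count so an interrupted, concurrently modified dump is flagged to userspace rather than silently skipping entries. A sketch of the IDA pattern (names hypothetical):

    /* Sketch: recyclable id allocation with an IDA instead of "counter++". */
    static DEFINE_IDA(group_ida_sketch);

    static int group_alloc_sketch(void)
    {
            return ida_simple_get(&group_ida_sketch, 0, 0, GFP_KERNEL); /* id or -errno */
    }

    static void group_free_sketch(int id)
    {
            ida_simple_remove(&group_ida_sketch, id);
    }
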
diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h
index a96a79c1eff5..0fe3199f8c72 100644
--- a/drivers/net/wireless/mac80211_hwsim.h
+++ b/drivers/net/wireless/mac80211_hwsim.h
@@ -68,7 +68,12 @@ enum hwsim_tx_control_flags {
* %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
* @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
* returns the radio ID (>= 0) or negative on errors, if successful
- * then multicast the result
+ * then multicast the result, uses optional parameters:
+ * %HWSIM_ATTR_REG_STRICT_REG, %HWSIM_ATTR_SUPPORT_P2P_DEVICE,
+ * %HWSIM_ATTR_DESTROY_RADIO_ON_CLOSE, %HWSIM_ATTR_CHANNELS,
+ * %HWSIM_ATTR_NO_VIF, %HWSIM_ATTR_RADIO_NAME, %HWSIM_ATTR_USE_CHANCTX,
+ * %HWSIM_ATTR_REG_HINT_ALPHA2, %HWSIM_ATTR_REG_CUSTOM_REG,
+ * %HWSIM_ATTR_PERM_ADDR
* @HWSIM_CMD_DEL_RADIO: destroy a radio, reply is multicasted
* @HWSIM_CMD_GET_RADIO: fetch information about existing radios, uses:
* %HWSIM_ATTR_RADIO_ID
@@ -126,6 +131,7 @@ enum {
* @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
* @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
* rates of %HWSIM_ATTR_TX_INFO
+ * @HWSIM_ATTR_PERM_ADDR: permanent MAC address of the new radio
* @__HWSIM_ATTR_MAX: enum limit
*/
@@ -153,6 +159,7 @@ enum {
HWSIM_ATTR_FREQ,
HWSIM_ATTR_PAD,
HWSIM_ATTR_TX_INFO_FLAGS,
+ HWSIM_ATTR_PERM_ADDR,
__HWSIM_ATTR_MAX,
};
#define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)
diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c
index de2fa6705574..12ed14ebc307 100644
--- a/drivers/net/wireless/ti/wl1251/tx.c
+++ b/drivers/net/wireless/ti/wl1251/tx.c
@@ -221,10 +221,8 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb,
struct sk_buff *newskb = skb_copy_expand(skb, 0, 3,
GFP_KERNEL);
- if (unlikely(newskb == NULL)) {
- wl1251_error("Can't allocate skb!");
+ if (unlikely(newskb == NULL))
return -EINVAL;
- }
tx_hdr = (struct tx_double_buffer_desc *) newskb->data;
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b1cf7c6f407a..ef5887037b22 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
BUG();
}
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
{
struct xenvif_pkt_state pkt;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 959c65cf75d9..4326715dc13e 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -233,8 +233,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
#define QETH_IDX_FUNC_LEVEL_OSD 0x0101
#define QETH_IDX_FUNC_LEVEL_IQD 0x4108
-#define QETH_REAL_CARD 1
-#define QETH_VLAN_CARD 2
#define QETH_BUFSIZE 4096
/**
@@ -556,12 +554,6 @@ enum qeth_prot_versions {
QETH_PROT_IPV6 = 0x0006,
};
-enum qeth_ip_types {
- QETH_IP_TYPE_NORMAL,
- QETH_IP_TYPE_VIPA,
- QETH_IP_TYPE_RXIP,
-};
-
enum qeth_cmd_buffer_state {
BUF_STATE_FREE,
BUF_STATE_LOCKED,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3653bea38470..19203340f879 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -718,11 +718,8 @@ static int qeth_check_idx_response(struct qeth_card *card,
QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
if ((buffer[2] & 0xc0) == 0xc0) {
- QETH_DBF_MESSAGE(2, "received an IDX TERMINATE "
- "with cause code 0x%02x%s\n",
- buffer[4],
- ((buffer[4] == 0x22) ?
- " -- try another portname" : ""));
+ QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+ buffer[4]);
QETH_CARD_TEXT(card, 2, "ckidxres");
QETH_CARD_TEXT(card, 2, " idxterm");
QETH_CARD_TEXT_(card, 2, " rc%d", -EIO);
@@ -2849,7 +2846,8 @@ static int qeth_init_input_buffer(struct qeth_card *card,
int i;
if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
- buf->rx_skb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+ buf->rx_skb = netdev_alloc_skb(card->dev,
+ QETH_RX_PULL_LEN + ETH_HLEN);
if (!buf->rx_skb)
return 1;
}
@@ -2886,8 +2884,8 @@ int qeth_init_qdio_queues(struct qeth_card *card)
QETH_DBF_TEXT(SETUP, 2, "initqdqs");
/* inbound queue */
- qdio_reset_buffers(card->qdio.in_q->qdio_bufs,
- QDIO_MAX_BUFFERS_PER_Q);
+ qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+ memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_initialize_working_pool_list(card);
/*give only as many buffers to hardware as we have buffer pool entries*/
for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
@@ -2962,12 +2960,10 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
enum qeth_ipa_cmds ipacmd, enum qeth_prot_versions prot)
{
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
iob = qeth_get_buffer(&card->write);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
- qeth_fill_ipacmd_header(card, cmd, ipacmd, prot);
+ qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
} else {
dev_warn(&card->gdev->dev,
"The qeth driver ran out of channel command buffers\n");
@@ -3078,7 +3074,7 @@ static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
QETH_PROT_IPV4);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
cmd->data.setadapterparms.hdr.command_code = command;
cmd->data.setadapterparms.hdr.used_total = 1;
@@ -3220,7 +3216,7 @@ static int qeth_query_setdiagass(struct qeth_card *card)
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 16;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
@@ -3273,7 +3269,7 @@ int qeth_hw_trap(struct qeth_card *card, enum qeth_diags_trap_action action)
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 80;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
cmd->data.diagass.type = 1;
@@ -4251,7 +4247,7 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
sizeof(struct qeth_ipacmd_setadpparms_hdr) + 8);
if (!iob)
return;
- cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.data.mode = mode;
qeth_send_ipa_cmd(card, iob, qeth_setadp_promisc_mode_cb, NULL);
}
@@ -4318,7 +4314,7 @@ int qeth_setadpparms_change_macaddr(struct qeth_card *card)
sizeof(struct qeth_change_addr));
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setadapterparms.data.change_addr.cmd = CHANGE_ADDR_READ_MAC;
cmd->data.setadapterparms.data.change_addr.addr_size = ETH_ALEN;
ether_addr_copy(cmd->data.setadapterparms.data.change_addr.addr,
@@ -4433,7 +4429,7 @@ static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
sizeof(struct qeth_set_access_ctrl));
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
access_ctrl_req->subcmd_code = isolation;
@@ -4679,7 +4675,7 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
rc = -ENOMEM;
goto out;
}
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
qeth_snmp_command_cb, (void *)&qinfo);
@@ -4764,7 +4760,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
rc = -ENOMEM;
goto out_free;
}
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
oat_req = &cmd->data.setadapterparms.data.query_oat;
oat_req->subcmd_code = oat_data.command;
@@ -5339,7 +5335,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
} else {
unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
- skb = dev_alloc_skb(linear + headroom);
+ skb = napi_alloc_skb(&card->napi, linear + headroom);
}
if (!skb)
goto no_mem;
@@ -5503,7 +5499,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETASSPARMS, prot);
if (iob) {
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setassparms.hdr.assist_no = ipa_func;
cmd->data.setassparms.hdr.length = 8 + len;
cmd->data.setassparms.hdr.command_code = cmd_code;
@@ -5526,7 +5522,7 @@ int qeth_send_setassparms(struct qeth_card *card,
QETH_CARD_TEXT(card, 4, "sendassp");
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
if (len <= sizeof(__u32))
cmd->data.setassparms.data.flags_32bit = (__u32) data;
else /* (len > sizeof(__u32)) */
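
Most of the qeth_core_main.c churn is mechanical: the repeated (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE) cast becomes the __ipa_cmd() accessor. The helper is presumably just that cast behind a name, roughly:

    /* Sketch of the accessor these conversions rely on (assumed definition;
     * the real one lives in qeth_core.h).
     */
    static inline struct qeth_ipa_cmd *__ipa_cmd(struct qeth_cmd_buffer *iob)
    {
            return (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
    }
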
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 5ef4c978ad19..50a313806dde 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -108,7 +108,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setdelmac.mac_length = ETH_ALEN;
ether_addr_copy(cmd->data.setdelmac.mac, mac);
return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
@@ -305,7 +305,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setdelvlan.vlan_id = i;
return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
qeth_l2_send_setdelvlan_cb, NULL));
@@ -437,10 +437,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
*done = 1;
break;
}
- skb->dev = card->dev;
switch (hdr->hdr.l2.id) {
case QETH_HEADER_TYPE_LAYER2:
- skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
if ((card->dev->features & NETIF_F_RXCSUM)
&& ((hdr->hdr.l2.flags[1] &
@@ -975,6 +973,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
return -ENODEV;
card->dev->ml_priv = card;
+ card->dev->priv_flags |= IFF_UNICAST_FLT;
card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
card->dev->mtu = card->info.initial_mtu;
card->dev->min_mtu = 64;
@@ -991,9 +990,16 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->features |= NETIF_F_VLAN_CHALLENGED;
else
card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ if (card->info.type != QETH_CARD_TYPE_OSN &&
+ card->info.type != QETH_CARD_TYPE_IQD) {
+ card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ card->dev->needed_headroom = sizeof(struct qeth_hdr);
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+ }
+
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
- card->dev->hw_features = NETIF_F_SG;
- card->dev->vlan_features = NETIF_F_SG;
card->dev->features |= NETIF_F_SG;
/* OSA 3S and earlier has no RX/TX support */
if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
@@ -1005,11 +1011,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->vlan_features |= NETIF_F_RXCSUM;
}
}
- if (card->info.type != QETH_CARD_TYPE_OSN &&
- card->info.type != QETH_CARD_TYPE_IQD) {
- card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- card->dev->needed_headroom = sizeof(struct qeth_hdr);
- }
card->info.broadcast_capable = 1;
qeth_l2_request_initial_mac(card);
@@ -1086,7 +1087,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
qeth_l2_setup_bridgeport_attrs(card);
card->state = CARD_STATE_HARDSETUP;
- memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_print_status_message(card);
/* softsetup */
@@ -1374,7 +1374,6 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
{
struct qeth_cmd_buffer *iob;
struct qeth_card *card;
- int rc;
if (!dev)
return -ENODEV;
@@ -1385,9 +1384,8 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
if (!qeth_card_hw_is_reachable(card))
return -ENODEV;
iob = qeth_wait_for_buffer(&card->write);
- memcpy(iob->data+IPA_PDU_HEADER_SIZE, data, data_len);
- rc = qeth_osn_send_ipa_cmd(card, iob, data_len);
- return rc;
+ memcpy(__ipa_cmd(iob), data, data_len);
+ return qeth_osn_send_ipa_cmd(card, iob, data_len);
}
EXPORT_SYMBOL(qeth_osn_assist);
@@ -1764,7 +1762,7 @@ static struct qeth_cmd_buffer *qeth_sbp_build_cmd(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
if (!iob)
return iob;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
cmd_length;
cmd->data.sbp.hdr.command_code = sbp_cmd;
@@ -2129,7 +2127,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card,
return -ENOMEM;
/* create header for request */
- cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
req = &cmd->data.vnicc;
/* create sub command header for request */
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 498fe9af2cdb..87659cfc9066 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -15,21 +15,26 @@
#define QETH_SNIFF_AVAIL 0x0008
+enum qeth_ip_types {
+ QETH_IP_TYPE_NORMAL,
+ QETH_IP_TYPE_VIPA,
+ QETH_IP_TYPE_RXIP,
+};
+
struct qeth_ipaddr {
struct hlist_node hnode;
enum qeth_ip_types type;
- enum qeth_ipa_setdelip_flags set_flags;
- enum qeth_ipa_setdelip_flags del_flags;
+ unsigned char mac[ETH_ALEN];
u8 is_multicast:1;
u8 in_progress:1;
u8 disp_flag:2;
+ u8 ipato:1; /* ucast only */
/* is changed only for normal ip addresses
* for non-normal addresses it always is 1
*/
int ref_counter;
enum qeth_prot_versions proto;
- unsigned char mac[ETH_ALEN];
union {
struct {
unsigned int addr;
@@ -42,6 +47,16 @@ struct qeth_ipaddr {
} u;
};
+static inline void qeth_l3_init_ipaddr(struct qeth_ipaddr *addr,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto)
+{
+ memset(addr, 0, sizeof(*addr));
+ addr->type = type;
+ addr->proto = proto;
+ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+}
+
static inline bool qeth_l3_addr_match_ip(struct qeth_ipaddr *a1,
struct qeth_ipaddr *a2)
{
@@ -109,15 +124,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *, struct qeth_ipato_entry *);
int qeth_l3_del_ipato_entry(struct qeth_card *card,
enum qeth_prot_versions proto, u8 *addr,
int mask_bits);
-int qeth_l3_add_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr);
-int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr);
void qeth_l3_update_ipato(struct qeth_card *card);
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
-int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
-int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add);
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto);
#endif /* __QETH_L3_H__ */
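
With qeth_l3_init_ipaddr() available in the header, the four exported VIPA/RXIP add/delete entry points collapse into qeth_l3_modify_rxip_vipa() (plus qeth_l3_modify_hsuid() for the IQD link-local case), and callers can build the address on the stack instead of allocating a buffer. A hedged sketch of a caller of the consolidated helper:

    /* Sketch: adding an IPv4 VIPA through the new single entry point
     * (caller is hypothetical; the real users are presumably the l3
     * sysfs store handlers).
     */
    static int add_vipa_sketch(struct qeth_card *card, const u8 *ip4)
    {
            return qeth_l3_modify_rxip_vipa(card, true, ip4,
                                            QETH_IP_TYPE_VIPA, QETH_PROT_IPV4);
    }
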
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b6b12220da71..c1a16a74aa83 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -67,6 +67,15 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
qeth_l3_ipaddr6_to_string(addr, buf);
}
+static struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
+{
+ struct qeth_ipaddr *addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+
+ if (addr)
+ qeth_l3_init_ipaddr(addr, QETH_IP_TYPE_NORMAL, prot);
+ return addr;
+}
+
static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
struct qeth_ipaddr *query)
{
@@ -138,12 +147,18 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
return rc;
}
-int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_delete_ip(struct qeth_card *card,
+ struct qeth_ipaddr *tmp_addr)
{
int rc = 0;
struct qeth_ipaddr *addr;
- QETH_CARD_TEXT(card, 4, "delip");
+ if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+ QETH_CARD_TEXT(card, 2, "delrxip");
+ else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+ QETH_CARD_TEXT(card, 2, "delvipa");
+ else
+ QETH_CARD_TEXT(card, 2, "delip");
if (tmp_addr->proto == QETH_PROT_IPV4)
QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -171,13 +186,18 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
return rc;
}
-int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
{
int rc = 0;
struct qeth_ipaddr *addr;
char buf[40];
- QETH_CARD_TEXT(card, 4, "addip");
+ if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+ QETH_CARD_TEXT(card, 2, "addrxip");
+ else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+ QETH_CARD_TEXT(card, 2, "addvipa");
+ else
+ QETH_CARD_TEXT(card, 2, "addip");
if (tmp_addr->proto == QETH_PROT_IPV4)
QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -209,7 +229,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
QETH_CARD_TEXT(card, 2, "tkovaddr");
- addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+ addr->ipato = 1;
}
hash_add(card->ip_htable, &addr->hnode,
qeth_l3_ipaddr_hash(addr));
@@ -251,23 +271,6 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
return rc;
}
-
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(
- enum qeth_prot_versions prot)
-{
- struct qeth_ipaddr *addr;
-
- addr = kzalloc(sizeof(struct qeth_ipaddr), GFP_ATOMIC);
- if (!addr)
- return NULL;
-
- addr->type = QETH_IP_TYPE_NORMAL;
- addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
- addr->proto = prot;
-
- return addr;
-}
-
static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
{
struct qeth_ipaddr *addr;
@@ -352,7 +355,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
ether_addr_copy(cmd->data.setdelipm.mac, addr->mac);
if (addr->proto == QETH_PROT_IPV6)
memcpy(cmd->data.setdelipm.ip6, &addr->u.a6.addr,
@@ -379,21 +382,38 @@ static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len)
}
}
+static u32 qeth_l3_get_setdelip_flags(struct qeth_ipaddr *addr, bool set)
+{
+ switch (addr->type) {
+ case QETH_IP_TYPE_RXIP:
+ return (set) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+ case QETH_IP_TYPE_VIPA:
+ return (set) ? QETH_IPA_SETIP_VIPA_FLAG :
+ QETH_IPA_DELIP_VIPA_FLAG;
+ default:
+ return (set && addr->ipato) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+ }
+}
+
static int qeth_l3_send_setdelip(struct qeth_card *card,
- struct qeth_ipaddr *addr, int ipacmd, unsigned int flags)
+ struct qeth_ipaddr *addr,
+ enum qeth_ipa_cmds ipacmd)
{
- int rc;
struct qeth_cmd_buffer *iob;
struct qeth_ipa_cmd *cmd;
__u8 netmask[16];
+ u32 flags;
QETH_CARD_TEXT(card, 4, "setdelip");
- QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
+
+ flags = qeth_l3_get_setdelip_flags(addr, ipacmd == IPA_CMD_SETIP);
+ QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
+
if (addr->proto == QETH_PROT_IPV6) {
memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
sizeof(struct in6_addr));
@@ -407,9 +427,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
cmd->data.setdelip4.flags = flags;
}
- rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-
- return rc;
+ return qeth_send_ipa_cmd(card, iob, NULL, NULL);
}
static int qeth_l3_send_setrouting(struct qeth_card *card,
@@ -423,7 +441,7 @@ static int qeth_l3_send_setrouting(struct qeth_card *card,
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETRTG, prot);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setrtg.type = (type);
rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
@@ -525,10 +543,7 @@ void qeth_l3_update_ipato(struct qeth_card *card)
hash_for_each(card->ip_htable, i, addr, hnode) {
if (addr->type != QETH_IP_TYPE_NORMAL)
continue;
- if (qeth_l3_is_addr_covered_by_ipato(card, addr))
- addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
- else
- addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+ addr->ipato = qeth_l3_is_addr_covered_by_ipato(card, addr);
}
}
@@ -606,132 +621,39 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
return rc;
}
-/*
- * VIPA related functions
- */
-int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
-{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addvipa4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addvipa6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_VIPA;
- ipaddr->set_flags = QETH_IPA_SETIP_VIPA_FLAG;
- ipaddr->del_flags = QETH_IPA_DELIP_VIPA_FLAG;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_add_ip(card, ipaddr);
- spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
-
- return rc;
-}
-
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
-{
- struct qeth_ipaddr *ipaddr;
- int rc;
-
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "delvipa4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "delvipa6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_VIPA;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_delete_ip(card, ipaddr);
- spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
- return rc;
-}
-
-/*
- * proxy ARP related functions
- */
-int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+ enum qeth_ip_types type,
+ enum qeth_prot_versions proto)
{
- struct qeth_ipaddr *ipaddr;
+ struct qeth_ipaddr addr;
int rc;
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addrxip4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addrxip6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
-
- ipaddr->type = QETH_IP_TYPE_RXIP;
- ipaddr->set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
- ipaddr->del_flags = 0;
- } else
- return -ENOMEM;
+ qeth_l3_init_ipaddr(&addr, type, proto);
+ if (proto == QETH_PROT_IPV4)
+ memcpy(&addr.u.a4.addr, ip, 4);
+ else
+ memcpy(&addr.u.a6.addr, ip, 16);
spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_add_ip(card, ipaddr);
+ rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
-
return rc;
}
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
- const u8 *addr)
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
{
- struct qeth_ipaddr *ipaddr;
- int rc;
+ struct qeth_ipaddr addr;
+ int rc, i;
- ipaddr = qeth_l3_get_addr_buffer(proto);
- if (ipaddr) {
- if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "delrxip4");
- memcpy(&ipaddr->u.a4.addr, addr, 4);
- ipaddr->u.a4.mask = 0;
- } else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "delrxip6");
- memcpy(&ipaddr->u.a6.addr, addr, 16);
- ipaddr->u.a6.pfxlen = 0;
- }
- ipaddr->type = QETH_IP_TYPE_RXIP;
- } else
- return -ENOMEM;
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+ addr.u.a6.addr.s6_addr[0] = 0xfe;
+ addr.u.a6.addr.s6_addr[1] = 0x80;
+ for (i = 0; i < 8; i++)
+ addr.u.a6.addr.s6_addr[8+i] = card->options.hsuid[i];
spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_delete_ip(card, ipaddr);
+ rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
spin_unlock_bh(&card->ip_lock);
-
- kfree(ipaddr);
return rc;
}
@@ -758,8 +680,7 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
if (addr->is_multicast)
rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_SETIPM);
else
- rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP,
- addr->set_flags);
+ rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP);
if (rc)
QETH_CARD_TEXT(card, 2, "failed");
} while ((--cnt > 0) && rc);
@@ -791,8 +712,7 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
if (addr->is_multicast)
rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_DELIPM);
else
- rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP,
- addr->del_flags);
+ rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP);
if (rc)
QETH_CARD_TEXT(card, 2, "failed");
@@ -1072,7 +992,7 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card)
QETH_PROT_IPV6);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
card->info.unique_id;
@@ -1117,7 +1037,7 @@ static int qeth_l3_get_unique_id(struct qeth_card *card)
QETH_PROT_IPV6);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
card->info.unique_id;
@@ -1193,7 +1113,7 @@ qeth_diags_trace(struct qeth_card *card, enum qeth_diags_trace_cmds diags_cmd)
iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.diagass.subcmd_len = 16;
cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRACE;
cmd->data.diagass.type = QETH_DIAGS_TYPE_HIPERSOCKET;
@@ -1502,30 +1422,24 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
card->stats.multicast++;
- skb->pkt_type = PACKET_MULTICAST;
break;
case QETH_CAST_BROADCAST:
ether_addr_copy(tg_addr, card->dev->broadcast);
card->stats.multicast++;
- skb->pkt_type = PACKET_BROADCAST;
break;
- case QETH_CAST_UNICAST:
- case QETH_CAST_ANYCAST:
- case QETH_CAST_NOCAST:
default:
if (card->options.sniffer)
skb->pkt_type = PACKET_OTHERHOST;
- else
- skb->pkt_type = PACKET_HOST;
ether_addr_copy(tg_addr, card->dev->dev_addr);
}
+
if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
card->dev->header_ops->create(skb, card->dev, prot,
tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
- card->dev->addr_len);
+ skb->len);
else
card->dev->header_ops->create(skb, card->dev, prot,
- tg_addr, "FAKELL", card->dev->addr_len);
+ tg_addr, "FAKELL", skb->len);
}
skb->protocol = eth_type_trans(skb, card->dev);
@@ -1572,20 +1486,16 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
*done = 1;
break;
}
- skb->dev = card->dev;
switch (hdr->hdr.l3.id) {
case QETH_HEADER_TYPE_LAYER3:
magic = *(__u16 *)skb->data;
if ((card->info.type == QETH_CARD_TYPE_IQD) &&
(magic == ETH_P_AF_IUCV)) {
skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
- skb->pkt_type = PACKET_HOST;
- skb->mac_header = NET_SKB_PAD;
- skb->dev = card->dev;
len = skb->len;
card->dev->header_ops->create(skb, card->dev, 0,
- card->dev->dev_addr, "FAKELL",
- card->dev->addr_len);
+ card->dev->dev_addr, "FAKELL", len);
+ skb_reset_mac_header(skb);
netif_receive_skb(skb);
} else {
qeth_l3_rebuild_skb(card, skb, hdr);
@@ -1594,7 +1504,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
}
break;
case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
- skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
len = skb->len;
netif_receive_skb(skb);
@@ -1613,69 +1522,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
return work_done;
}
-static int qeth_l3_verify_vlan_dev(struct net_device *dev,
- struct qeth_card *card)
-{
- int rc = 0;
- u16 vid;
-
- for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
- struct net_device *netdev;
-
- rcu_read_lock();
- netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
- vid);
- rcu_read_unlock();
- if (netdev == dev) {
- rc = QETH_VLAN_CARD;
- break;
- }
- }
-
- if (rc && !(vlan_dev_real_dev(dev)->ml_priv == (void *)card))
- return 0;
-
- return rc;
-}
-
-static int qeth_l3_verify_dev(struct net_device *dev)
-{
- struct qeth_card *card;
- int rc = 0;
- unsigned long flags;
-
- read_lock_irqsave(&qeth_core_card_list.rwlock, flags);
- list_for_each_entry(card, &qeth_core_card_list.list, list) {
- if (card->dev == dev) {
- rc = QETH_REAL_CARD;
- break;
- }
- rc = qeth_l3_verify_vlan_dev(dev, card);
- if (rc)
- break;
- }
- read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
-
- return rc;
-}
-
-static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
-{
- struct qeth_card *card = NULL;
- int rc;
-
- rc = qeth_l3_verify_dev(dev);
- if (rc == QETH_REAL_CARD)
- card = dev->ml_priv;
- else if (rc == QETH_VLAN_CARD)
- card = vlan_dev_real_dev(dev)->ml_priv;
- if (card && card->options.layer2)
- card = NULL;
- if (card)
- QETH_CARD_TEXT_(card, 4, "%d", rc);
- return card ;
-}
-
static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
{
QETH_DBF_TEXT(SETUP, 2, "stopcard");
@@ -2004,7 +1850,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
prot);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd = __ipa_cmd(iob);
cmd->data.setassparms.data.query_arp.request_bits = 0x000F;
cmd->data.setassparms.data.query_arp.reply_bits = 0;
cmd->data.setassparms.data.query_arp.no_entries = 0;
@@ -2785,14 +2631,16 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
card->dev->dev_id = card->info.unique_id &
0xffff;
+
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+
if (!card->info.guestlan) {
- card->dev->hw_features = NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
- card->dev->vlan_features = NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
- NETIF_F_TSO;
card->dev->features |= NETIF_F_SG;
+ card->dev->hw_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+ card->dev->vlan_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
}
}
} else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -2907,7 +2755,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
card->info.hwtrap = 0;
card->state = CARD_STATE_HARDSETUP;
- memset(&card->rx, 0, sizeof(struct qeth_rx));
qeth_print_status_message(card);
/* softsetup */
@@ -3130,13 +2977,43 @@ struct qeth_discipline qeth_l3_discipline = {
};
EXPORT_SYMBOL_GPL(qeth_l3_discipline);
+static int qeth_l3_handle_ip_event(struct qeth_card *card,
+ struct qeth_ipaddr *addr,
+ unsigned long event)
+{
+ switch (event) {
+ case NETDEV_UP:
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_add_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
+ return NOTIFY_OK;
+ case NETDEV_DOWN:
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_delete_ip(card, addr);
+ spin_unlock_bh(&card->ip_lock);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
+{
+ if (is_vlan_dev(dev))
+ dev = vlan_dev_real_dev(dev);
+ if (dev->netdev_ops == &qeth_l3_osa_netdev_ops ||
+ dev->netdev_ops == &qeth_l3_netdev_ops)
+ return (struct qeth_card *) dev->ml_priv;
+ return NULL;
+}
+
static int qeth_l3_ip_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = (struct net_device *)ifa->ifa_dev->dev;
- struct qeth_ipaddr *addr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct qeth_ipaddr addr;
struct qeth_card *card;
if (dev_net(dev) != &init_net)
@@ -3147,29 +3024,11 @@ static int qeth_l3_ip_event(struct notifier_block *this,
return NOTIFY_DONE;
QETH_CARD_TEXT(card, 3, "ipevent");
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
- if (addr) {
- addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
- addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- case NETDEV_DOWN:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- }
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
+ addr.u.a4.addr = be32_to_cpu(ifa->ifa_address);
+ addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- kfree(addr);
- return NOTIFY_DONE;
+ return qeth_l3_handle_ip_event(card, &addr, event);
}
static struct notifier_block qeth_l3_ip_notifier = {
@@ -3181,8 +3040,8 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *dev = (struct net_device *)ifa->idev->dev;
- struct qeth_ipaddr *addr;
+ struct net_device *dev = ifa->idev->dev;
+ struct qeth_ipaddr addr;
struct qeth_card *card;
card = qeth_l3_get_card_from_dev(dev);
@@ -3192,29 +3051,11 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
if (!qeth_is_supported(card, IPA_IPV6))
return NOTIFY_DONE;
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (addr) {
- memcpy(&addr->u.a6.addr, &ifa->addr, sizeof(struct in6_addr));
- addr->u.a6.pfxlen = ifa->prefix_len;
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- case NETDEV_DOWN:
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- break;
- }
+ qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+ addr.u.a6.addr = ifa->addr;
+ addr.u.a6.pfxlen = ifa->prefix_len;
- kfree(addr);
- return NOTIFY_DONE;
+ return qeth_l3_handle_ip_event(card, &addr, event);
}
static struct notifier_block qeth_l3_ip6_notifier = {
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index a645cfe66ddf..f61192a048f4 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -272,9 +272,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
- struct qeth_ipaddr *addr;
char *tmp;
- int rc, i;
+ int rc;
if (!card)
return -EINVAL;
@@ -293,25 +292,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
if (strlen(tmp) > 8)
return -EINVAL;
- if (card->options.hsuid[0]) {
+ if (card->options.hsuid[0])
/* delete old ip address */
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (!addr)
- return -ENOMEM;
-
- addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
- addr->u.a6.addr.s6_addr32[1] = 0x00000000;
- for (i = 8; i < 16; i++)
- addr->u.a6.addr.s6_addr[i] =
- card->options.hsuid[i - 8];
- addr->u.a6.pfxlen = 0;
- addr->type = QETH_IP_TYPE_NORMAL;
-
- spin_lock_bh(&card->ip_lock);
- qeth_l3_delete_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- kfree(addr);
- }
+ qeth_l3_modify_hsuid(card, false);
if (strlen(tmp) == 0) {
/* delete ip address only */
@@ -331,21 +314,7 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
if (card->dev)
memcpy(card->dev->perm_addr, card->options.hsuid, 9);
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (addr != NULL) {
- addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
- addr->u.a6.addr.s6_addr32[1] = 0x00000000;
- for (i = 8; i < 16; i++)
- addr->u.a6.addr.s6_addr[i] = card->options.hsuid[i - 8];
- addr->u.a6.pfxlen = 0;
- addr->type = QETH_IP_TYPE_NORMAL;
- } else
- return -ENOMEM;
-
- spin_lock_bh(&card->ip_lock);
- rc = qeth_l3_add_ip(card, addr);
- spin_unlock_bh(&card->ip_lock);
- kfree(addr);
+ rc = qeth_l3_modify_hsuid(card, true);
return rc ? rc : count;
}
@@ -767,7 +736,8 @@ static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_vipae(buf, proto, addr);
if (!rc)
- rc = qeth_l3_add_vipa(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+ QETH_IP_TYPE_VIPA, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -796,7 +766,8 @@ static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_vipae(buf, proto, addr);
if (!rc)
- rc = qeth_l3_del_vipa(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+ QETH_IP_TYPE_VIPA, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -908,7 +879,8 @@ static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_rxipe(buf, proto, addr);
if (!rc)
- rc = qeth_l3_add_rxip(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+ QETH_IP_TYPE_RXIP, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -937,7 +909,8 @@ static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count,
mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_rxipe(buf, proto, addr);
if (!rc)
- rc = qeth_l3_del_rxip(card, proto, addr);
+ rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+ QETH_IP_TYPE_RXIP, proto);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 6198559abbd8..0ad00dbf912d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -732,7 +732,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
struct sockaddr_in6 addr;
- int rc, len;
+ int rc;
switch(param) {
case ISCSI_PARAM_CONN_PORT:
@@ -745,12 +745,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
}
if (param == ISCSI_PARAM_LOCAL_PORT)
rc = kernel_getsockname(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
else
rc = kernel_getpeername(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
spin_unlock_bh(&conn->session->frwd_lock);
- if (rc)
+ if (rc < 0)
return rc;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
@@ -771,7 +771,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
struct iscsi_tcp_conn *tcp_conn;
struct iscsi_sw_tcp_conn *tcp_sw_conn;
struct sockaddr_in6 addr;
- int rc, len;
+ int rc;
switch (param) {
case ISCSI_HOST_PARAM_IPADDRESS:
@@ -793,9 +793,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
}
rc = kernel_getsockname(tcp_sw_conn->sock,
- (struct sockaddr *)&addr, &len);
+ (struct sockaddr *)&addr);
spin_unlock_bh(&session->frwd_lock);
- if (rc)
+ if (rc < 0)
return rc;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
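The iscsi_tcp changes above, and the getname conversions in the files that follow, all track the same in-kernel API change: kernel_getsockname(), kernel_getpeername() and proto_ops->getname() no longer take an address-length output parameter; on success they return the number of sockaddr bytes written, and a negative errno on failure. A minimal sketch of a caller written against the new convention; the helper name and the port logging are illustrative, not part of the patch:

/* Sketch only: post-change calling convention for kernel_getsockname().
 * my_log_local_port() is a hypothetical helper.
 */
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/printk.h>

static int my_log_local_port(struct socket *sock)
{
	struct sockaddr_storage addr;
	int len;

	/* No length out-parameter any more: the return value is either the
	 * number of sockaddr bytes filled in, or a negative errno.
	 */
	len = kernel_getsockname(sock, (struct sockaddr *)&addr);
	if (len < 0)
		return len;

	if (addr.ss_family == AF_INET)
		pr_info("local port %u\n",
			ntohs(((struct sockaddr_in *)&addr)->sin_port));
	return 0;
}

Note that error checks change from "if (rc)" to "if (rc < 0)" throughout, since a positive return is now the success case.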
diff --git a/drivers/soc/qcom/qmi_interface.c b/drivers/soc/qcom/qmi_interface.c
index 877611d5c42b..321982277697 100644
--- a/drivers/soc/qcom/qmi_interface.c
+++ b/drivers/soc/qcom/qmi_interface.c
@@ -586,7 +586,6 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
struct sockaddr_qrtr *sq)
{
struct socket *sock;
- int sl = sizeof(*sq);
int ret;
ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
@@ -594,7 +593,7 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
if (ret < 0)
return ERR_PTR(ret);
- ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl);
+ ret = kernel_getsockname(sock, (struct sockaddr *)sq);
if (ret < 0) {
sock_release(sock);
return ERR_PTR(ret);
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index d21a9d128d3e..5703dd176787 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1577,7 +1577,7 @@ out:
static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct ipx_address *addr;
struct sockaddr_ipx sipx;
@@ -1585,8 +1585,6 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
struct ipx_sock *ipxs = ipx_sk(sk);
int rc;
- *uaddr_len = sizeof(struct sockaddr_ipx);
-
lock_sock(sk);
if (peer) {
rc = -ENOTCONN;
@@ -1620,7 +1618,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
sipx.sipx_zero = 0;
memcpy(uaddr, &sipx, sizeof(sipx));
- rc = 0;
+ rc = sizeof(struct sockaddr_ipx);
out:
release_sock(sk);
return rc;
diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c
index 2f1e9ab3d6d0..c13553a9ee11 100644
--- a/drivers/staging/irda/net/af_irda.c
+++ b/drivers/staging/irda/net/af_irda.c
@@ -697,7 +697,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
*
*/
static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_irda saddr;
struct sock *sk = sock->sk;
@@ -720,11 +720,9 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
- /* uaddr_len come to us uninitialised */
- *uaddr_len = sizeof (struct sockaddr_irda);
- memcpy(uaddr, &saddr, *uaddr_len);
+ memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda));
- return 0;
+ return sizeof (struct sockaddr_irda);
}
/*
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index ce93806eefca..1bee667802b0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -448,14 +448,13 @@ int
lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
{
struct sockaddr_in sin;
- int len = sizeof(sin);
int rc;
if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+ rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
- if (rc) {
+ rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
+ if (rc < 0) {
CERROR("Error %d getting sock %s IP/port\n",
rc, remote ? "peer" : "local");
return rc;
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 64c5a57b92e4..99501785cdc1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1020,7 +1020,7 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
struct socket *new_sock, *sock = np->np_socket;
struct sockaddr_in sock_in;
struct sockaddr_in6 sock_in6;
- int rc, err;
+ int rc;
rc = kernel_accept(sock, &new_sock, 0);
if (rc < 0)
@@ -1033,8 +1033,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in6, &err, 1);
- if (!rc) {
+ (struct sockaddr *)&sock_in6, 1);
+ if (rc >= 0) {
if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
} else {
@@ -1047,8 +1047,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
}
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in6, &err, 0);
- if (!rc) {
+ (struct sockaddr *)&sock_in6, 0);
+ if (rc >= 0) {
if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
} else {
@@ -1063,13 +1063,13 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
memset(&sock_in, 0, sizeof(struct sockaddr_in));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in, &err, 1);
- if (!rc)
+ (struct sockaddr *)&sock_in, 1);
+ if (rc >= 0)
memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
rc = conn->sock->ops->getname(conn->sock,
- (struct sockaddr *)&sock_in, &err, 0);
- if (!rc)
+ (struct sockaddr *)&sock_in, 0);
+ if (rc >= 0)
memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
}
diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c
index 37557651b600..c13befa31110 100644
--- a/drivers/usb/gadget/function/f_eem.c
+++ b/drivers/usb/gadget/function/f_eem.c
@@ -507,7 +507,6 @@ static int eem_unwrap(struct gether *port,
0,
GFP_ATOMIC);
if (unlikely(!skb3)) {
- DBG(cdev, "unable to realign EEM packet\n");
dev_kfree_skb_any(skb2);
continue;
}
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 8139bc70ad7d..a31d9b240af8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1040,7 +1040,7 @@ static struct socket *get_raw_socket(int fd)
struct sockaddr_ll sa;
char buf[MAX_ADDR_LEN];
} uaddr;
- int uaddr_len = sizeof uaddr, r;
+ int r;
struct socket *sock = sockfd_lookup(fd, &r);
if (!sock)
@@ -1052,9 +1052,8 @@ static struct socket *get_raw_socket(int fd)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
- &uaddr_len, 0);
- if (r)
+ r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+ if (r < 0)
goto err;
if (uaddr.sa.sll_family != AF_PACKET) {
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0d14e2ff19f1..0898dbdbf955 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -61,9 +61,9 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
if (other_cid == 0)
continue;
- if (other_cid == guest_cid) {
+ if (other_cid == guest_cid)
return vsock;
- }
+
}
return NULL;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cff79ea0c01d..5243989a60cc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
struct sockaddr_storage saddr;
- int buflen;
void (*orig_report)(struct sock *) = NULL;
read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
orig_report = listen_sock.sk_error_report;
if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
/* Get the connected socket's peer */
memset(&peeraddr, 0, sizeof(peeraddr));
- if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
- &len, 2)) {
+ len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+ if (len < 0) {
result = -ECONNABORTED;
goto accept_err;
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9c36d614bf89..2dee4e03ff1c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
.exit = lockd_exit_net,
.id = &lockd_net_id,
.size = sizeof(struct lockd_net),
+ .async = true,
};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d893543cf3b..6c3083c992e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
.exit = nfs_net_exit,
.id = &nfs_net_id,
.size = sizeof(struct nfs_net),
+ .async = true,
};
/*
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 5be08f02a76b..8c743a405df6 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
.exit = grace_exit_net,
.id = &grace_net_id,
.size = sizeof(struct list_head),
+ .async = true,
};
static int __init
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 36b0772701a0..60702d677bd4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
return fd;
}
+EXPORT_SYMBOL_GPL(open_related_ns);
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eac5140aac47..e5076185cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
static int o2net_accept_one(struct socket *sock, int *more)
{
- int ret, slen;
+ int ret;
struct sockaddr_in sin;
struct socket *new_sock = NULL;
struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
- slen = sizeof(sin);
- ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
- &slen, 1);
+ ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
goto out;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 68c06ae7888c..da6f8733c9c5 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net)
static struct pernet_operations __net_initdata proc_net_ns_ops = {
.init = proc_net_ns_init,
.exit = proc_net_ns_exit,
+ .async = true,
};
int __init proc_net_init(void)
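The .async = true additions to lockd, nfs, grace and proc_net above all mark per-netns setup and teardown as safe to run without the global net_mutex. A minimal sketch of a pernet_operations registration using the flag; the foo_* names and the per-net payload are illustrative:

/* Sketch: a pernet_operations instance opting in to async (net_mutex-free)
 * init/exit via the .async flag.  The foo_* names are hypothetical.
 */
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static unsigned int foo_net_id;

struct foo_net {
	unsigned int counter;
};

static __net_init int foo_net_init(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	fn->counter = 0;	/* per-namespace state, allocated by the core */
	return 0;
}

static __net_exit void foo_net_exit(struct net *net)
{
	/* nothing to release; the core frees the net_generic() area */
}

static struct pernet_operations foo_net_ops = {
	.init  = foo_net_init,
	.exit  = foo_net_exit,
	.id    = &foo_net_id,
	.size  = sizeof(struct foo_net),
	.async = true,	/* setup/teardown do not depend on net_mutex */
};

Registration itself is unchanged (register_pernet_subsys() or register_pernet_device()); the flag only tells the core these callbacks may run outside the global lock.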
diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 172744a72eb7..7b1656427cbe 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -42,4 +42,18 @@
#define DP83867_RGMIIDCTL_3_75_NS 0xe
#define DP83867_RGMIIDCTL_4_00_NS 0xf
+/* IO_MUX_CFG - Clock output selection */
+#define DP83867_CLK_O_SEL_CHN_A_RCLK 0x0
+#define DP83867_CLK_O_SEL_CHN_B_RCLK 0x1
+#define DP83867_CLK_O_SEL_CHN_C_RCLK 0x2
+#define DP83867_CLK_O_SEL_CHN_D_RCLK 0x3
+#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5 0x4
+#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5 0x5
+#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5 0x6
+#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5 0x7
+#define DP83867_CLK_O_SEL_CHN_A_TCLK 0x8
+#define DP83867_CLK_O_SEL_CHN_B_TCLK 0x9
+#define DP83867_CLK_O_SEL_CHN_C_TCLK 0xA
+#define DP83867_CLK_O_SEL_CHN_D_TCLK 0xB
+#define DP83867_CLK_O_SEL_REF_CLK 0xC
#endif
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 4d356e168692..40373920ea58 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -113,10 +113,12 @@ extern void aarp_proto_init(void);
/* Inter module exports */
/* Give a device find its atif control structure */
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
{
return dev->atalk_ptr;
}
+#endif
extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev);
extern struct net_device *atrtr_get_dev(struct atalk_addr *sa);
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 3ce61342fa31..b0a7f315bfbe 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,15 +136,21 @@ enum virtchnl_ops {
VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
VIRTCHNL_OP_REQUEST_QUEUES = 29,
+ VIRTCHNL_OP_ENABLE_CHANNELS = 30,
+ VIRTCHNL_OP_DISABLE_CHANNELS = 31,
+ VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
+ VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
};
-/* This macro is used to generate a compilation error if a structure
+/* These macros are used to generate compilation errors if a structure/union
* is not exactly the correct length. It gives a divide by zero error if the
- * structure is not of the correct size, otherwise it creates an enum that is
- * never used.
+ * structure/union is not of the correct size, otherwise it creates an enum
+ * that is never used.
*/
#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
{ virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \
+ { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
/* Virtual channel message descriptor. This overlays the admin queue
* descriptor. All other data is passed in external buffers.
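As the reworded comment explains, the length checks fail the build via a divide by zero when a structure or union does not have the expected size. A small sketch of the mechanism, using a made-up struct:

/* Sketch: a size mismatch makes the divisor zero, so the enum initializer
 * is rejected at compile time.  'example' is a made-up struct.
 */
struct example {
	u32 a;
	u32 b;
};

VIRTCHNL_CHECK_STRUCT_LEN(8, example);		/* ok: sizeof(struct example) == 8 */
/* VIRTCHNL_CHECK_STRUCT_LEN(12, example);	   would not build: 12 / 0 */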
@@ -244,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000
#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000
#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000
+#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000
#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -496,6 +503,81 @@ struct virtchnl_rss_hena {
VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
+/* VIRTCHNL_OP_ENABLE_CHANNELS
+ * VIRTCHNL_OP_DISABLE_CHANNELS
+ * VF sends these messages to enable or disable channels based on
+ * the user specified queue count and queue offset for each traffic class.
+ * This struct encompasses all the information that the PF needs from
+ * VF to create a channel.
+ */
+struct virtchnl_channel_info {
+ u16 count; /* number of queues in a channel */
+ u16 offset; /* queues in a channel start from 'offset' */
+ u32 pad;
+ u64 max_tx_rate;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
+
+struct virtchnl_tc_info {
+ u32 num_tc;
+ u32 pad;
+ struct virtchnl_channel_info list[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
+
+/* VIRTCHNL_ADD_CLOUD_FILTER
+ * VIRTCHNL_DEL_CLOUD_FILTER
+ * VF sends these messages to add or delete a cloud filter based on the
+ * user specified match and action filters. These structures encompass
+ * all the information that the PF needs from the VF to add/delete a
+ * cloud filter.
+ */
+
+struct virtchnl_l4_spec {
+ u8 src_mac[ETH_ALEN];
+ u8 dst_mac[ETH_ALEN];
+ __be16 vlan_id;
+ __be16 pad; /* reserved for future use */
+ __be32 src_ip[4];
+ __be32 dst_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec);
+
+union virtchnl_flow_spec {
+ struct virtchnl_l4_spec tcp_spec;
+ u8 buffer[128]; /* reserved for future use */
+};
+
+VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec);
+
+enum virtchnl_action {
+ /* action types */
+ VIRTCHNL_ACTION_DROP = 0,
+ VIRTCHNL_ACTION_TC_REDIRECT,
+};
+
+enum virtchnl_flow_type {
+ /* flow types */
+ VIRTCHNL_TCP_V4_FLOW = 0,
+ VIRTCHNL_TCP_V6_FLOW,
+};
+
+struct virtchnl_filter {
+ union virtchnl_flow_spec data;
+ union virtchnl_flow_spec mask;
+ enum virtchnl_flow_type flow_type;
+ enum virtchnl_action action;
+ u32 action_meta;
+ __u8 field_flags;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
+
/* VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
@@ -711,6 +793,25 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_REQUEST_QUEUES:
valid_len = sizeof(struct virtchnl_vf_res_request);
break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ valid_len = sizeof(struct virtchnl_tc_info);
+ if (msglen >= valid_len) {
+ struct virtchnl_tc_info *vti =
+ (struct virtchnl_tc_info *)msg;
+ valid_len += vti->num_tc *
+ sizeof(struct virtchnl_channel_info);
+ if (vti->num_tc == 0)
+ err_msg_format = true;
+ }
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+ valid_len = sizeof(struct virtchnl_filter);
+ break;
+ case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+ valid_len = sizeof(struct virtchnl_filter);
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
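For the new variable-length VIRTCHNL_OP_ENABLE_CHANNELS message, the validator above expects sizeof(struct virtchnl_tc_info) plus one virtchnl_channel_info per traffic class, and flags num_tc == 0 as a format error. A sketch of how a sender might size such a message to satisfy that check; the function is illustrative and assumes <linux/slab.h>:

/* Sketch (sender side): size an ENABLE_CHANNELS request so that msglen
 * covers the valid_len computed by the validator above.
 */
static struct virtchnl_tc_info *example_build_tc_info(u16 num_tc, size_t *lenp)
{
	size_t len = sizeof(struct virtchnl_tc_info) +
		     num_tc * sizeof(struct virtchnl_channel_info);
	struct virtchnl_tc_info *vti = kzalloc(len, GFP_KERNEL);

	if (!vti)
		return NULL;
	vti->num_tc = num_tc;	/* num_tc == 0 is rejected as err_msg_format */
	*lenp = len;
	return vti;
}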
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7f16e0f8d68..8a4566691c8f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && sk) { \
+ if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \
BPF_CGROUP_INET_SOCK_CREATE); \
} \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 66df387106de..819229c80eca 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -21,6 +21,7 @@ struct bpf_verifier_env;
struct perf_event;
struct bpf_prog;
struct bpf_map;
+struct sock;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 19b8349a3809..5e2e8a49fb21 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -13,6 +13,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
#endif
#ifdef CONFIG_BPF_EVENTS
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2ec41a7eb54f..ebe41811ed34 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -371,6 +371,11 @@ struct ethtool_ops {
u8 *hfunc);
int (*set_rxfh)(struct net_device *, const u32 *indir,
const u8 *key, const u8 hfunc);
+ int (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key,
+ u8 *hfunc, u32 rss_context);
+ int (*set_rxfh_context)(struct net_device *, const u32 *indir,
+ const u8 *key, const u8 hfunc,
+ u32 *rss_context, bool delete);
void (*get_channels)(struct net_device *, struct ethtool_channels *);
int (*set_channels)(struct net_device *, struct ethtool_channels *);
int (*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..109d05ccea9a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,7 +20,6 @@
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
-#include <net/xdp.h>
#include <net/sch_generic.h>
#include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
struct sock;
struct seccomp_data;
struct bpf_prog_aux;
+struct xdp_rxq_info;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -507,6 +507,22 @@ struct xdp_buff {
struct xdp_rxq_info *rxq;
};
+struct sk_msg_buff {
+ void *data;
+ void *data_end;
+ __u32 apply_bytes;
+ __u32 cork_bytes;
+ int sg_copybreak;
+ int sg_start;
+ int sg_curr;
+ int sg_end;
+ struct scatterlist sg_data[MAX_SKB_FRAGS];
+ bool sg_copy[MAX_SKB_FRAGS];
+ __u32 key;
+ __u32 flags;
+ struct bpf_map *map;
+};
+
/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
@@ -771,6 +787,7 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
+struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ee6657a0ed69..8fe7e4306816 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -8,6 +8,7 @@
* Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
* Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -2111,7 +2112,7 @@ enum ieee80211_key_len {
#define FILS_ERP_MAX_REALM_LEN 253
#define FILS_ERP_MAX_RRK_LEN 64
-#define PMK_MAX_LEN 48
+#define PMK_MAX_LEN 64
/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
enum ieee80211_pub_actioncode {
@@ -2502,6 +2503,17 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr)
}
/**
+ * ieee80211_get_tid - get qos TID
+ * @hdr: the frame
+ */
+static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
+{
+ u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+ return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
+}
+
+/**
* ieee80211_get_SA - get pointer to SA
* @hdr: the frame
*
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
new file mode 100644
index 000000000000..70e7e5673ce9
--- /dev/null
+++ b/include/linux/mlx5/accel.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_H__
+#define __MLX5_ACCEL_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_accel_esp_aes_gcm_keymat_iv_algo {
+ MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ,
+};
+
+enum mlx5_accel_esp_flags {
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0,
+ MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1,
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+ MLX5_ACCEL_ESP_ACTION_DECRYPT,
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+enum mlx5_accel_esp_keymats {
+ MLX5_ACCEL_ESP_KEYMAT_AES_NONE,
+ MLX5_ACCEL_ESP_KEYMAT_AES_GCM,
+};
+
+enum mlx5_accel_esp_replay {
+ MLX5_ACCEL_ESP_REPLAY_NONE,
+ MLX5_ACCEL_ESP_REPLAY_BMP,
+};
+
+struct aes_gcm_keymat {
+ u64 seq_iv;
+ enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo;
+
+ u32 salt;
+ u32 icv_len;
+
+ u32 key_len;
+ u32 aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+ enum mlx5_accel_esp_action action;
+ u32 esn;
+ u32 spi;
+ u32 seq;
+ u32 tfc_pad;
+ u32 flags;
+ u32 sa_handle;
+ enum mlx5_accel_esp_replay replay_type;
+ union {
+ struct {
+ u32 size;
+
+ } bmp;
+ } replay;
+ enum mlx5_accel_esp_keymats keymat_type;
+ union {
+ struct aes_gcm_keymat aes_gcm;
+ } keymat;
+};
+
+struct mlx5_accel_esp_xfrm {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
+enum {
+ MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0,
+};
+
+enum mlx5_accel_ipsec_cap {
+ MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0,
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1,
+ MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2,
+ MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3,
+ MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4,
+ MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5,
+ MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6,
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7,
+};
+
+#ifdef CONFIG_MLX5_ACCEL
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags);
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+#else
+
+static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {}
+static inline int
+mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; }
+
+#endif
+#endif
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 48c181a2acc9..445ad194e0fe 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -60,6 +60,7 @@ struct mlx5_core_cq {
} tasklet_ctx;
int reset_notify_added;
struct list_head reset_notify;
+ struct mlx5_eq *eq;
};
@@ -171,8 +172,17 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
}
-int mlx5_init_cq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
+static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
+{
+ refcount_inc(&cq->refcount);
+}
+
+static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
+{
+ if (refcount_dec_and_test(&cq->refcount))
+ complete(&cq->free);
+}
+
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
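The new helpers encode the hold/put pattern around the CQ refcount: the final put completes cq->free, which the teardown path can wait on before freeing the CQ. A minimal usage sketch; only mlx5_cq_hold() and mlx5_cq_put() come from the patch, the surrounding function is illustrative:

/* Sketch: keep a CQ alive while a handler works on it. */
static void example_process_cq(struct mlx5_core_cq *cq)
{
	mlx5_cq_hold(cq);	/* take a reference across the handler */
	/* ... consume completions ... */
	mlx5_cq_put(cq);	/* final put completes &cq->free for destroy */
}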
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9d3a03364e6e..cded85ab6fe4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -345,13 +345,6 @@ struct mlx5_buf_list {
dma_addr_t map;
};
-struct mlx5_buf {
- struct mlx5_buf_list direct;
- int npages;
- int size;
- u8 page_shift;
-};
-
struct mlx5_frag_buf {
struct mlx5_buf_list *frags;
int npages;
@@ -359,6 +352,15 @@ struct mlx5_frag_buf {
u8 page_shift;
};
+struct mlx5_frag_buf_ctrl {
+ struct mlx5_frag_buf frag_buf;
+ u32 sz_m1;
+ u32 frag_sz_m1;
+ u8 log_sz;
+ u8 log_stride;
+ u8 log_frag_strides;
+};
+
struct mlx5_eq_tasklet {
struct list_head list;
struct list_head process_list;
@@ -375,11 +377,18 @@ struct mlx5_eq_pagefault {
mempool_t *pool;
};
+struct mlx5_cq_table {
+ /* protect radix tree */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
struct mlx5_eq {
struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
__be32 __iomem *doorbell;
u32 cons_index;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
int size;
unsigned int irqn;
u8 eqn;
@@ -526,13 +535,6 @@ struct mlx5_core_health {
struct delayed_work recover_work;
};
-struct mlx5_cq_table {
- /* protect radix tree
- */
- spinlock_t lock;
- struct radix_tree_root tree;
-};
-
struct mlx5_qp_table {
/* protect radix tree
*/
@@ -654,10 +656,6 @@ struct mlx5_priv {
struct dentry *cmdif_debugfs;
/* end: qp staff */
- /* start: cq staff */
- struct mlx5_cq_table cq_table;
- /* end: cq staff */
-
/* start: mkey staff */
struct mlx5_mkey_table mkey_table;
/* end: mkey staff */
@@ -936,9 +934,9 @@ struct mlx5_hca_vport_context {
bool grh_required;
};
-static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset)
{
- return buf->direct.buf + offset;
+ return buf->frags->buf + offset;
}
#define STRUCT_FIELD(header, field) \
@@ -977,6 +975,25 @@ static inline u32 mlx5_base_mkey(const u32 key)
return key & 0xffffff00u;
}
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+ void *cqc)
+{
+ fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+ fbc->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
+ fbc->sz_m1 = (1 << fbc->log_sz) - 1;
+ fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
+ fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
+}
+
+static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
+ u32 ix)
+{
+ unsigned int frag = (ix >> fbc->log_frag_strides);
+
+ return fbc->frag_buf.frags[frag].buf +
+ ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
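To make the fragment math above concrete: with 4 KiB pages and 64-byte CQEs (cqe_sz == 0), log_stride is 6, log_frag_strides is 12 - 6 = 6 and frag_sz_m1 is 63, so entry 100 lives in fragment 100 >> 6 == 1 at byte offset (100 & 63) << 6 within it. A sketch of a lookup built on the new helper; the wrapper name is illustrative:

/* Sketch: masking with sz_m1 wraps the consumer index at the CQ size before
 * the fragment lookup.
 */
static void *example_get_cqe(struct mlx5_frag_buf_ctrl *fbc, u32 cons_index)
{
	return mlx5_frag_buf_get_wqe(fbc, cons_index & fbc->sz_m1);
}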
@@ -1002,9 +1019,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_buf *buf, int node);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+ struct mlx5_frag_buf *buf, int node);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
@@ -1049,22 +1067,12 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
void mlx5_register_debugfs(void);
void mlx5_unregister_debugfs(void);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1076,14 +1084,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
int node);
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644
index 000000000000..d3c9db492b30
--- /dev/null
+++ b/include/linux/mlx5/eswitch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+ SRIOV_NONE,
+ SRIOV_LEGACY,
+ SRIOV_OFFLOADS
+};
+
+enum {
+ REP_ETH,
+ REP_IB,
+ NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+ int (*load)(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep);
+ void (*unload)(struct mlx5_eswitch_rep *rep);
+ void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ void *priv;
+ bool valid;
+};
+
+struct mlx5_eswitch_rep {
+ struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+ u16 vport;
+ u8 hw_id[ETH_ALEN];
+ u16 vlan;
+ u32 vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *rep_if,
+ u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+ int vport, u32 sqn);
+#endif
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index a0b48afcb422..b957e52434f8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -40,6 +40,8 @@
enum {
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16,
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17,
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT = 1 << 18,
};
enum {
@@ -69,6 +71,7 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
MLX5_FLOW_NAMESPACE_SNIFFER_RX,
MLX5_FLOW_NAMESPACE_SNIFFER_TX,
+ MLX5_FLOW_NAMESPACE_EGRESS,
};
struct mlx5_flow_table;
@@ -141,9 +144,11 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
struct mlx5_flow_act {
u32 action;
+ bool has_flow_tag;
u32 flow_tag;
u32 encap_id;
u32 modify_id;
+ uintptr_t esp_id;
};
#define MLX5_DECLARE_FLOW_ACT(name) \
diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h
new file mode 100644
index 000000000000..7b476bbae731
--- /dev/null
+++ b/include/linux/mlx5/fs_helpers.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_HELPERS_
+#define _MLX5_FS_HELPERS_
+
+#include <linux/mlx5/mlx5_ifc.h>
+
+#define MLX5_FS_IPV4_VERSION 4
+#define MLX5_FS_IPV6_VERSION 6
+
+static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c,
+ const u32 *match_v, u8 match)
+{
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+
+ return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match;
+}
+
+static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP);
+}
+
+static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP);
+}
+
+static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+
+ return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni);
+}
+
+static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev,
+ const u32 *match_c,
+ const u32 *match_v, int version)
+{
+ int match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+
+ if (!match_ipv) {
+ u16 ethertype;
+
+ switch (version) {
+ case MLX5_FS_IPV4_VERSION:
+ ethertype = ETH_P_IP;
+ break;
+ case MLX5_FS_IPV6_VERSION:
+ ethertype = ETH_P_IPV6;
+ break;
+ default:
+ return false;
+ }
+
+ return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+ ethertype) == 0xffff &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+ ethertype) == ethertype;
+ }
+
+ return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+ ip_version) == 0xf &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+ ip_version) == version;
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv4_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+ MLX5_FS_IPV4_VERSION);
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+ const u32 *match_v)
+{
+ return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+ MLX5_FS_IPV6_VERSION);
+}
+
+static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c)
+{
+ void *misc_params_c =
+ MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+
+ return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+}
+
+#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f4e417686f62..14ad84afe8ba 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -295,7 +295,9 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 inner_tcp_dport[0x1];
u8 inner_tcp_flags[0x1];
u8 reserved_at_37[0x9];
- u8 reserved_at_40[0x1a];
+ u8 reserved_at_40[0x17];
+ u8 outer_esp_spi[0x1];
+ u8 reserved_at_58[0x2];
u8 bth_dst_qp[0x1];
u8 reserved_at_5b[0x25];
@@ -437,7 +439,9 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 reserved_at_120[0x28];
u8 bth_dst_qp[0x18];
- u8 reserved_at_160[0xa0];
+ u8 reserved_at_160[0x20];
+ u8 outer_esp_spi[0x20];
+ u8 reserved_at_1a0[0x60];
};
struct mlx5_ifc_cmd_pas_bits {
@@ -1091,6 +1095,7 @@ enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1,
MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2,
+ MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99,
MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100,
};
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 255a88d08078..ec052491ba3d 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -373,7 +373,10 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
struct mlx5_ifc_ipsec_extended_cap_bits {
u8 encapsulation[0x20];
- u8 reserved_0[0x15];
+ u8 reserved_0[0x12];
+ u8 v2_command[0x1];
+ u8 udp_encap[0x1];
+ u8 rx_no_trailer[0x1];
u8 ipv4_fragment[0x1];
u8 ipv6[0x1];
u8 esn[0x1];
@@ -429,4 +432,91 @@ struct mlx5_ifc_ipsec_counters_bits {
u8 dropped_cmd[0x40];
};
+enum mlx5_ifc_fpga_ipsec_response_syndrome {
+ MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
+ MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+ MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2,
+ MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_resp {
+ __be32 syndrome;
+ union {
+ __be32 sw_sa_handle;
+ __be32 flags;
+ };
+ u8 reserved[24];
+} __packed;
+
+enum mlx5_ifc_fpga_ipsec_cmd_opcode {
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0,
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1,
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2,
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3,
+ MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4,
+ MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5,
+};
+
+enum mlx5_ifc_fpga_ipsec_cap {
+ MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0),
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_cap {
+ __be32 cmd;
+ __be32 flags;
+ u8 reserved[24];
+} __packed;
+
+enum mlx5_ifc_fpga_ipsec_sa_flags {
+ MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0),
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1),
+ MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2),
+ MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3),
+ MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4),
+ MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5),
+ MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6),
+ MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7),
+};
+
+enum mlx5_ifc_fpga_ipsec_sa_enc_mode {
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0,
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1,
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_sa_v1 {
+ __be32 cmd;
+ u8 key_enc[32];
+ u8 key_auth[32];
+ __be32 sip[4];
+ __be32 dip[4];
+ union {
+ struct {
+ __be32 reserved;
+ u8 salt_iv[8];
+ __be32 salt;
+ } __packed gcm;
+ struct {
+ u8 salt[16];
+ } __packed cbc;
+ };
+ __be32 spi;
+ __be32 sw_sa_handle;
+ __be16 tfclen;
+ u8 enc_mode;
+ u8 reserved1[2];
+ u8 flags;
+ u8 reserved2[2];
+};
+
+struct mlx5_ifc_fpga_ipsec_sa {
+ struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1;
+ __be16 udp_sp;
+ __be16 udp_dp;
+ u8 reserved1[4];
+ __be32 esn;
+ __be16 vid; /* only 12 bits, rest is reserved */
+ __be16 reserved2;
+} __packed;
+
#endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 5396521a776a..7ed82e4f11b3 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -4,11 +4,10 @@
#include <linux/in.h>
#include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
@@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
}
#endif
-struct vif_device {
- struct net_device *dev; /* Device we are using */
- struct netdev_phys_item_id dev_parent_id; /* Device parent ID */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- __be32 local,remote; /* Addresses(remote for tunnels)*/
- int link; /* Physical interface index */
-};
-
struct vif_entry_notifier_info {
struct fib_notifier_info info;
struct net_device *dev;
@@ -78,34 +65,6 @@ struct vif_entry_notifier_info {
#define VIFF_STATIC 0x8000
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock __rcu *mroute_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc_unres_queue;
- struct vif_device vif_table[MAXVIFS];
- struct rhltable mfc_hash;
- struct list_head mfc_cache_list;
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
- int mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
- MFC_STATIC = BIT(0),
- MFC_OFFLOAD = BIT(1),
-};
-
struct mfc_cache_cmp_arg {
__be32 mfc_mcastgrp;
__be32 mfc_origin;
@@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg {
/**
* struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
* @mfc_mcastgrp: destination multicast group address
* @mfc_origin: source address
* @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
*/
struct mfc_cache {
- struct rhlist_head mnode;
+ struct mr_mfc _c;
union {
struct {
__be32 mfc_mcastgrp;
@@ -142,28 +86,6 @@ struct mfc_cache {
};
struct mfc_cache_cmp_arg cmparg;
};
- vifi_t mfc_parent;
- int mfc_flags;
-
- union {
- struct {
- unsigned long expires;
- struct sk_buff_head unresolved;
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXVIFS];
- refcount_t refcount;
- } res;
- } mfc_un;
- struct list_head list;
- struct rcu_head rcu;
};
struct mfc_entry_notifier_info {
@@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
static inline void ipmr_cache_put(struct mfc_cache *c)
{
- if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+ if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
ipmr_cache_free(c);
}
static inline void ipmr_cache_hold(struct mfc_cache *c)
{
- refcount_inc(&c->mfc_un.res.refcount);
+ refcount_inc(&c->_c.mfc_un.res.refcount);
}
#endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3014c52bfd86..1ac38e6819f5 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -7,6 +7,7 @@
#include <linux/skbuff.h> /* for struct sk_buff_head */
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IPV6_MROUTE
static inline int ip6_mroute_opt(int opt)
@@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void)
}
#endif
-struct mif_device {
- struct net_device *dev; /* Device we are using */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- int link; /* Physical interface index */
-};
-
#define VIFF_STATIC 0x8000
-struct mfc6_cache {
- struct list_head list;
- struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */
- struct in6_addr mf6c_origin; /* Source of packet */
- mifi_t mf6c_parent; /* Source interface */
- int mfc_flags; /* Flags on line */
+struct mfc6_cache_cmp_arg {
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+};
+struct mfc6_cache {
+ struct mr_mfc _c;
union {
struct {
- unsigned long expires;
- struct sk_buff_head unresolved; /* Unresolved buffers */
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXMIFS]; /* TTL thresholds */
- } res;
- } mfc_un;
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+ };
+ struct mfc6_cache_cmp_arg cmparg;
+ };
};
-#define MFC_STATIC 1
-#define MFC_NOTIFY 2
-
-#define MFC6_LINES 64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
- (__force u32)(a)->s6_addr32[1] ^ \
- (__force u32)(a)->s6_addr32[2] ^ \
- (__force u32)(a)->s6_addr32[3] ^ \
- (__force u32)(g)->s6_addr32[0] ^ \
- (__force u32)(g)->s6_addr32[1] ^ \
- (__force u32)(g)->s6_addr32[2] ^ \
- (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */
struct rtmsg;
@@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
struct rtmsg *rtm, u32 portid);
#ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
extern int ip6mr_sk_done(struct sock *sk);
#else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- return NULL;
+ return false;
}
static inline int ip6mr_sk_done(struct sock *sk)
{
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644
index 000000000000..c2560cb50f1d
--- /dev/null
+++ b/include/linux/mroute_base.h
@@ -0,0 +1,346 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egressing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+ struct net_device *dev;
+ unsigned long bytes_in, bytes_out;
+ unsigned long pkt_in, pkt_out;
+ unsigned long rate_limit;
+ unsigned char threshold;
+ unsigned short flags;
+ int link;
+
+ /* Currently only used by ipmr */
+ struct netdev_phys_item_id dev_parent_id;
+ __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; the value is defined in uapi using different symbols for
+ * mroute and mroute6, but both map to the same value of 32.
+ */
+#define MAXVIFS 32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+ MFC_STATIC = BIT(0),
+ MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+ struct rhlist_head mnode;
+ unsigned short mfc_parent;
+ int mfc_flags;
+
+ union {
+ struct {
+ unsigned long expires;
+ struct sk_buff_head unresolved;
+ } unres;
+ struct {
+ unsigned long last_assert;
+ int minvif;
+ int maxvif;
+ unsigned long bytes;
+ unsigned long pkt;
+ unsigned long wrong_if;
+ unsigned long lastuse;
+ unsigned char ttls[MAXVIFS];
+ refcount_t refcount;
+ } res;
+ } mfc_un;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+ const struct rhashtable_params *rht_params;
+ void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resolved routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+ struct list_head list;
+ possible_net_t net;
+ struct mr_table_ops ops;
+ u32 id;
+ struct sock __rcu *mroute_sk;
+ struct timer_list ipmr_expire_timer;
+ struct list_head mfc_unres_queue;
+ struct vif_device vif_table[MAXVIFS];
+ struct rhltable mfc_hash;
+ struct list_head mfc_cache_list;
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
+ int mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid the need for
+ * explicit casts they are declared as returning void pointers.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+ int vifi)
+{
+ return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+ int vifi, void *hasharg)
+{
+ return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+ return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ int ct;
+};
+
+struct mr_mfc_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ struct list_head *cache;
+
+ /* Lock protecting the mr_table's unresolved queue */
+ spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid the need for
+ * explicit casts they are declared as returning void pointers.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ struct mr_mfc_iter *it = seq->private;
+
+ it->mrt = mrt;
+ it->cache = NULL;
+ it->lock = lock;
+
+ return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ spin_unlock_bh(it->lock);
+ else if (it->cache == &mrt->mfc_cache_list)
+ rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+ loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+ void *v, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
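Editor's note on the refactor above: struct mr_mfc is deliberately the first member of both mfc_cache (IPv4) and mfc6_cache (IPv6), so helpers written against the common fields work for either family. A minimal sketch, assuming only the definitions added in this file; the helper name is hypothetical and not part of the patch:

#include <linux/mroute_base.h>
#include <linux/printk.h>
#include <linux/rculist.h>

/* Walk a table and print the common per-entry counters; the same body
 * serves IPv4 and IPv6 because only struct mr_mfc fields are touched.
 */
static void example_dump_mfc(struct mr_table *mrt)
{
	struct mr_mfc *c;

	rcu_read_lock();
	list_for_each_entry_rcu(c, &mrt->mfc_cache_list, list) {
		pr_info("iif=%u pkt=%lu bytes=%lu wrong_if=%lu\n",
			c->mfc_parent, c->mfc_un.res.pkt,
			c->mfc_un.res.bytes, c->mfc_un.res.wrong_if);
	}
	rcu_read_unlock();
}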
diff --git a/include/linux/net.h b/include/linux/net.h
index 2a0391eea05c..2248a052061d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
- int *sockaddr_len, int peer);
+ int peer);
__poll_t (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
@@ -295,10 +295,8 @@ int kernel_listen(struct socket *sock, int backlog);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
int *optlen);
int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
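Editor's note: the getname() conversion above changes the calling convention. Instead of filling an int * length argument, the functions now return the address length on success or a negative errno. A hedged sketch of an updated in-kernel caller (the helper name and its use are illustrative only):

#include <linux/errno.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/net.h>
#include <linux/socket.h>

static int example_local_port(struct socket *sock, __be16 *port)
{
	struct sockaddr_storage addr;
	int len;

	/* Return value is now the address length or a negative error. */
	len = kernel_getsockname(sock, (struct sockaddr *)&addr);
	if (len < 0)
		return len;

	if (addr.ss_family == AF_INET)
		*port = ((struct sockaddr_in *)&addr)->sin_port;
	else if (addr.ss_family == AF_INET6)
		*port = ((struct sockaddr_in6 *)&addr)->sin6_port;
	else
		return -EAFNOSUPPORT;

	return 0;
}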
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5eef6c8e2741..913b1cc882cf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -585,6 +585,15 @@ struct netdev_queue {
#endif
} ____cacheline_aligned_in_smp;
+extern int sysctl_fb_tunnels_only_for_init_net;
+
+static inline bool net_has_fallback_tunnels(const struct net *net)
+{
+ return net == &init_net ||
+ !IS_ENABLED(CONFIG_SYSCTL) ||
+ !sysctl_fb_tunnels_only_for_init_net;
+}
+
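Editor's note: a small sketch of how a tunnel driver's per-netns init could use the new helper to honour the fb_tunnels_only_for_init_net sysctl; example_create_fallback_dev() is a hypothetical stand-in for whatever the driver actually does:

static int __net_init example_tunnel_init_net(struct net *net)
{
	/* Skip the tunl%d-style fallback device where the sysctl forbids it. */
	if (!net_has_fallback_tunnels(net))
		return 0;

	return example_create_fallback_dev(net);	/* hypothetical helper */
}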
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
{
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -1381,8 +1390,6 @@ struct net_device_ops {
* @IFF_MACVLAN: Macvlan device
* @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
* underlying stacked devices
- * @IFF_IPVLAN_MASTER: IPvlan master device
- * @IFF_IPVLAN_SLAVE: IPvlan slave device
* @IFF_L3MDEV_MASTER: device is an L3 master device
* @IFF_NO_QUEUE: device can run without qdisc attached
* @IFF_OPENVSWITCH: device is a Open vSwitch master
@@ -1392,6 +1399,7 @@ struct net_device_ops {
* @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
* entity (i.e. the master device for bridged veth)
* @IFF_MACSEC: device is a MACsec device
+ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1412,16 +1420,15 @@ enum netdev_priv_flags {
IFF_LIVE_ADDR_CHANGE = 1<<15,
IFF_MACVLAN = 1<<16,
IFF_XMIT_DST_RELEASE_PERM = 1<<17,
- IFF_IPVLAN_MASTER = 1<<18,
- IFF_IPVLAN_SLAVE = 1<<19,
- IFF_L3MDEV_MASTER = 1<<20,
- IFF_NO_QUEUE = 1<<21,
- IFF_OPENVSWITCH = 1<<22,
- IFF_L3MDEV_SLAVE = 1<<23,
- IFF_TEAM = 1<<24,
- IFF_RXFH_CONFIGURED = 1<<25,
- IFF_PHONY_HEADROOM = 1<<26,
- IFF_MACSEC = 1<<27,
+ IFF_L3MDEV_MASTER = 1<<18,
+ IFF_NO_QUEUE = 1<<19,
+ IFF_OPENVSWITCH = 1<<20,
+ IFF_L3MDEV_SLAVE = 1<<21,
+ IFF_TEAM = 1<<22,
+ IFF_RXFH_CONFIGURED = 1<<23,
+ IFF_PHONY_HEADROOM = 1<<24,
+ IFF_MACSEC = 1<<25,
+ IFF_NO_RX_HANDLER = 1<<26,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1442,8 +1449,6 @@ enum netdev_priv_flags {
#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE
#define IFF_MACVLAN IFF_MACVLAN
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
-#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER
-#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE
#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER
#define IFF_NO_QUEUE IFF_NO_QUEUE
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
@@ -1451,6 +1456,7 @@ enum netdev_priv_flags {
#define IFF_TEAM IFF_TEAM
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
#define IFF_MACSEC IFF_MACSEC
+#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
/**
* struct net_device - The DEVICE structure.
@@ -1798,11 +1804,17 @@ struct net_device {
#if IS_ENABLED(CONFIG_TIPC)
struct tipc_bearer __rcu *tipc_ptr;
#endif
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
void *atalk_ptr;
+#endif
struct in_device __rcu *ip_ptr;
+#if IS_ENABLED(CONFIG_DECNET)
struct dn_dev __rcu *dn_ptr;
+#endif
struct inet6_dev __rcu *ip6_ptr;
+#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
+#endif
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
@@ -4217,16 +4229,6 @@ static inline bool netif_is_macvlan_port(const struct net_device *dev)
return dev->priv_flags & IFF_MACVLAN_PORT;
}
-static inline bool netif_is_ipvlan(const struct net_device *dev)
-{
- return dev->priv_flags & IFF_IPVLAN_SLAVE;
-}
-
-static inline bool netif_is_ipvlan_port(const struct net_device *dev)
-{
- return dev->priv_flags & IFF_IPVLAN_MASTER;
-}
-
static inline bool netif_is_bond_master(const struct net_device *dev)
{
return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7c4c2379e010..f0b5870a6d40 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -999,6 +999,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
static inline int phy_read_status(struct phy_device *phydev)
{
if (!phydev->drv)
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index a079656b614c..059242030631 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -75,5 +75,9 @@ void __init ptp_classifier_init(void);
static inline void ptp_classifier_init(void)
{
}
+static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
+{
+ return PTP_CLASS_NONE;
+}
#endif
#endif /* _PTP_CLASSIFY_H_ */
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index e6335227b844..6894976b54e3 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
void *ptr;
+ /* The READ_ONCE in __ptr_ring_peek guarantees that anyone
+ * accessing data through the pointer is up to date. Pairs
+ * with smp_wmb in __ptr_ring_produce.
+ */
ptr = __ptr_ring_peek(r);
if (ptr)
__ptr_ring_discard_one(r);
- /* Make sure anyone accessing data through the pointer is up to date. */
- /* Pairs with smp_wmb in __ptr_ring_produce. */
- smp_read_barrier_depends();
return ptr;
}
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fdcde96eb65..562a175c35a9 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -33,9 +33,10 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
+extern int rtnl_lock_killable(void);
extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
+extern struct rw_semaphore net_sem;
#ifdef CONFIG_PROVE_LOCKING
extern bool lockdep_rtnl_is_held(void);
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index e724d5a3dd80..ebce9e24906a 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
return PORT_OTHER;
}
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- return PHY_INTERFACE_MODE_NA;
-}
-
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
{
}
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
static inline int sfp_get_module_info(struct sfp_bus *bus,
struct ethtool_modinfo *modinfo)
{
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 99df17109e1b..47082f54ec1f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -466,6 +466,9 @@ struct ubuf_info {
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
+void mm_unaccount_pinned_pages(struct mmpin *mmp);
+
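Editor's note: these newly exported accounting helpers charge and release pinned pages against the owner's locked-memory limit. A hedged sketch of the expected pairing; the pinning step itself is omitted and hypothetical:

static int example_pin_user_buffer(struct mmpin *mmp, size_t bytes)
{
	int err;

	/* Charge before pinning; fails when the locked-memory limit is hit. */
	err = mm_account_pinned_pages(mmp, bytes);
	if (err)
		return err;

	err = example_do_pin(bytes);		/* hypothetical */
	if (err)
		mm_unaccount_pinned_pages(mmp);	/* undo the charge on failure */

	return err;
}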
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9286a5a8c60c..60e01482a9c4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
#define MSG_EOF MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
@@ -353,4 +354,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen
unsigned int flags, struct timespec *timeout);
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
#endif /* _LINUX_SOCKET_H */
diff --git a/include/net/Space.h b/include/net/Space.h
index 27fb5c937c4f..9cce0d80d37a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,8 +20,6 @@ struct net_device *cs89x0_probe(int unit);
struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *mac8390_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
struct net_device *cops_probe(int unit);
struct net_device *ltpc_probe(void);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 6ed9692f20bd..e0a9c2003b24 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,13 +87,17 @@ struct tc_action_ops {
struct tcf_result *);
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
void (*cleanup)(struct tc_action *);
- int (*lookup)(struct net *, struct tc_action **, u32);
+ int (*lookup)(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
- int bind);
+ int bind, struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
- struct netlink_callback *, int, const struct tc_action_ops *);
+ struct netlink_callback *, int,
+ const struct tc_action_ops *,
+ struct netlink_ext_ack *);
void (*stats_update)(struct tc_action *, u64, u32, u64);
+ size_t (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a);
};
@@ -137,7 +141,8 @@ static inline void tc_action_net_exit(struct list_head *net_list,
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops);
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack);
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind);
@@ -162,10 +167,12 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions);
+ struct list_head *actions, size_t *attr_size,
+ struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind);
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c4185a7b0e90..132e5b95167a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags);
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 76fb39c272a7..c91bc87931c7 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
extern ax25_dev *ax25_dev_list;
extern spinlock_t ax25_dev_lock;
+#if IS_ENABLED(CONFIG_AX25)
static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
{
return dev->ax25_ptr;
}
+#endif
ax25_dev *ax25_addr_ax25dev(ax25_address *);
void ax25_dev_device_up(struct net_device *);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 81174f9b8d14..fc40843baed3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1147,6 +1147,7 @@ struct cfg80211_tid_stats {
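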
* @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
* @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
* (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
+ * @ack_signal: signal strength (in dBm) of the last ACK frame.
*/
struct station_info {
u64 filled;
@@ -1191,6 +1192,7 @@ struct station_info {
u64 rx_duration;
u8 rx_beacon_signal_avg;
struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
+ s8 ack_signal;
};
#if IS_ENABLED(CONFIG_CFG80211)
@@ -1905,11 +1907,16 @@ struct cfg80211_auth_request {
* @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n)
* @ASSOC_REQ_DISABLE_VHT: Disable VHT
* @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
+ * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external
+ * authentication capability. Drivers can offload authentication to
+ * userspace if this flag is set. Only applicable for cfg80211_connect()
+ * request (connect callback).
*/
enum cfg80211_assoc_req_flags {
- ASSOC_REQ_DISABLE_HT = BIT(0),
- ASSOC_REQ_DISABLE_VHT = BIT(1),
- ASSOC_REQ_USE_RRM = BIT(2),
+ ASSOC_REQ_DISABLE_HT = BIT(0),
+ ASSOC_REQ_DISABLE_VHT = BIT(1),
+ ASSOC_REQ_USE_RRM = BIT(2),
+ CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3),
};
/**
@@ -2601,6 +2608,33 @@ struct cfg80211_pmk_conf {
};
/**
+ * struct cfg80211_external_auth_params - Trigger External authentication.
+ *
+ * Commonly used across the external auth request and event interfaces.
+ *
+ * @action: action type / trigger for external authentication. Only significant
+ * for the authentication request event interface (driver to user space).
+ * @bssid: BSSID of the peer with which the authentication has
+ * to happen. Used by both the authentication request event and
+ * authentication response command interface.
+ * @ssid: SSID of the AP. Used by both the authentication request event and
+ * authentication response command interface.
+ * @key_mgmt_suite: AKM suite of the respective authentication. Used by the
+ * authentication request event interface.
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication,
+ * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you
+ * the real status code for failures. Used only for the authentication
+ * response command interface (user space to driver).
+ */
+struct cfg80211_external_auth_params {
+ enum nl80211_external_auth_action action;
+ u8 bssid[ETH_ALEN] __aligned(2);
+ struct cfg80211_ssid ssid;
+ unsigned int key_mgmt_suite;
+ u16 status;
+};
+
+/**
* struct cfg80211_ops - backend description for wireless configuration
*
* This struct is registered by fullmac card drivers and/or wireless stacks
@@ -2923,6 +2957,9 @@ struct cfg80211_pmk_conf {
* (invoked with the wireless_dev mutex held)
* @del_pmk: delete the previously configured PMK for the given authenticator.
* (invoked with the wireless_dev mutex held)
+ *
+ * @external_auth: indicates result of offloaded authentication processing from
+ * user space
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3216,6 +3253,8 @@ struct cfg80211_ops {
const struct cfg80211_pmk_conf *conf);
int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
const u8 *aa);
+ int (*external_auth)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_external_auth_params *params);
};
/*
@@ -3517,6 +3556,35 @@ enum wiphy_vendor_command_flags {
};
/**
+ * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags
+ *
+ * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed
+ * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed
+ * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed
+ *
+ */
+enum wiphy_opmode_flag {
+ STA_OPMODE_MAX_BW_CHANGED = BIT(0),
+ STA_OPMODE_SMPS_MODE_CHANGED = BIT(1),
+ STA_OPMODE_N_SS_CHANGED = BIT(2),
+};
+
+/**
+ * struct sta_opmode_info - Station's ht/vht operation mode information
+ * @changed: contains value from &enum wiphy_opmode_flag
+ * @smps_mode: New SMPS mode of a station
+ * @bw: new max bandwidth value of a station
+ * @rx_nss: new rx_nss value of a station
+ */
+
+struct sta_opmode_info {
+ u32 changed;
+ u8 smps_mode;
+ u8 bw;
+ u8 rx_nss;
+};
+
+/**
* struct wiphy_vendor_command - vendor command definition
* @info: vendor command identifying information, as used in nl80211
* @flags: flags, see &enum wiphy_vendor_command_flags
@@ -4342,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
* of it being pushed into the SKB
* @addr: the device MAC address
* @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
* Return: 0 on success. Non-zero on error.
*/
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype);
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset);
/**
* ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -4357,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype)
{
- return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+ return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
}
/**
@@ -5685,6 +5755,20 @@ void cfg80211_radar_event(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef, gfp_t gfp);
/**
+ * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
+ * @dev: network device
+ * @mac: MAC address of a station which opmode got modified
+ * @sta_opmode: station's current opmode value
+ * @gfp: context flags
+ *
+ * Driver should call this function when station's opmode modified via action
+ * frame.
+ */
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp);
+
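Editor's note: illustrative driver-side use, assuming an SMPS change parsed from an action frame. The field values are examples, not mandated by this patch:

static void example_report_smps_change(struct net_device *dev,
				       const u8 *sta_mac, u8 new_smps_mode)
{
	struct sta_opmode_info opmode = {
		.changed   = STA_OPMODE_SMPS_MODE_CHANGED,
		.smps_mode = new_smps_mode,
	};

	/* Forward the station's new operating mode to user space. */
	cfg80211_sta_opmode_change_notify(dev, sta_mac, &opmode, GFP_ATOMIC);
}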
+/**
* cfg80211_cac_event - Channel availability check (CAC) event
* @netdev: network device
* @chandef: chandef for the current channel
@@ -5758,10 +5842,13 @@ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
* @addr: the address of the peer
* @cookie: the cookie filled in @probe_client previously
* @acked: indicates whether probe was acked or not
+ * @ack_signal: signal strength (in dBm) of the ACK frame.
+ * @is_valid_ack_signal: indicates whether ack_signal is valid
* @gfp: allocation flags
*/
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp);
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp);
/**
* cfg80211_report_obss_beacon - report beacon from other APs
@@ -6202,6 +6289,17 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
/* ethtool helper */
void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
+/**
+ * cfg80211_external_auth_request - userspace request for authentication
+ * @netdev: network device
+ * @params: External authentication parameters
+ * @gfp: allocation flags
+ * Returns: 0 on success, < 0 on error
+ */
+int cfg80211_external_auth_request(struct net_device *netdev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp);
+
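Editor's note: a hedged sketch of the driver side of the offload. When the firmware wants user space to run (for example) SAE authentication, the driver fills the parameters and raises the event; the AKM value and helper name are illustrative:

static int example_start_external_auth(struct net_device *netdev,
				       const u8 *bssid,
				       const u8 *ssid, size_t ssid_len)
{
	struct cfg80211_external_auth_params params = {
		.action         = NL80211_EXTERNAL_AUTH_START,
		.key_mgmt_suite = WLAN_AKM_SUITE_SAE,
	};

	memcpy(params.bssid, bssid, ETH_ALEN);
	memcpy(params.ssid.ssid, ssid, ssid_len);
	params.ssid.ssid_len = ssid_len;

	/* Ask user space to perform the authentication exchange for us. */
	return cfg80211_external_auth_request(netdev, &params, GFP_KERNEL);
}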
/* Logging, debugging and troubleshooting/diagnostic helpers. */
/* wiphy_printk helpers, similar to dev_printk */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4de35ed12bcc..d5b707375e48 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -234,13 +234,9 @@ struct devlink_dpipe_headers {
/**
* struct devlink_resource_ops - resource ops
* @occ_get: get the occupied size
- * @size_validate: validate the size of the resource before update, reload
- * is needed for changes to take place
*/
struct devlink_resource_ops {
u64 (*occ_get)(struct devlink *devlink);
- int (*size_validate)(struct devlink *devlink, u64 size,
- struct netlink_ext_ack *extack);
};
/**
@@ -410,7 +406,6 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6cb602dd970c..60fb4ec8ba61 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/of.h>
#include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
#include <net/devlink.h>
#include <net/switchdev.h>
@@ -101,6 +102,7 @@ struct dsa_platform_data {
};
struct packet_type;
+struct dsa_switch;
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -357,7 +359,7 @@ struct dsa_switch_ops {
void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
void (*get_ethtool_stats)(struct dsa_switch *ds,
int port, uint64_t *data);
- int (*get_sset_count)(struct dsa_switch *ds);
+ int (*get_sset_count)(struct dsa_switch *ds, int port);
/*
* ethtool Wake-on-LAN
@@ -368,6 +370,12 @@ struct dsa_switch_ops {
struct ethtool_wolinfo *w);
/*
+ * ethtool timestamp info
+ */
+ int (*get_ts_info)(struct dsa_switch *ds, int port,
+ struct ethtool_ts_info *ts);
+
+ /*
* Suspend and resume
*/
int (*suspend)(struct dsa_switch *ds);
@@ -469,6 +477,18 @@ struct dsa_switch_ops {
int port, struct net_device *br);
void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
int port, struct net_device *br);
+
+ /*
+ * PTP functionality
+ */
+ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+ int (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
+ struct ifreq *ifr);
+ bool (*port_txtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *clone, unsigned int type);
+ bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type);
};
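Editor's note: sketch of how a switch driver would wire up the new timestamping ops alongside get_ts_info; the foo_* handlers are placeholders with the prototypes declared above:

static const struct dsa_switch_ops foo_switch_ops = {
	/* ... existing ops elided ... */
	.get_ts_info		= foo_get_ts_info,
	.port_hwtstamp_get	= foo_port_hwtstamp_get,
	.port_hwtstamp_set	= foo_port_hwtstamp_set,
	.port_txtstamp		= foo_port_txtstamp,
	.port_rxtstamp		= foo_port_rxtstamp,
};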
struct dsa_switch_driver {
diff --git a/include/net/dst.h b/include/net/dst.h
index c63d2c37f6e9..b3219cd8a5a1 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -356,6 +356,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
* skb_tunnel_rx - prepare skb for rx reinsert
* @skb: buffer
* @dev: tunnel device
+ * @net: netns for packet i/o
*
* After decapsulation, packet is going to re-enter (netif_rx()) our stack,
* so make some cleanups, and perform accounting.
diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index 72fd5067c353..67634675e919 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -54,7 +54,7 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
* local BH must be disabled.
*/
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr);
+ const struct in6_addr *saddr);
/**
* dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
@@ -71,7 +71,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
* dst_cache_reset - invalidate the cache contents
* @dst_cache: the cache
*
- * This do not free the cached dst to avoid races and contentions.
+ * This does not free the cached dst to avoid races and contentions.
* the dst will be freed on later cache lookup.
*/
static inline void dst_cache_reset(struct dst_cache *dst_cache)
diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index bb7f467da7fc..29ba069a1d93 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
};
#endif /* !LINUX_NET_ETHOC_H */
-
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 648caf90ec07..e5cfcfc7dd93 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -26,7 +26,8 @@ struct fib_rule {
u32 table;
u8 action;
u8 l3mdev;
- /* 2 bytes hole, try to use */
+ u8 proto;
+ u8 ip_proto;
u32 target;
__be64 tun_id;
struct fib_rule __rcu *ctarget;
@@ -39,11 +40,14 @@ struct fib_rule {
char iifname[IFNAMSIZ];
char oifname[IFNAMSIZ];
struct fib_kuid_range uid_range;
+ struct fib_rule_port_range sport_range;
+ struct fib_rule_port_range dport_range;
struct rcu_head rcu;
};
struct fib_lookup_arg {
void *lookup_ptr;
+ const void *lookup_data;
void *result;
struct fib_rule *rule;
u32 table;
@@ -108,7 +112,12 @@ struct fib_rule_notifier_info {
[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
[FRA_GOTO] = { .type = NLA_U32 }, \
[FRA_L3MDEV] = { .type = NLA_U8 }, \
- [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }
+ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
+ [FRA_PROTOCOL] = { .type = NLA_U8 }, \
+ [FRA_IP_PROTO] = { .type = NLA_U8 }, \
+ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
static inline void fib_rule_get(struct fib_rule *rule)
{
@@ -142,6 +151,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
return frh->table;
}
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+ return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+ __be16 port)
+{
+ return ntohs(port) >= a->start &&
+ ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+ return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+ a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+ struct fib_rule_port_range *b)
+{
+ return a->start == b->start &&
+ a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+ return rule->ip_proto ||
+ fib_rule_port_range_set(&rule->sport_range) ||
+ fib_rule_port_range_set(&rule->dport_range);
+}
+
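Editor's note: the helpers above are meant to be used from the rule match path roughly as follows; a minimal sketch, with the rule and ports supplied by the caller:

static bool example_rule_ports_match(const struct fib_rule *rule,
				     __be16 sport, __be16 dport)
{
	/* An unset range (start == end == 0) matches everything. */
	if (fib_rule_port_range_set(&rule->sport_range) &&
	    !fib_rule_port_inrange(&rule->sport_range, sport))
		return false;

	if (fib_rule_port_range_set(&rule->dport_range) &&
	    !fib_rule_port_inrange(&rule->dport_range, dport))
		return false;

	return true;
}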
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
struct net *);
void fib_rules_unregister(struct fib_rules_ops *);
diff --git a/include/net/flow.h b/include/net/flow.h
index f1624fd5b1d0..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
fl4->daddr = daddr;
fl4->saddr = saddr;
}
-
+
struct flowi6 {
struct flowi_common __fl_common;
@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi4(fl4, &keys);
-}
-
#endif
diff --git a/include/net/gre.h b/include/net/gre.h
index f90585decbce..797142eee9cd 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs);
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
static inline int gre_calc_hlen(__be16 o_flags)
{
int addend = 4;
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index d91f9e7f4d71..960236fb1681 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -149,6 +149,8 @@ enum ieee80211_radiotap_ampdu_flags {
IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008,
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010,
IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020,
+ IEEE80211_RADIOTAP_AMPDU_EOF = 0x0040,
+ IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN = 0x0080,
};
/* for IEEE80211_RADIOTAP_VHT */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 5a54c9570977..500f81375200 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int inet_ctl_sock_create(struct sock **sk, unsigned short family,
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1a93ce35e62..b68fea022a82 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
diff --git a/include/net/ip.h b/include/net/ip.h
index f49b3a576bec..36f8f7811093 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb)
return 0;
}
+/* Special input handler for packets caught by the router alert option.
+ They are selected only by protocol field and then processed like
+ local ones, but only if someone wants them! Otherwise, a router
+ not running rsvpd would kill RSVP.
+
+ What user space does with them is its own problem.
+ I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
+ but the receiver should be clever enough, e.g., to forward mtrace requests
+ sent to a multicast group so they reach the destination designated router.
+ */
+
struct ip_ra_chain {
struct ip_ra_chain __rcu *next;
struct sock *sk;
@@ -101,8 +112,6 @@ struct ip_ra_chain {
struct rcu_head rcu;
};
-extern struct ip_ra_chain __rcu *ip_ra_chain;
-
/* IP flags. */
#define IP_CE 0x8000 /* Flag: "Congestion" */
#define IP_DF 0x4000 /* Flag: "Don't Fragment" */
@@ -186,15 +195,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -584,13 +593,13 @@ int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 34ec321d6a03..5e86fd9dc857 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -350,7 +350,8 @@ struct fib6_table {
typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
- struct flowi6 *, int);
+ struct flowi6 *,
+ const struct sk_buff *, int);
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
struct fib6_table *fib6_get_table(struct net *net, u32 id);
struct fib6_table *fib6_new_table(struct net *net, u32 id);
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
struct fib6_node *fib6_lookup(struct fib6_node *root,
@@ -415,6 +417,24 @@ void fib6_rules_cleanup(void);
bool fib6_rule_default(const struct fib_rule *rule);
int fib6_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv6.fib6_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
#else
static inline int fib6_rules_init(void)
{
@@ -436,5 +456,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
{
return 0;
}
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#endif
#endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index ac0866bb9e93..0084013d6bed 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags);
+ struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags);
+ const struct sk_buff *skb, int flags);
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int ifindex, struct flowi6 *fl6, int flags);
+ int ifindex, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
void ip6_route_init_special_entries(void);
int ip6_route_init(void);
@@ -126,8 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
@@ -269,4 +273,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 77d0a78cf7d2..81d0f2107ff1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -158,7 +158,7 @@ struct fib_result_nl {
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -294,6 +294,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
return 0;
}
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -342,6 +349,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
+
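Editor's note: the intent is to dissect the packet at most once on the input path: run the early dissect only when some rule needs L4 keys, then hand the same keys to the multipath hash. A hedged sketch, assuming CONFIG_IP_ROUTE_MULTIPATH and the updated fib_multipath_hash() declared below:

static int example_pick_multipath_hash(struct net *net, struct sk_buff *skb,
				       struct flowi4 *fl4)
{
	struct flow_keys keys;
	struct flow_keys *flkeys = NULL;

	/* Populates fl4 ports/proto and keys only when a rule requires it. */
	if (fib4_rules_early_flow_dissect(net, skb, fl4, &keys))
		flkeys = &keys;

	/* Reuse the already-dissected keys instead of parsing skb again. */
	return fib_multipath_hash(net, fl4, skb, flkeys);
}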
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
@@ -371,8 +396,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb);
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1f16773cfd76..540a4b4417bf 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -180,8 +180,10 @@ struct tnl_ptk_info {
struct ip_tunnel_net {
struct net_device *fb_tunnel_dev;
+ struct rtnl_link_ops *rtnl_link_ops;
struct hlist_head tunnels[IP_TNL_HASH_SIZE];
struct ip_tunnel __rcu *collect_md_tun;
+ int type;
};
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
@@ -254,6 +256,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
#ifdef CONFIG_INET
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+ int proto,
+ __be32 daddr, __be32 saddr,
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
+{
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->flowi4_oif = oif;
+ fl4->daddr = daddr;
+ fl4->saddr = saddr;
+ fl4->flowi4_tos = tos;
+ fl4->flowi4_proto = proto;
+ fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
+}
+
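Editor's note: sketch of a route lookup for a keyed GRE tunnel using the new helper; the tos, oif and mark values are placeholders:

static struct rtable *example_tunnel_route(struct net *net,
					   __be32 daddr, __be32 saddr,
					   __be32 key, int oif, __u32 mark)
{
	struct flowi4 fl4;

	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, daddr, saddr, key,
			    0 /* tos */, oif, mark);
	return ip_route_output_key(net, &fl4);
}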
int ip_tunnel_init(struct net_device *dev);
void ip_tunnel_uninit(struct net_device *dev);
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8606c9113d3f..50a6f0ddb878 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -105,8 +105,8 @@
#define IPV6_ADDR_ANY 0x0000U
-#define IPV6_ADDR_UNICAST 0x0001U
-#define IPV6_ADDR_MULTICAST 0x0002U
+#define IPV6_ADDR_UNICAST 0x0001U
+#define IPV6_ADDR_MULTICAST 0x0002U
#define IPV6_ADDR_LOOPBACK 0x0010U
#define IPV6_ADDR_LINKLOCAL 0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
#endif
}
-static inline void ipv6_addr_prefix(struct in6_addr *pfx,
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
const struct in6_addr *addr,
int plen)
{
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
addr[1] = wl;
}
-static inline void ipv6_addr_set(struct in6_addr *addr,
+static inline void ipv6_addr_set(struct in6_addr *addr,
__be32 w1, __be32 w2,
__be32 w3, __be32 w4)
{
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
}
/*
- * we should *never* get to this point since that
+ * we should *never* get to this point since that
* would mean the addrs are equal
*
* However, we do get to it 8) And exacly, when
@@ -888,6 +888,17 @@ static inline int ip6_default_np_autolabel(struct net *net)
}
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+ return net->ipv6.sysctl.multipath_hash_policy;
+}
+#else
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+ return 0;
+}
+#endif
/*
* Header manipulation
@@ -1056,7 +1067,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index d747ef975cd8..33fd9ba7e0e5 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_xmit(struct sk_buff *skb);
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+ if (lwtunnel_output_redirect(dst->lwtstate)) {
+ dst->lwtstate->orig_output = dst->output;
+ dst->output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(dst->lwtstate)) {
+ dst->lwtstate->orig_input = dst->input;
+ dst->input = lwtunnel_input;
+ }
+}
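Editor's note: this consolidates the previously open-coded pair of lwtunnel_output_redirect()/lwtunnel_input_redirect() checks. A caller that has just attached a state would do roughly the following (rt is a placeholder route):

static void example_attach_lwtstate(struct rtable *rt,
				    struct lwtunnel_state *lws)
{
	rt->dst.lwtstate = lwtstate_get(lws);
	/* Installs lwtunnel_output/lwtunnel_input as needed. */
	lwtunnel_set_redirect(&rt->dst);
}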
#else
static inline void lwtstate_free(struct lwtunnel_state *lws)
@@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
return false;
}
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+}
+
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
unsigned int mtu)
{
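A minimal sketch, assuming the caller already holds a reference on the lwtstate, of how a protocol that previously open-coded the two redirect checks can attach the state and install both hooks with the consolidated helper:

/* Sketch: attach the lwtunnel state to the dst, then let the new helper
 * install the output/input redirections in one place.
 */
static void example_attach_lwtstate(struct dst_entry *dst,
				    struct lwtunnel_state *lws)
{
	dst->lwtstate = lws;		/* reference ownership handled by caller */
	lwtunnel_set_redirect(dst);	/* replaces the two open-coded checks */
}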
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2b581bd93812..2449982daf75 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6,6 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -934,6 +935,7 @@ struct ieee80211_tx_info {
u8 ampdu_len;
u8 antenna;
u16 tx_time;
+ bool is_valid_ack_signal;
void *status_driver_data[19 / sizeof(void *)];
} status;
struct {
@@ -1098,6 +1100,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* the first subframe.
* @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
* be done in the hardware.
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
+ * frame
+ * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
@@ -1124,6 +1129,8 @@ enum mac80211_rx_flags {
RX_FLAG_MIC_STRIPPED = BIT(21),
RX_FLAG_ALLOW_SAME_PN = BIT(22),
RX_FLAG_ICV_STRIPPED = BIT(23),
+ RX_FLAG_AMPDU_EOF_BIT = BIT(24),
+ RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25),
};
/**
@@ -2063,6 +2070,14 @@ struct ieee80211_txq {
* @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
* TDLS links.
*
+ * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
+ * mgd_prepare_tx() callback to be called before transmission of a
+ * deauthentication frame in case the association was completed but no
+ * beacon was heard. This is required in multi-channel scenarios, where the
+ * virtual interface might not be given air time for the transmission of
+ * the frame, as it is not synced with the AP/P2P GO yet, and thus the
+ * deauthentication frame might not be transmitted.
+ *
* @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
* support QoS NDP for AP probing - that's most likely a driver bug.
*
@@ -2109,6 +2124,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+ IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
/* keep last, obviously */
@@ -3354,6 +3370,9 @@ enum ieee80211_reconfig_type {
* management frame prior to having successfully associated to allow the
* driver to give it channel time for the transmission, to get a response
* and to be able to synchronize with the GO.
+ * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
+ * would also call this function before transmitting a deauthentication
+ * frame if no beacon was heard from the AP/P2P GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
* The callback is optional and can (should!) sleep.
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f306b2aa15a4..09e30bdc7876 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -40,7 +40,7 @@ struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
-struct sock;
+struct uevent_sock;
struct netns_ipvs;
@@ -59,8 +59,12 @@ struct net {
atomic64_t cookie_gen;
struct list_head list; /* list of network namespaces */
- struct list_head cleanup_list; /* namespaces on death row */
- struct list_head exit_list; /* Use only net_mutex */
+ struct list_head exit_list; /* Linked to call pernet exit
+ * methods on dead net (net_sem
+ * read locked), or to unregister
+ * pernet ops (net_sem wr locked).
+ */
+ struct llist_node cleanup_list; /* namespaces on death row */
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
@@ -79,6 +83,8 @@ struct net {
struct sock *rtnl; /* rtnetlink socket */
struct sock *genl_sock;
+ struct uevent_sock *uevent_sock; /* uevent socket */
+
struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
@@ -89,7 +95,7 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
- struct list_head fib_notifier_ops; /* protected by net_mutex */
+ struct list_head fib_notifier_ops; /* protected by net_sem */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
@@ -308,11 +314,31 @@ struct net *get_net_ns_by_id(struct net *net, int id);
struct pernet_operations {
struct list_head list;
+ /*
+ * Below methods are called without any exclusive locks.
+ * More than one net may be constructed and destructed
+ * in parallel on several cpus. Every pernet_operations
+ * instance has to keep all other pernet_operations in mind
+ * and introduce locking if they share common resources.
+ *
+ * Exit methods using blocking RCU primitives, such as
+ * synchronize_rcu(), should be implemented via exit_batch.
+ * Then, destruction of a group of nets requires a single
+ * synchronize_rcu() related to these pernet_operations,
+ * instead of a separate synchronize_rcu() for every net.
+ * Please avoid synchronize_rcu() altogether where possible.
+ */
int (*init)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
unsigned int *id;
size_t size;
+ /*
+ * Indicates that the methods above are allowed to be executed in
+ * parallel with methods of any other pernet_operations, i.e. they
+ * do not need a write-locked net_sem.
+ */
+ bool async;
};
/*
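A hedged sketch of a pernet_operations instance opting in to the new parallel setup/teardown via .async (names are hypothetical); the audit_net_ops hunk further below shows the same flag being set on an existing user:

static int __net_init example_net_init(struct net *net)
{
	return 0;	/* per-netns setup would go here */
}

static void __net_exit example_net_exit(struct net *net)
{
	/* per-netns teardown; must not rely on an exclusive net_sem */
}

static struct pernet_operations example_net_ops = {
	.init  = example_net_init,
	.exit  = example_net_exit,
	.async = true,	/* opt in to parallel (un)registration */
};

Such an instance is registered as usual, e.g. with register_pernet_subsys(&example_net_ops); only the async flag is new.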
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 40e7bab68490..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -26,7 +26,8 @@ enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
- NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c29701a..8491bc9c86b1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -49,9 +49,12 @@ struct netns_ipv4 {
#endif
struct ipv4_devconf *devconf_all;
struct ipv4_devconf *devconf_dflt;
+ struct ip_ra_chain __rcu *ra_chain;
+ struct mutex ra_mutex;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
bool fib_has_custom_rules;
+ unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
@@ -167,6 +170,9 @@ struct netns_ipv4 {
atomic_t tfo_active_disable_times;
unsigned long tfo_active_disable_stamp;
+ int sysctl_udp_wmem_min;
+ int sysctl_udp_rmem_min;
+
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc4569cb8..5b51110435fc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
+ int multipath_hash_policy;
int flowlabel_consistency;
int auto_flowlabels;
int icmpv6_time;
@@ -71,7 +72,8 @@ struct netns_ipv6 {
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
+ bool fib6_has_custom_rules;
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
@@ -84,7 +86,7 @@ struct netns_ipv6 {
struct sock *mc_autojoin_sk;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
- struct mr6_table *mrt6;
+ struct mr_table *mrt6;
#else
struct list_head mr6_tables;
struct fib_rules_ops *mr6_rules_ops;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 87406252f0a3..e828d31be5da 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -806,6 +806,7 @@ enum tc_prio_command {
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
+ TC_PRIO_GRAFT,
};
struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
struct gnet_stats_queue *qstats;
};
+struct tc_prio_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
+ struct tc_prio_qopt_offload_graft_params graft_params;
};
};
+
#endif
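A sketch of the driver side, assuming a hypothetical setup_tc handler, showing how the new TC_PRIO_GRAFT command and its graft_params member might be consumed:

static int example_setup_tc_prio(struct net_device *dev,
				 struct tc_prio_qopt_offload *opt)
{
	switch (opt->command) {
	case TC_PRIO_REPLACE:
		return 0;	/* program bands from opt->replace_params */
	case TC_PRIO_GRAFT:
		/* re-parent hardware queues of band opt->graft_params.band
		 * under the child qdisc opt->graft_params.child_handle
		 */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}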
diff --git a/include/net/route.h b/include/net/route.h
index 20a92ca9e115..dbb032d5921b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -66,8 +66,6 @@ struct rtable {
u32 rt_mtu_locked:1,
rt_pmtu:31;
- u32 rt_table_id;
-
struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
};
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2092d33194dd..493e311bbe93 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
return false;
}
-/* Reset all TX qdiscs greater then index of a device. */
+/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index e5c57d0a082d..687e7f80037d 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -62,8 +62,10 @@ struct sctp_auth_bytes {
/* Definition for a shared key, whether endpoint or association */
struct sctp_shared_key {
struct list_head key_list;
- __u16 key_id;
struct sctp_auth_bytes *key;
+ refcount_t refcnt;
+ __u16 key_id;
+ __u8 deactivated;
};
#define key_for_each(__key, __list_head) \
@@ -103,21 +105,22 @@ int sctp_auth_send_cid(enum sctp_cid chunk,
int sctp_auth_recv_cid(enum sctp_cid chunk,
const struct sctp_association *asoc);
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
- struct sk_buff *skb,
- struct sctp_auth_chunk *auth, gfp_t gfp);
+ struct sk_buff *skb, struct sctp_auth_chunk *auth,
+ struct sctp_shared_key *ep_key, gfp_t gfp);
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key);
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key);
/* API Helpers */
int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id);
int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
struct sctp_hmacalgo *hmacs);
-int sctp_auth_set_key(struct sctp_endpoint *ep,
- struct sctp_association *asoc,
+int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc,
struct sctp_authkey *auth_key);
int sctp_auth_set_active_key(struct sctp_endpoint *ep,
- struct sctp_association *asoc,
- __u16 key_id);
+ struct sctp_association *asoc, __u16 key_id);
int sctp_auth_del_key_id(struct sctp_endpoint *ep,
- struct sctp_association *asoc,
- __u16 key_id);
+ struct sctp_association *asoc, __u16 key_id);
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+ struct sctp_association *asoc, __u16 key_id);
#endif
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index b55c6a48a206..6640f84fe536 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -100,6 +100,7 @@ enum sctp_verb {
SCTP_CMD_SET_SK_ERR, /* Set sk_err */
SCTP_CMD_ASSOC_CHANGE, /* generate and send assoc_change event */
SCTP_CMD_ADAPTATION_IND, /* generate and send adaptation event */
+ SCTP_CMD_PEER_NO_AUTH, /* generate and send authentication event */
SCTP_CMD_ASSOC_SHKEY, /* generate the association shared keys */
SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */
SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index f7ae6b0a21d0..72c5b8fc3232 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -180,14 +180,7 @@ struct sctp_transport *sctp_epaddr_lookup_transport(
/*
* sctp/proc.c
*/
-int sctp_snmp_proc_init(struct net *net);
-void sctp_snmp_proc_exit(struct net *net);
-int sctp_eps_proc_init(struct net *net);
-void sctp_eps_proc_exit(struct net *net);
-int sctp_assocs_proc_init(struct net *net);
-void sctp_assocs_proc_exit(struct net *net);
-int sctp_remaddr_proc_init(struct net *net);
-void sctp_remaddr_proc_exit(struct net *net);
+int __net_init sctp_proc_init(struct net *net);
/*
* sctp/offload.c
@@ -318,7 +311,6 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
{.label= #name, .counter= &sctp_dbg_objcnt_## name}
void sctp_dbg_objcnt_init(struct net *);
-void sctp_dbg_objcnt_exit(struct net *);
#else
@@ -326,7 +318,6 @@ void sctp_dbg_objcnt_exit(struct net *);
#define SCTP_DBG_OBJCNT_DEC(name)
static inline void sctp_dbg_objcnt_init(struct net *net) { return; }
-static inline void sctp_dbg_objcnt_exit(struct net *net) { return; }
#endif /* CONFIG_SCTP_DBG_OBJCOUNT */
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2883c43c5258..2d0e782c9055 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -263,7 +263,8 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
__u32 new_cum_tsn, size_t nstreams,
struct sctp_fwdtsn_skip *skiplist);
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+ __u16 key_id);
struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc,
__u16 stream_num, __be16 *stream_list,
bool out, bool in);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 03e92dda1813..012fb3e2f4cf 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -577,8 +577,12 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */
struct sk_buff *skb;
- /* In case of GSO packets, this will store the head one */
- struct sk_buff *head_skb;
+ union {
+ /* In case of GSO packets, this will store the head one */
+ struct sk_buff *head_skb;
+ /* In case of auth enabled, this will point to the shkey */
+ struct sctp_shared_key *shkey;
+ };
/* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that
@@ -1995,6 +1999,7 @@ struct sctp_association {
* The current generated assocaition shared key (secret)
*/
struct sctp_auth_bytes *asoc_shared_key;
+ struct sctp_shared_key *shkey;
/* SCTP AUTH: hmac id of the first peer requested algorithm
* that we support.
@@ -2112,6 +2117,9 @@ struct sctp_cmsgs {
struct sctp_initmsg *init;
struct sctp_sndrcvinfo *srinfo;
struct sctp_sndinfo *sinfo;
+ struct sctp_prinfo *prinfo;
+ struct sctp_authinfo *authinfo;
+ struct msghdr *addrs_msg;
};
/* Structure for tracking memory objects */
diff --git a/include/net/sock.h b/include/net/sock.h
index ae23f3b389ca..709311132d4c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -417,6 +417,7 @@ struct sock {
struct page_frag sk_frag;
netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps;
+ netdev_features_t sk_route_forced_caps;
int sk_gso_type;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
@@ -1585,7 +1586,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
-int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
+int sock_no_getname(struct socket *, struct sockaddr *, int);
__poll_t sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
@@ -1863,15 +1864,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
sk->sk_route_caps &= ~flags;
}
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
- return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
- (sk->sk_family == PF_INET &&
- (sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
- (sk->sk_family == PF_INET6 &&
- (sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, char *to,
int copy, int offset)
@@ -2150,6 +2142,10 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr, unsigned int *sg_size,
+ int first_coalesce);
+
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e3fc667f9ac2..9c9b3768b350 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req, bool fastopen);
+ struct request_sock *req, bool fastopen,
+ bool *lost_race);
int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
@@ -510,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
/* tcp_output.c */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -980,8 +979,8 @@ struct tcp_congestion_ops {
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* suggest number of segments for each skb to transmit (optional) */
- u32 (*tso_segs_goal)(struct sock *sk);
+ /* override sysctl_tcp_min_tso_segs */
+ u32 (*min_tso_segs)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index 50e78a74d0df..2875e169d744 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -32,21 +32,21 @@ enum {
#define TCP_STATE_MASK 0xF
-#define TCP_ACTION_FIN (1 << 7)
+#define TCP_ACTION_FIN (1 << TCP_CLOSE)
enum {
- TCPF_ESTABLISHED = (1 << 1),
- TCPF_SYN_SENT = (1 << 2),
- TCPF_SYN_RECV = (1 << 3),
- TCPF_FIN_WAIT1 = (1 << 4),
- TCPF_FIN_WAIT2 = (1 << 5),
- TCPF_TIME_WAIT = (1 << 6),
- TCPF_CLOSE = (1 << 7),
- TCPF_CLOSE_WAIT = (1 << 8),
- TCPF_LAST_ACK = (1 << 9),
- TCPF_LISTEN = (1 << 10),
- TCPF_CLOSING = (1 << 11),
- TCPF_NEW_SYN_RECV = (1 << 12),
+ TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED),
+ TCPF_SYN_SENT = (1 << TCP_SYN_SENT),
+ TCPF_SYN_RECV = (1 << TCP_SYN_RECV),
+ TCPF_FIN_WAIT1 = (1 << TCP_FIN_WAIT1),
+ TCPF_FIN_WAIT2 = (1 << TCP_FIN_WAIT2),
+ TCPF_TIME_WAIT = (1 << TCP_TIME_WAIT),
+ TCPF_CLOSE = (1 << TCP_CLOSE),
+ TCPF_CLOSE_WAIT = (1 << TCP_CLOSE_WAIT),
+ TCPF_LAST_ACK = (1 << TCP_LAST_ACK),
+ TCPF_LISTEN = (1 << TCP_LISTEN),
+ TCPF_CLOSING = (1 << TCP_CLOSING),
+ TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
};
#endif /* _LINUX_TCP_STATES_H */
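Since each TCPF_* flag is now literally (1 << TCP_*), membership of a socket's state in a set of states is a single mask test; a minimal sketch:

static inline bool example_sk_in_data_states(const struct sock *sk)
{
	return (1 << sk->sk_state) &
	       (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSE_WAIT);
}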
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7d2077665c0b..aa027ba1d032 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{
- return 1;
-}
+{
+ return 1;
+}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
- ipv6_addr_any((struct in6_addr *)saddr) ||
+ ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
return -ENOPROTOOPT;
-}
+}
static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 5cb360be2a11..894d8d2f713d 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -196,8 +196,6 @@ struct batadv_bla_claim_dst {
__be16 group; /* group id */
};
-#pragma pack()
-
/**
* struct batadv_ogm_packet - ogm (routing protocol) packet
* @packet_type: batman-adv packet type, part of the general header
@@ -222,9 +220,6 @@ struct batadv_ogm_packet {
__u8 reserved;
__u8 tq;
__be16 tvlv_len;
- /* __packed is not needed as the struct size is divisible by 4,
- * and the largest data type in this struct has a size of 4.
- */
};
#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
@@ -249,9 +244,6 @@ struct batadv_ogm2_packet {
__u8 orig[ETH_ALEN];
__be16 tvlv_len;
__be32 throughput;
- /* __packed is not needed as the struct size is divisible by 4,
- * and the largest data type in this struct has a size of 4.
- */
};
#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
@@ -405,7 +397,6 @@ struct batadv_icmp_packet_rr {
* misalignment of the payload after the ethernet header. It may also lead to
* leakage of information when the padding it not initialized before sending.
*/
-#pragma pack(2)
/**
* struct batadv_unicast_packet - unicast packet for network payload
@@ -533,8 +524,6 @@ struct batadv_coded_packet {
__be16 coded_len;
};
-#pragma pack()
-
/**
* struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
* @packet_type: batman-adv packet type, part of the general header
@@ -641,4 +630,6 @@ struct batadv_tvlv_mcast_data {
__u8 reserved[3];
};
+#pragma pack()
+
#endif /* _UAPI_LINUX_BATADV_PACKET_H_ */
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index ae00c99cbed0..324a0e1143e7 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: MIT */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -92,6 +92,53 @@ enum batadv_tt_client_flags {
};
/**
+ * enum batadv_mcast_flags_priv - Private, own multicast flags
+ *
+ * These are internal, multicast related flags. Currently they describe certain
+ * multicast related attributes of the segment this originator bridges into the
+ * mesh.
+ *
+ * Those attributes are used to determine the public multicast flags this
+ * originator is going to announce via TT.
+ *
+ * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier
+ * related flags are undefined.
+ */
+enum batadv_mcast_flags_priv {
+ /**
+ * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh
+ * interface.
+ */
+ BATADV_MCAST_FLAGS_BRIDGED = (1 << 0),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier
+ * exists in the mesh
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS = (1 << 1),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier
+ * exists in the mesh
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS = (1 << 2),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier
+ * exists, whether it is potentially shadowing multicast listeners
+ * (i.e. querier is behind our own bridge segment)
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING = (1 << 3),
+
+ /**
+ * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier
+ * exists, whether it is potentially shadowing multicast listeners
+ * (i.e. querier is behind our own bridge segment)
+ */
+ BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING = (1 << 4),
+};
+
+/**
* enum batadv_nl_attrs - batman-adv netlink attributes
*/
enum batadv_nl_attrs {
@@ -272,6 +319,31 @@ enum batadv_nl_attrs {
*/
BATADV_ATTR_BLA_CRC,
+ /**
+ * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address
+ */
+ BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+
+ /**
+ * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address
+ */
+ BATADV_ATTR_DAT_CACHE_HWADDRESS,
+
+ /**
+ * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID
+ */
+ BATADV_ATTR_DAT_CACHE_VID,
+
+ /**
+ * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags
+ */
+ BATADV_ATTR_MCAST_FLAGS,
+
+ /**
+ * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags
+ */
+ BATADV_ATTR_MCAST_FLAGS_PRIV,
+
/* add attributes above here, update the policy in netlink.c */
/**
@@ -361,6 +433,16 @@ enum batadv_nl_commands {
*/
BATADV_CMD_GET_BLA_BACKBONE,
+ /**
+ * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries
+ */
+ BATADV_CMD_GET_DAT_CACHE,
+
+ /**
+ * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags
+ */
+ BATADV_CMD_GET_MCAST_FLAGS,
+
/* add new commands above here */
/**
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db6bdc375126..18b7c510c511 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
};
enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -231,6 +233,28 @@ enum bpf_attach_type {
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+ /* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -696,6 +720,15 @@ union bpf_attr {
* int bpf_override_return(pt_regs, rc)
* @pt_regs: pointer to struct pt_regs
* @rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ * Redirect msg to a sock in map using key as a lookup key for the
+ * sock in map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_PASS
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -757,7 +790,11 @@ union bpf_attr {
FN(perf_prog_read_value), \
FN(getsockopt), \
FN(override_return), \
- FN(sock_ops_cb_flags_set),
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -800,6 +837,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
@@ -919,6 +957,14 @@ enum sk_action {
SK_PASS,
};
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ void *data;
+ void *data_end;
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
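A hedged sketch of a minimal SK_MSG program using the new program type, sk_msg_md context and msg helpers; the SEC()/bpf_map_def machinery and helper wrappers are assumed to come from the usual samples/tools BPF helper header, and the map/section names are made up for illustration:

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* assumed helper header (SEC, helper wrappers) */

struct bpf_map_def SEC("maps") sock_map = {
	.type        = BPF_MAP_TYPE_SOCKMAP,
	.key_size    = sizeof(int),
	.value_size  = sizeof(int),
	.max_entries = 2,
};

SEC("sk_msg")
int example_msg_prog(struct sk_msg_md *msg)
{
	/* 0 = no byte limit, so the verdict covers all data of the send */
	bpf_msg_apply_bytes(msg, 0);
	/* redirect the message to the socket stored at key 0 in the map */
	return bpf_msg_redirect_map(msg, &sock_map, 0, 0);
}

Such a program would be attached to a sockmap with the new BPF_SK_MSG_VERDICT attach type.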
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
+ __u64 addr;
};
#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 44a0b675a6bc..20da156aaf64 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -914,12 +914,15 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
* @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
* @data: Command-dependent value
* @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
* @rule_cnt: Number of rules to be affected
* @rule_locs: Array of used rule locations
*
* For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
* the fields included in the flow hash, e.g. %RXH_IP_SRC. The following
- * structure fields must not be used.
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
*
* For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
* on return.
@@ -931,7 +934,9 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
* set in @data then special location values should not be used.
*
* For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
- * existing rule on entry and @fs contains the rule on return.
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
*
* For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
* user buffer for @rule_locs on entry. On return, @data is the size
@@ -942,7 +947,11 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
* For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
* @fs.@location either specifies the location to use or is a special
* location value with %RX_CLS_LOC_SPECIAL flag set. On return,
- * @fs.@location is the actual rule location.
+ * @fs.@location is the actual rule location. If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule. The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
*
* For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
* existing rule on entry.
@@ -963,7 +972,10 @@ struct ethtool_rxnfc {
__u32 flow_type;
__u64 data;
struct ethtool_rx_flow_spec fs;
- __u32 rule_cnt;
+ union {
+ __u32 rule_cnt;
+ __u32 rss_context;
+ };
__u32 rule_locs[0];
};
@@ -990,7 +1002,11 @@ struct ethtool_rxfh_indir {
/**
* struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
* @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
- * @rss_context: RSS context identifier.
+ * @rss_context: RSS context identifier. Context 0 is the default for normal
+ * traffic; other contexts can be referenced as the destination for RX flow
+ * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command
+ * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ * contain the ID of the newly allocated context.
* @indir_size: On entry, the array size of the user buffer for the
* indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
* %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH,
@@ -1009,7 +1025,8 @@ struct ethtool_rxfh_indir {
* size should be returned. For %ETHTOOL_SRSSH, an @indir_size of
* %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
* and a @indir_size of zero means the indir table should be reset to default
- * values. An hfunc of zero means that hash function setting is not requested.
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
*/
struct ethtool_rxfh {
__u32 cmd;
@@ -1021,6 +1038,7 @@ struct ethtool_rxfh {
__u32 rsvd32;
__u32 rss_config[0];
};
+#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff
#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff
/**
@@ -1635,6 +1653,8 @@ static inline int ethtool_validate_duplex(__u8 duplex)
/* Flag to enable additional fields in struct ethtool_rx_flow_spec */
#define FLOW_EXT 0x80000000
#define FLOW_MAC_EXT 0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define FLOW_RSS 0x20000000
/* L3-L4 network traffic flow hash options */
#define RXH_L2DA (1 << 1)
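An illustrative userspace sketch (not part of the patch) inserting an ntuple rule that spreads matching TCP/IPv4 traffic over an RSS context previously allocated with ETHTOOL_SRSSH and ETH_RXFH_CONTEXT_ALLOC; fd is assumed to be an open AF_INET datagram socket, and the match fields are omitted for brevity:

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int example_steer_to_rss_ctx(int fd, const char *ifname, __u32 ctx)
{
	struct ethtool_rxnfc nfc = {
		.cmd = ETHTOOL_SRXCLSRLINS,
		.rss_context = ctx,		/* new union member */
		.fs = {
			.flow_type = TCP_V4_FLOW | FLOW_RSS,
			/* match fields (h_u/m_u) omitted for brevity */
			.location = RX_CLS_LOC_ANY,
		},
	};
	struct ifreq ifr = { 0 };

	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&nfc;
	return ioctl(fd, SIOCETHTOOL, &ifr);
}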
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..232df14e1287 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,7 +23,7 @@ struct fib_rule_hdr {
__u8 tos;
__u8 table;
- __u8 res1; /* reserved */
+ __u8 res1; /* reserved */
__u8 res2; /* reserved */
__u8 action;
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
__u32 end;
};
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+
enum {
FRA_UNSPEC,
FRA_DST, /* destination address */
@@ -58,6 +63,10 @@ enum {
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE, /* UID range */
+ FRA_PROTOCOL, /* Originator of the rule */
+ FRA_IP_PROTO, /* ip proto */
+ FRA_SPORT_RANGE, /* sport */
+ FRA_DPORT_RANGE, /* dport */
__FRA_MAX
};
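A minimal sketch (the helper name is hypothetical, not from this patch) of the inclusive range test a rule match needs for the new FRA_SPORT_RANGE/FRA_DPORT_RANGE attributes:

static bool example_port_in_range(const struct fib_rule_port_range *range,
				  __u16 port)
{
	return port >= range->start && port <= range->end;
}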
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 820de5d222d2..3a45b4ad71a3 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -89,6 +89,7 @@
#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */
+#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */
#define ETH_P_TIPC 0x88CA /* TIPC */
#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */
#define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6d9447700e18..68699f654118 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -941,4 +941,43 @@ enum {
IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
};
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
new file mode 100644
index 000000000000..4c292ecbb748
--- /dev/null
+++ b/include/uapi/linux/ncsi.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __UAPI_NCSI_NETLINK_H__
+#define __UAPI_NCSI_NETLINK_H__
+
+/**
+ * enum ncsi_nl_commands - supported NCSI commands
+ *
+ * @NCSI_CMD_UNSPEC: unspecified command to catch errors
+ * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires
+ * NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified returns the
+ * specific package and its channels - otherwise a dump request returns
+ * all packages and their associated channels.
+ * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination.
+ * Requires NCSI_ATTR_IFINDEX and the preferred NCSI_ATTR_PACKAGE_ID and
+ * optionally the preferred NCSI_ATTR_CHANNEL_ID.
+ * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
+ * Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_MAX: highest command number
+ */
+enum ncsi_nl_commands {
+ NCSI_CMD_UNSPEC,
+ NCSI_CMD_PKG_INFO,
+ NCSI_CMD_SET_INTERFACE,
+ NCSI_CMD_CLEAR_INTERFACE,
+
+ __NCSI_CMD_AFTER_LAST,
+ NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_attrs - General NCSI netlink attributes
+ *
+ * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI
+ * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
+ * @NCSI_ATTR_PACKAGE_ID: package ID
+ * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_attrs {
+ NCSI_ATTR_UNSPEC,
+ NCSI_ATTR_IFINDEX,
+ NCSI_ATTR_PACKAGE_LIST,
+ NCSI_ATTR_PACKAGE_ID,
+ NCSI_ATTR_CHANNEL_ID,
+
+ __NCSI_ATTR_AFTER_LAST,
+ NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes
+ *
+ * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_PKG_ATTR: nested array of package attributes
+ * @NCSI_PKG_ATTR_ID: package ID
+ * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred
+ * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes
+ * @NCSI_PKG_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_pkg_attrs {
+ NCSI_PKG_ATTR_UNSPEC,
+ NCSI_PKG_ATTR,
+ NCSI_PKG_ATTR_ID,
+ NCSI_PKG_ATTR_FORCED,
+ NCSI_PKG_ATTR_CHANNEL_LIST,
+
+ __NCSI_PKG_ATTR_AFTER_LAST,
+ NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes
+ *
+ * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_CHANNEL_ATTR: nested array of channel attributes
+ * @NCSI_CHANNEL_ATTR_ID: channel ID
+ * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number
+ * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number
+ * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string
+ * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags
+ * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in
+ * NCSI_CHANNEL_ACTIVE state
+ * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as
+ * preferred
+ * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs
+ * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel
+ * @NCSI_CHANNEL_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_channel_attrs {
+ NCSI_CHANNEL_ATTR_UNSPEC,
+ NCSI_CHANNEL_ATTR,
+ NCSI_CHANNEL_ATTR_ID,
+ NCSI_CHANNEL_ATTR_VERSION_MAJOR,
+ NCSI_CHANNEL_ATTR_VERSION_MINOR,
+ NCSI_CHANNEL_ATTR_VERSION_STR,
+ NCSI_CHANNEL_ATTR_LINK_STATE,
+ NCSI_CHANNEL_ATTR_ACTIVE,
+ NCSI_CHANNEL_ATTR_FORCED,
+ NCSI_CHANNEL_ATTR_VLAN_LIST,
+ NCSI_CHANNEL_ATTR_VLAN_ID,
+
+ __NCSI_CHANNEL_ATTR_AFTER_LAST,
+ NCSI_CHANNEL_ATTR_MAX = __NCSI_CHANNEL_ATTR_AFTER_LAST - 1
+};
+
+#endif /* __UAPI_NCSI_NETLINK_H__ */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c587a61c32bf..c13c84304be3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -992,6 +992,32 @@
*
* @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
*
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ * drivers that do not define separate commands for authentication and
+ * association, but rely on user space for the authentication to happen.
+ * This interface acts both as the event request (driver to user space)
+ * to trigger the authentication and command response (userspace to
+ * driver) to indicate the authentication status.
+ *
+ * User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ * trigger a connection. The host driver selects a BSS and further uses
+ * this interface to offload only the authentication part to the user
+ * space. Authentication frames are passed between the driver and user
+ * space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ * further with the association after getting successful authentication
+ * status. User space indicates the authentication status through
+ * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ * command interface.
+ *
+ * Host driver reports this status on an authentication failure to the
+ * user space through the connect result as the user space would have
+ * initiated the connection through the connect request.
+ *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notifies of a station's
+ * HT or VHT opmode change, using any of &NL80211_ATTR_SMPS_MODE,
+ * &NL80211_ATTR_CHANNEL_WIDTH, &NL80211_ATTR_NSS attributes along with
+ * its address (specified in &NL80211_ATTR_MAC).
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
@@ -1198,6 +1224,10 @@ enum nl80211_commands {
NL80211_CMD_RELOAD_REGDB,
+ NL80211_CMD_EXTERNAL_AUTH,
+
+ NL80211_CMD_STA_OPMODE_CHANGED,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -2153,6 +2183,19 @@ enum nl80211_commands {
* @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
* @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
*
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ * authentication operation (u32 attribute with an
+ * &enum nl80211_external_auth_action value). This is used with the
+ * &NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ * space supports external authentication. This attribute shall be used
+ * only with %NL80211_CMD_CONNECT request. The driver may offload
+ * authentication processing to user space if this capability is indicated
+ * in NL80211_CMD_CONNECT requests from the user space.
+ *
+ * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this
+ * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2579,6 +2622,12 @@ enum nl80211_attrs {
NL80211_ATTR_PMKR0_NAME,
NL80211_ATTR_PORT_AUTHORIZED,
+ NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
+ NL80211_ATTR_NSS,
+ NL80211_ATTR_ACK_SIGNAL,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -2899,6 +2948,7 @@ enum nl80211_sta_bss_param {
* @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
* received from the station (u64, usec)
* @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame (u8, dBm)
* @__NL80211_STA_INFO_AFTER_LAST: internal
* @NL80211_STA_INFO_MAX: highest possible station info attribute
*/
@@ -2937,6 +2987,7 @@ enum nl80211_sta_info {
NL80211_STA_INFO_TID_STATS,
NL80211_STA_INFO_RX_DURATION,
NL80211_STA_INFO_PAD,
+ NL80211_STA_INFO_ACK_SIGNAL,
/* keep last */
__NL80211_STA_INFO_AFTER_LAST,
@@ -4945,6 +4996,9 @@ enum nl80211_feature_flags {
* probe request tx deferral and suppression
* @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
* value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
*
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4972,6 +5026,9 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
NL80211_EXT_FEATURE_MFP_OPTIONAL,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
@@ -5032,6 +5089,10 @@ enum nl80211_timeout_reason {
* of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
* requests.
*
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only
+ * one of them can be used in the request.
+ *
* @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
* @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
* @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
@@ -5059,7 +5120,20 @@ enum nl80211_timeout_reason {
* and suppression (if it has received a broadcast Probe Response frame,
* Beacon frame or FILS Discovery frame from an AP that the STA considers
* a suitable candidate for (re-)association - suitable in terms of
- * SSID and/or RSSI
+ * SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ * accomplish the scan. Thus, this flag asks the driver to perform the
+ * scan request with a shorter span/duration. How this is accomplished is
+ * specific to the driver implementation. Scan accuracy may be impacted
+ * with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag asks the scan attempts to consume
+ * as little power as possible. Drivers can resort to their specific means
+ * to optimize power use. Scan accuracy may be impacted with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here refers to the extent of scan
+ * results obtained. Thus the HIGH_ACCURACY scan flag aims to get the maximum
+ * possible scan results. This flag hints the driver to use the best
+ * possible scan configuration to improve the accuracy in scanning.
+ * Latency and power use may be impacted with this flag.
*/
enum nl80211_scan_flags {
NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
@@ -5070,6 +5144,9 @@ enum nl80211_scan_flags {
NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5,
NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6,
NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7,
+ NL80211_SCAN_FLAG_LOW_SPAN = 1<<8,
+ NL80211_SCAN_FLAG_LOW_POWER = 1<<9,
+ NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10,
};
/**
@@ -5469,4 +5546,15 @@ enum nl80211_nan_match_attributes {
NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
};
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+ NL80211_EXTERNAL_AUTH_START,
+ NL80211_EXTERNAL_AUTH_ABORT,
+};
+
#endif /* __LINUX_NL80211_H */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 46c506615f4a..be05e66c167b 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -475,6 +475,7 @@ enum {
enum {
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
/* Match-all classifier */
@@ -555,7 +556,8 @@ enum {
#define TCF_EM_VLAN 6
#define TCF_EM_CANID 7
#define TCF_EM_IPSET 8
-#define TCF_EM_MAX 8
+#define TCF_EM_IPT 9
+#define TCF_EM_MAX 9
enum {
TCF_EM_PROG_TC
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index e71d4491f225..a66b213de3d7 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -103,6 +103,8 @@
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
#define RDS_CMSG_RXPATH_LATENCY 11
+#define RDS_CMSG_ZCOPY_COOKIE 12
+#define RDS_CMSG_ZCOPY_COMPLETION 13
#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
@@ -316,6 +318,12 @@ struct rds_rdma_notify {
#define RDS_RDMA_DROPPED 3
#define RDS_RDMA_OTHER_ERROR 4
+#define RDS_MAX_ZCOOKIES 8
+struct rds_zcopy_cookies {
+ __u32 num;
+ __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
/*
* Common set of flags for all RDMA related structs
*/
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 4c4db14786bd..afd4346386e0 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -99,6 +99,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_RECVRCVINFO 32
#define SCTP_RECVNXTINFO 33
#define SCTP_DEFAULT_SNDINFO 34
+#define SCTP_AUTH_DEACTIVATE_KEY 35
/* Internal Socket Options. Some of the sctp library functions are
* implemented using these socket options.
@@ -260,6 +261,31 @@ struct sctp_nxtinfo {
sctp_assoc_t nxt_assoc_id;
};
+/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo
+ */
+struct sctp_prinfo {
+ __u16 pr_policy;
+ __u32 pr_value;
+};
+
+/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo
+ */
+struct sctp_authinfo {
+ __u16 auth_keynumber;
+};
+
/*
* sinfo_flags: 16 bits (unsigned integer)
*
@@ -271,6 +297,8 @@ enum sctp_sinfo_flags {
SCTP_ADDR_OVER = (1 << 1), /* Override the primary destination. */
SCTP_ABORT = (1 << 2), /* Send an ABORT message to the peer. */
SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */
+ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */
+ SCTP_SENDALL = (1 << 6),
SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */
SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */
};
@@ -293,6 +321,14 @@ typedef enum sctp_cmsg_type {
#define SCTP_RCVINFO SCTP_RCVINFO
SCTP_NXTINFO, /* 5.3.6 SCTP Next Receive Information Structure */
#define SCTP_NXTINFO SCTP_NXTINFO
+ SCTP_PRINFO, /* 5.3.7 SCTP PR-SCTP Information Structure */
+#define SCTP_PRINFO SCTP_PRINFO
+ SCTP_AUTHINFO, /* 5.3.8 SCTP AUTH Information Structure */
+#define SCTP_AUTHINFO SCTP_AUTHINFO
+ SCTP_DSTADDRV4, /* 5.3.9 SCTP Destination IPv4 Address Structure */
+#define SCTP_DSTADDRV4 SCTP_DSTADDRV4
+ SCTP_DSTADDRV6, /* 5.3.10 SCTP Destination IPv6 Address Structure */
+#define SCTP_DSTADDRV6 SCTP_DSTADDRV6
} sctp_cmsg_t;
/*
@@ -482,7 +518,12 @@ struct sctp_authkey_event {
sctp_assoc_t auth_assoc_id;
};
-enum { SCTP_AUTH_NEWKEY = 0, };
+enum {
+ SCTP_AUTH_NEW_KEY,
+#define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY /* compatible with before */
+ SCTP_AUTH_FREE_KEY,
+ SCTP_AUTH_NO_AUTH,
+};
/*
* 6.1.9. SCTP_SENDER_DRY_EVENT
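An illustrative userspace sketch, assuming a connected SCTP socket and that the uapi <linux/sctp.h> header provides the new definitions, of attaching the SCTP_PRINFO ancillary data from section 5.3.7 to a sendmsg() call:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

static ssize_t example_send_pr(int fd, const void *data, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(struct sctp_prinfo))] = { 0 };
	struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
	struct sctp_prinfo prinfo = {
		.pr_policy = SCTP_PR_SCTP_TTL,	/* expire unsent data ... */
		.pr_value = 3000,		/* ... after 3000 ms */
	};

	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_PRINFO;
	cmsg->cmsg_len = CMSG_LEN(sizeof(prinfo));
	memcpy(CMSG_DATA(cmsg), &prinfo, sizeof(prinfo));

	return sendmsg(fd, &msg, 0);
}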
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644
index 000000000000..49a65530992c
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+ TCA_EM_IPT_UNSPEC,
+ TCA_EM_IPT_HOOK,
+ TCA_EM_IPT_MATCH_NAME,
+ TCA_EM_IPT_MATCH_REVISION,
+ TCA_EM_IPT_NFPROTO,
+ TCA_EM_IPT_MATCH_DATA,
+ __TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64635fa..560374c978f9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -241,6 +241,9 @@ enum {
TCP_NLA_MIN_RTT, /* minimum RTT */
TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */
TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */
+ TCP_NLA_CA_STATE, /* ca_state of socket */
+ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
};
diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 14bacc7e6cef..4ac9f1f02b06 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -61,50 +61,6 @@ struct tipc_name_seq {
__u32 upper;
};
-/* TIPC Address Size, Offset, Mask specification for Z.C.N
- */
-#define TIPC_NODE_BITS 12
-#define TIPC_CLUSTER_BITS 12
-#define TIPC_ZONE_BITS 8
-
-#define TIPC_NODE_OFFSET 0
-#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS
-#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
-
-#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1)
-#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1)
-#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1)
-
-#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
-#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
-#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
-
-#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
-
-static inline __u32 tipc_addr(unsigned int zone,
- unsigned int cluster,
- unsigned int node)
-{
- return (zone << TIPC_ZONE_OFFSET) |
- (cluster << TIPC_CLUSTER_OFFSET) |
- node;
-}
-
-static inline unsigned int tipc_zone(__u32 addr)
-{
- return addr >> TIPC_ZONE_OFFSET;
-}
-
-static inline unsigned int tipc_cluster(__u32 addr)
-{
- return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
-}
-
-static inline unsigned int tipc_node(__u32 addr)
-{
- return addr & TIPC_NODE_MASK;
-}
-
/*
* Application-accessible port name types
*/
@@ -117,9 +73,10 @@ static inline unsigned int tipc_node(__u32 addr)
/*
* Publication scopes when binding port names and port name sequences
*/
-#define TIPC_ZONE_SCOPE 1
-#define TIPC_CLUSTER_SCOPE 2
-#define TIPC_NODE_SCOPE 3
+enum tipc_scope {
+ TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
+ TIPC_NODE_SCOPE = 3
+};
/*
* Limiting values for messages
@@ -243,7 +200,7 @@ struct sockaddr_tipc {
struct tipc_group_req {
__u32 type; /* group id */
__u32 instance; /* member id */
- __u32 scope; /* zone/cluster/node */
+ __u32 scope; /* cluster/node */
__u32 flags;
};
@@ -268,4 +225,53 @@ struct tipc_sioc_ln_req {
__u32 bearer_id;
char linkname[TIPC_MAX_LINK_NAME];
};
+
+
+/* The macros and functions below are deprecated:
+ */
+
+#define TIPC_ZONE_SCOPE 1
+
+#define TIPC_NODE_BITS 12
+#define TIPC_CLUSTER_BITS 12
+#define TIPC_ZONE_BITS 8
+
+#define TIPC_NODE_OFFSET 0
+#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS
+#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
+
+#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1)
+#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1)
+#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1)
+
+#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
+#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
+#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
+
+#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
+
+static inline __u32 tipc_addr(unsigned int zone,
+ unsigned int cluster,
+ unsigned int node)
+{
+ return (zone << TIPC_ZONE_OFFSET) |
+ (cluster << TIPC_CLUSTER_OFFSET) |
+ node;
+}
+
+static inline unsigned int tipc_zone(__u32 addr)
+{
+ return addr >> TIPC_ZONE_OFFSET;
+}
+
+static inline unsigned int tipc_cluster(__u32 addr)
+{
+ return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
+}
+
+static inline unsigned int tipc_node(__u32 addr)
+{
+ return addr & TIPC_NODE_MASK;
+}
+
#endif
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 469aa67a5ecb..d896ded51bcb 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -114,6 +114,13 @@ enum {
TIPC_NLA_SOCK_REF, /* u32 */
TIPC_NLA_SOCK_CON, /* nest */
TIPC_NLA_SOCK_HAS_PUBL, /* flag */
+ TIPC_NLA_SOCK_STAT, /* nest */
+ TIPC_NLA_SOCK_TYPE, /* u32 */
+ TIPC_NLA_SOCK_INO, /* u32 */
+ TIPC_NLA_SOCK_UID, /* u32 */
+ TIPC_NLA_SOCK_TIPC_STATE, /* u32 */
+ TIPC_NLA_SOCK_COOKIE, /* u64 */
+ TIPC_NLA_SOCK_PAD, /* flag */
__TIPC_NLA_SOCK_MAX,
TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -238,6 +245,18 @@ enum {
TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1
};
+/* Nest, socket statistics info */
+enum {
+ TIPC_NLA_SOCK_STAT_RCVQ, /* u32 */
+ TIPC_NLA_SOCK_STAT_SENDQ, /* u32 */
+ TIPC_NLA_SOCK_STAT_LINK_CONG, /* flag */
+ TIPC_NLA_SOCK_STAT_CONN_CONG, /* flag */
+ TIPC_NLA_SOCK_STAT_DROP, /* u32 */
+
+ __TIPC_NLA_SOCK_STAT_MAX,
+ TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
+};
+
/* Nest, link properties. Valid for link, media and bearer */
enum {
TIPC_NLA_PROP_UNSPEC,
diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h
new file mode 100644
index 000000000000..7678cf2f0dcc
--- /dev/null
+++ b/include/uapi/linux/tipc_sockets_diag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* AF_TIPC sock_diag interface for querying open sockets */
+
+#ifndef _UAPI__TIPC_SOCKETS_DIAG_H__
+#define _UAPI__TIPC_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+#include <linux/sock_diag.h>
+
+/* Request */
+struct tipc_sock_diag_req {
+ __u8 sdiag_family; /* must be AF_TIPC */
+ __u8 sdiag_protocol; /* must be 0 */
+ __u16 pad; /* must be 0 */
+ __u32 tidiag_states; /* query */
+};
+#endif /* _UAPI__TIPC_SOCKETS_DIAG_H__ */
diff --git a/kernel/audit.c b/kernel/audit.c
index 227db99b0f19..5e49b614d0e6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = {
.exit = audit_net_exit,
.id = &audit_net_id,
.size = sizeof(struct audit_net),
+ .async = true,
};
/* Initialize audit support at boot time. */
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 81e2f6995adb..bf6da59ae0d0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -178,6 +178,9 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
+ /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
+ * extensions.
+ */
if (strchr(dentry->d_name.name, '.'))
return ERR_PTR(-EPERM);
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a927e89dad6e..69c5bccabd22 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -38,6 +38,7 @@
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/list.h>
+#include <linux/mm.h>
#include <net/strparser.h>
#include <net/tcp.h>
@@ -47,6 +48,7 @@
struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
+ struct bpf_prog *bpf_tx_msg;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
};
@@ -62,8 +64,7 @@ struct smap_psock_map_entry {
struct smap_psock {
struct rcu_head rcu;
- /* refcnt is used inside sk_callback_lock */
- u32 refcnt;
+ refcount_t refcnt;
/* datapath variables */
struct sk_buff_head rxqueue;
@@ -74,7 +75,16 @@ struct smap_psock {
int save_off;
struct sk_buff *save_skb;
+ /* datapath variables for tx_msg ULP */
+ struct sock *sk_redir;
+ int apply_bytes;
+ int cork_bytes;
+ int sg_size;
+ int eval;
+ struct sk_msg_buff *cork;
+
struct strparser strp;
+ struct bpf_prog *bpf_tx_msg;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
struct list_head maps;
@@ -92,6 +102,11 @@ struct smap_psock {
void (*save_write_space)(struct sock *sk);
};
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
return rcu_dereference_sk_user_data(sk);
@@ -116,27 +131,41 @@ static int bpf_tcp_init(struct sock *sk)
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
+
+ if (psock->bpf_tx_msg) {
+ tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+ tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+ }
+
sk->sk_prot = &tcp_bpf_proto;
rcu_read_unlock();
return 0;
}
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+
static void bpf_tcp_release(struct sock *sk)
{
struct smap_psock *psock;
rcu_read_lock();
psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto out;
- if (likely(psock)) {
- sk->sk_prot = psock->sk_proto;
- psock->sk_proto = NULL;
+ if (psock->cork) {
+ free_start_sg(psock->sock, psock->cork);
+ kfree(psock->cork);
+ psock->cork = NULL;
}
+
+ sk->sk_prot = psock->sk_proto;
+ psock->sk_proto = NULL;
+out:
rcu_read_unlock();
}
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
static void bpf_tcp_close(struct sock *sk, long timeout)
{
void (*close_fun)(struct sock *sk, long timeout);
@@ -175,6 +204,7 @@ enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
__SK_REDIRECT,
+ __SK_NONE,
};
static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
@@ -186,10 +216,621 @@ static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
.release = bpf_tcp_release,
};
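+/* Copy "bytes" of data from the user iov_iter into the sk_msg_buff
+ * scatterlist, starting at sg_curr/sg_copybreak and wrapping at
+ * MAX_SKB_FRAGS until sg_end is reached.
+ */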
+static int memcopy_from_iter(struct sock *sk,
+ struct sk_msg_buff *md,
+ struct iov_iter *from, int bytes)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_curr, rc = -ENOSPC;
+
+ do {
+ int copy;
+ char *to;
+
+ if (md->sg_copybreak >= sg[i].length) {
+ md->sg_copybreak = 0;
+
+ if (++i == MAX_SKB_FRAGS)
+ i = 0;
+
+ if (i == md->sg_end)
+ break;
+ }
+
+ copy = sg[i].length - md->sg_copybreak;
+ to = sg_virt(&sg[i]) + md->sg_copybreak;
+ md->sg_copybreak += copy;
+
+ if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+ rc = copy_from_iter_nocache(to, copy, from);
+ else
+ rc = copy_from_iter(to, copy, from);
+
+ if (rc != copy) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ bytes -= copy;
+ if (!bytes)
+ break;
+
+ md->sg_copybreak = 0;
+ if (++i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != md->sg_end);
+out:
+ md->sg_curr = i;
+ return rc;
+}
+
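+/* Push queued scatterlist data to the TCP stack via do_tcp_sendpages(),
+ * bounded by apply_bytes when set, and optionally uncharge socket memory
+ * as pages are fully consumed.
+ */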
+static int bpf_tcp_push(struct sock *sk, int apply_bytes,
+ struct sk_msg_buff *md,
+ int flags, bool uncharge)
+{
+ bool apply = apply_bytes;
+ struct scatterlist *sg;
+ int offset, ret = 0;
+ struct page *p;
+ size_t size;
+
+ while (1) {
+ sg = md->sg_data + md->sg_start;
+ size = (apply && apply_bytes < sg->length) ?
+ apply_bytes : sg->length;
+ offset = sg->offset;
+
+ tcp_rate_check_app_limited(sk);
+ p = sg_page(sg);
+retry:
+ ret = do_tcp_sendpages(sk, p, offset, size, flags);
+ if (ret != size) {
+ if (ret > 0) {
+ if (apply)
+ apply_bytes -= ret;
+ size -= ret;
+ offset += ret;
+ if (uncharge)
+ sk_mem_uncharge(sk, ret);
+ goto retry;
+ }
+
+ sg->length = size;
+ sg->offset = offset;
+ return ret;
+ }
+
+ if (apply)
+ apply_bytes -= ret;
+ sg->offset += ret;
+ sg->length -= ret;
+ if (uncharge)
+ sk_mem_uncharge(sk, ret);
+
+ if (!sg->length) {
+ put_page(p);
+ md->sg_start++;
+ if (md->sg_start == MAX_SKB_FRAGS)
+ md->sg_start = 0;
+ memset(sg, 0, sizeof(*sg));
+
+ if (md->sg_start == md->sg_end)
+ break;
+ }
+
+ if (apply && !apply_bytes)
+ break;
+ }
+ return 0;
+}
+
+static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data + md->sg_start;
+
+ if (md->sg_copy[md->sg_start]) {
+ md->data = md->data_end = 0;
+ } else {
+ md->data = sg_virt(sg);
+ md->data_end = md->data + sg->length;
+ }
+}
+
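+/* Uncharge "bytes" of socket memory accounted to the scatterlist entries
+ * starting at sg_start, without releasing the pages themselves.
+ */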
+static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_start;
+
+ do {
+ int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
+
+ sk_mem_uncharge(sk, uncharge);
+ bytes -= uncharge;
+ if (!bytes)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != md->sg_end);
+}
+
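+/* Drop up to "bytes" from the front of the scatterlist: release fully
+ * covered pages, uncharge the socket memory and trim a partially covered
+ * final entry.
+ */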
+static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_start, free;
+
+ while (bytes && sg[i].length) {
+ free = sg[i].length;
+ if (bytes < free) {
+ sg[i].length -= bytes;
+ sg[i].offset += bytes;
+ sk_mem_uncharge(sk, bytes);
+ break;
+ }
+
+ sk_mem_uncharge(sk, sg[i].length);
+ put_page(sg_page(&sg[i]));
+ bytes -= sg[i].length;
+ sg[i].length = 0;
+ sg[i].page_link = 0;
+ sg[i].offset = 0;
+ i++;
+
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+}
+
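+/* Release and uncharge every populated scatterlist entry from "start"
+ * onwards; returns the total number of bytes freed.
+ */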
+static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = start, free = 0;
+
+ while (sg[i].length) {
+ free += sg[i].length;
+ sk_mem_uncharge(sk, sg[i].length);
+ put_page(sg_page(&sg[i]));
+ sg[i].length = 0;
+ sg[i].page_link = 0;
+ sg[i].offset = 0;
+ i++;
+
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+
+ return free;
+}
+
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+ int free = free_sg(sk, md->sg_start, md);
+
+ md->sg_start = md->sg_end;
+ return free;
+}
+
+static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+ return free_sg(sk, md->sg_curr, md);
+}
+
+static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
+{
+ return ((_rc == SK_PASS) ?
+ (md->map ? __SK_REDIRECT : __SK_PASS) :
+ __SK_DROP);
+}
+
+static unsigned int smap_do_tx_msg(struct sock *sk,
+ struct smap_psock *psock,
+ struct sk_msg_buff *md)
+{
+ struct bpf_prog *prog;
+ unsigned int rc, _rc;
+
+ preempt_disable();
+ rcu_read_lock();
+
+ /* If the policy was removed mid-send then default to 'accept' */
+ prog = READ_ONCE(psock->bpf_tx_msg);
+ if (unlikely(!prog)) {
+ _rc = SK_PASS;
+ goto verdict;
+ }
+
+ bpf_compute_data_pointers_sg(md);
+ rc = (*prog->bpf_func)(md, prog->insnsi);
+ psock->apply_bytes = md->apply_bytes;
+
+ /* Moving return codes from UAPI namespace into internal namespace */
+ _rc = bpf_map_msg_verdict(rc, md);
+
+ /* The psock has a refcount on the sock but not on the map and, because
+ * we need to drop the RCU read lock here, it's possible the map could be
+ * removed between here and when we need it to execute the sock
+ * redirect. So do the map lookup now for future use.
+ */
+ if (_rc == __SK_REDIRECT) {
+ if (psock->sk_redir)
+ sock_put(psock->sk_redir);
+ psock->sk_redir = do_msg_redirect_map(md);
+ if (!psock->sk_redir) {
+ _rc = __SK_DROP;
+ goto verdict;
+ }
+ sock_hold(psock->sk_redir);
+ }
+verdict:
+ rcu_read_unlock();
+ preempt_enable();
+
+ return _rc;
+}
+
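+/* Transmit "send" bytes on the redirect target socket. If the target
+ * psock has gone away or the push fails, the remaining scatterlist pages
+ * are released and the number of freed bytes is returned.
+ */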
+static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
+ struct sk_msg_buff *md,
+ int flags)
+{
+ struct smap_psock *psock;
+ struct scatterlist *sg;
+ int i, err, free = 0;
+
+ sg = md->sg_data;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto out_rcu;
+
+ if (!refcount_inc_not_zero(&psock->refcnt))
+ goto out_rcu;
+
+ rcu_read_unlock();
+ lock_sock(sk);
+ err = bpf_tcp_push(sk, send, md, flags, false);
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ if (unlikely(err))
+ goto out;
+ return 0;
+out_rcu:
+ rcu_read_unlock();
+out:
+ i = md->sg_start;
+ while (sg[i].length) {
+ free += sg[i].length;
+ put_page(sg_page(&sg[i]));
+ sg[i].length = 0;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+ return free;
+}
+
+static inline void bpf_md_init(struct smap_psock *psock)
+{
+ if (!psock->apply_bytes) {
+ psock->eval = __SK_NONE;
+ if (psock->sk_redir) {
+ sock_put(psock->sk_redir);
+ psock->sk_redir = NULL;
+ }
+ }
+}
+
+static void apply_bytes_dec(struct smap_psock *psock, int i)
+{
+ if (psock->apply_bytes) {
+ if (psock->apply_bytes < i)
+ psock->apply_bytes = 0;
+ else
+ psock->apply_bytes -= i;
+ }
+}
+
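+/* Apply the cached or freshly computed SK_MSG verdict to the accumulated
+ * scatterlist data: push it to the TCP stack (__SK_PASS), send it on the
+ * redirect socket (__SK_REDIRECT) or free it (__SK_DROP), honoring cork
+ * and apply_bytes state.
+ */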
+static int bpf_exec_tx_verdict(struct smap_psock *psock,
+ struct sk_msg_buff *m,
+ struct sock *sk,
+ int *copied, int flags)
+{
+ bool cork = false, enospc = (m->sg_start == m->sg_end);
+ struct sock *redir;
+ int err = 0;
+ int send;
+
+more_data:
+ if (psock->eval == __SK_NONE)
+ psock->eval = smap_do_tx_msg(sk, psock, m);
+
+ if (m->cork_bytes &&
+ m->cork_bytes > psock->sg_size && !enospc) {
+ psock->cork_bytes = m->cork_bytes - psock->sg_size;
+ if (!psock->cork) {
+ psock->cork = kcalloc(1,
+ sizeof(struct sk_msg_buff),
+ GFP_ATOMIC | __GFP_NOWARN);
+
+ if (!psock->cork) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ }
+ memcpy(psock->cork, m, sizeof(*m));
+ goto out_err;
+ }
+
+ send = psock->sg_size;
+ if (psock->apply_bytes && psock->apply_bytes < send)
+ send = psock->apply_bytes;
+
+ switch (psock->eval) {
+ case __SK_PASS:
+ err = bpf_tcp_push(sk, send, m, flags, true);
+ if (unlikely(err)) {
+ *copied -= free_start_sg(sk, m);
+ break;
+ }
+
+ apply_bytes_dec(psock, send);
+ psock->sg_size -= send;
+ break;
+ case __SK_REDIRECT:
+ redir = psock->sk_redir;
+ apply_bytes_dec(psock, send);
+
+ if (psock->cork) {
+ cork = true;
+ psock->cork = NULL;
+ }
+
+ return_mem_sg(sk, send, m);
+ release_sock(sk);
+
+ err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
+ lock_sock(sk);
+
+ if (cork) {
+ free_start_sg(sk, m);
+ kfree(m);
+ m = NULL;
+ }
+ if (unlikely(err))
+ *copied -= err;
+ else
+ psock->sg_size -= send;
+ break;
+ case __SK_DROP:
+ default:
+ free_bytes_sg(sk, send, m);
+ apply_bytes_dec(psock, send);
+ *copied -= send;
+ psock->sg_size -= send;
+ err = -EACCES;
+ break;
+ }
+
+ if (likely(!err)) {
+ bpf_md_init(psock);
+ if (m &&
+ m->sg_data[m->sg_start].page_link &&
+ m->sg_data[m->sg_start].length)
+ goto more_data;
+ }
+
+out_err:
+ return err;
+}
+
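+/* sendmsg replacement installed by the BPF TCP ULP: copy user data into a
+ * scatterlist and run it through bpf_exec_tx_verdict(), falling back to
+ * tcp_sendmsg() when no psock is attached.
+ */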
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
+ struct sk_msg_buff md = {0};
+ unsigned int sg_copy = 0;
+ struct smap_psock *psock;
+ int copied = 0, err = 0;
+ struct scatterlist *sg;
+ long timeo;
+
+ /* It's possible a sock event or user removed the psock _but_ the ops
+ * have not been reprogrammed yet, so we get here. In this case fall back
+ * to tcp_sendmsg. Note this only works because we _only_ ever allow
+ * a single ULP; there is no hierarchy here.
+ */
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ return tcp_sendmsg(sk, msg, size);
+ }
+
+ /* Increment the psock refcnt to ensure it's not released while sending a
+ * message. Required because sk lookup and bpf programs are used in
+ * separate rcu critical sections. It's OK if we lose the map entry
+ * but we can't lose the sock reference.
+ */
+ if (!refcount_inc_not_zero(&psock->refcnt)) {
+ rcu_read_unlock();
+ return tcp_sendmsg(sk, msg, size);
+ }
+
+ sg = md.sg_data;
+ sg_init_table(sg, MAX_SKB_FRAGS);
+ rcu_read_unlock();
+
+ lock_sock(sk);
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ while (msg_data_left(msg)) {
+ struct sk_msg_buff *m;
+ bool enospc = false;
+ int copy;
+
+ if (sk->sk_err) {
+ err = sk->sk_err;
+ goto out_err;
+ }
+
+ copy = msg_data_left(msg);
+ if (!sk_stream_memory_free(sk))
+ goto wait_for_sndbuf;
+
+ m = psock->cork_bytes ? psock->cork : &md;
+ m->sg_curr = m->sg_copybreak ? m->sg_curr : m->sg_end;
+ err = sk_alloc_sg(sk, copy, m->sg_data,
+ m->sg_start, &m->sg_end, &sg_copy,
+ m->sg_end - 1);
+ if (err) {
+ if (err != -ENOSPC)
+ goto wait_for_memory;
+ enospc = true;
+ copy = sg_copy;
+ }
+
+ err = memcopy_from_iter(sk, m, &msg->msg_iter, copy);
+ if (err < 0) {
+ free_curr_sg(sk, m);
+ goto out_err;
+ }
+
+ psock->sg_size += copy;
+ copied += copy;
+ sg_copy = 0;
+
+ /* When bytes are being corked, skip running the BPF program and
+ * applying the verdict unless there is no more buffer space. In
+ * the ENOSPC case simply run the BPF program with the currently
+ * accumulated data. We don't have much choice at this point;
+ * we could try extending the page frags or chaining complex
+ * frags, but even in these cases we will _eventually_ hit an
+ * OOM scenario. More complex recovery schemes may be
+ * implemented in the future, but BPF programs must handle
+ * the case where apply_cork requests are not honored. The
+ * canonical way to verify this is to check the data length.
+ */
+ if (psock->cork_bytes) {
+ if (copy > psock->cork_bytes)
+ psock->cork_bytes = 0;
+ else
+ psock->cork_bytes -= copy;
+
+ if (psock->cork_bytes && !enospc)
+ goto out_cork;
+
+ /* All cork bytes accounted for; re-run the filter */
+ psock->eval = __SK_NONE;
+ psock->cork_bytes = 0;
+ }
+
+ err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+ if (unlikely(err < 0))
+ goto out_err;
+ continue;
+wait_for_sndbuf:
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_err;
+ }
+out_err:
+ if (err < 0)
+ err = sk_stream_error(sk, msg->msg_flags, err);
+out_cork:
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ return copied ? copied : err;
+}
+
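+/* sendpage counterpart of bpf_tcp_sendmsg(): append the page to the
+ * scatterlist ring and run the verdict once any outstanding cork budget
+ * has been consumed.
+ */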
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct sk_msg_buff md = {0}, *m = NULL;
+ int err = 0, copied = 0;
+ struct smap_psock *psock;
+ struct scatterlist *sg;
+ bool enospc = false;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto accept;
+
+ if (!refcount_inc_not_zero(&psock->refcnt))
+ goto accept;
+ rcu_read_unlock();
+
+ lock_sock(sk);
+
+ if (psock->cork_bytes)
+ m = psock->cork;
+ else
+ m = &md;
+
+ /* Catch case where ring is full and sendpage is stalled. */
+ if (unlikely(m->sg_end == m->sg_start &&
+ m->sg_data[m->sg_end].length))
+ goto out_err;
+
+ psock->sg_size += size;
+ sg = &m->sg_data[m->sg_end];
+ sg_set_page(sg, page, size, offset);
+ get_page(page);
+ m->sg_copy[m->sg_end] = true;
+ sk_mem_charge(sk, size);
+ m->sg_end++;
+ copied = size;
+
+ if (m->sg_end == MAX_SKB_FRAGS)
+ m->sg_end = 0;
+
+ if (m->sg_end == m->sg_start)
+ enospc = true;
+
+ if (psock->cork_bytes) {
+ if (size > psock->cork_bytes)
+ psock->cork_bytes = 0;
+ else
+ psock->cork_bytes -= size;
+
+ if (psock->cork_bytes && !enospc)
+ goto out_err;
+
+ /* All cork bytes accounted for; re-run the filter */
+ psock->eval = __SK_NONE;
+ psock->cork_bytes = 0;
+ }
+
+ err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+out_err:
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ return copied ? copied : err;
+accept:
+ rcu_read_unlock();
+ return tcp_sendpage(sk, page, offset, size, flags);
+}
+
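+/* Attach (or replace) the SK_MSG program on a psock, dropping the
+ * reference to any program that was previously installed.
+ */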
+static void bpf_tcp_msg_add(struct smap_psock *psock,
+ struct sock *sk,
+ struct bpf_prog *tx_msg)
+{
+ struct bpf_prog *orig_tx_msg;
+
+ orig_tx_msg = xchg(&psock->bpf_tx_msg, tx_msg);
+ if (orig_tx_msg)
+ bpf_prog_put(orig_tx_msg);
+}
+
static int bpf_tcp_ulp_register(void)
{
tcp_bpf_proto = tcp_prot;
tcp_bpf_proto.close = bpf_tcp_close;
+ /* Once the BPF TX ULP is registered it is never unregistered; it
+ * stays in the ULP list for the lifetime of the system. Duplicate
+ * registrations are not a problem.
+ */
return tcp_register_ulp(&bpf_tcp_ulp_ops);
}
@@ -373,15 +1014,13 @@ static void smap_destroy_psock(struct rcu_head *rcu)
static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{
- psock->refcnt--;
- if (psock->refcnt)
- return;
-
- tcp_cleanup_ulp(sock);
- smap_stop_sock(psock, sock);
- clear_bit(SMAP_TX_RUNNING, &psock->state);
- rcu_assign_sk_user_data(sock, NULL);
- call_rcu_sched(&psock->rcu, smap_destroy_psock);
+ if (refcount_dec_and_test(&psock->refcnt)) {
+ tcp_cleanup_ulp(sock);
+ smap_stop_sock(psock, sock);
+ clear_bit(SMAP_TX_RUNNING, &psock->state);
+ rcu_assign_sk_user_data(sock, NULL);
+ call_rcu_sched(&psock->rcu, smap_destroy_psock);
+ }
}
static int smap_parse_func_strparser(struct strparser *strp,
@@ -415,7 +1054,6 @@ static int smap_parse_func_strparser(struct strparser *strp,
return rc;
}
-
static int smap_read_sock_done(struct strparser *strp, int err)
{
return err;
@@ -485,12 +1123,22 @@ static void smap_gc_work(struct work_struct *w)
bpf_prog_put(psock->bpf_parse);
if (psock->bpf_verdict)
bpf_prog_put(psock->bpf_verdict);
+ if (psock->bpf_tx_msg)
+ bpf_prog_put(psock->bpf_tx_msg);
+
+ if (psock->cork) {
+ free_start_sg(psock->sock, psock->cork);
+ kfree(psock->cork);
+ }
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
list_del(&e->list);
kfree(e);
}
+ if (psock->sk_redir)
+ sock_put(psock->sk_redir);
+
sock_put(psock->sock);
kfree(psock);
}
@@ -506,12 +1154,13 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
if (!psock)
return ERR_PTR(-ENOMEM);
+ psock->eval = __SK_NONE;
psock->sock = sock;
skb_queue_head_init(&psock->rxqueue);
INIT_WORK(&psock->tx_work, smap_tx_work);
INIT_WORK(&psock->gc_work, smap_gc_work);
INIT_LIST_HEAD(&psock->maps);
- psock->refcnt = 1;
+ refcount_set(&psock->refcnt, 1);
rcu_assign_sk_user_data(sock, psock);
sock_hold(sock);
@@ -714,10 +1363,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct smap_psock_map_entry *e = NULL;
- struct bpf_prog *verdict, *parse;
+ struct bpf_prog *verdict, *parse, *tx_msg;
struct sock *osock, *sock;
struct smap_psock *psock;
u32 i = *(u32 *)key;
+ bool new = false;
int err;
if (unlikely(flags > BPF_EXIST))
@@ -740,6 +1390,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
*/
verdict = READ_ONCE(stab->bpf_verdict);
parse = READ_ONCE(stab->bpf_parse);
+ tx_msg = READ_ONCE(stab->bpf_tx_msg);
if (parse && verdict) {
/* bpf prog refcnt may be zero if a concurrent attach operation
@@ -758,6 +1409,17 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}
}
+ if (tx_msg) {
+ tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+ if (IS_ERR(tx_msg)) {
+ if (verdict)
+ bpf_prog_put(verdict);
+ if (parse)
+ bpf_prog_put(parse);
+ return PTR_ERR(tx_msg);
+ }
+ }
+
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
@@ -772,7 +1434,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
err = -EBUSY;
goto out_progs;
}
- psock->refcnt++;
+ if (READ_ONCE(psock->bpf_tx_msg) && tx_msg) {
+ err = -EBUSY;
+ goto out_progs;
+ }
+ if (!refcount_inc_not_zero(&psock->refcnt)) {
+ err = -EAGAIN;
+ goto out_progs;
+ }
} else {
psock = smap_init_psock(sock, stab);
if (IS_ERR(psock)) {
@@ -780,11 +1449,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
goto out_progs;
}
- err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
- if (err)
- goto out_progs;
-
set_bit(SMAP_TX_RUNNING, &psock->state);
+ new = true;
}
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
@@ -797,6 +1463,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
/* 3. At this point we have a reference to a valid psock that is
* running. Attach any BPF programs needed.
*/
+ if (tx_msg)
+ bpf_tcp_msg_add(psock, sock, tx_msg);
+ if (new) {
+ err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
+ if (err)
+ goto out_free;
+ }
+
if (parse && verdict && !psock->strp_enabled) {
err = smap_init_sock(psock, sock);
if (err)
@@ -818,8 +1492,6 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
struct smap_psock *opsock = smap_psock_sk(osock);
write_lock_bh(&osock->sk_callback_lock);
- if (osock != sock && parse)
- smap_stop_sock(opsock, osock);
smap_list_remove(opsock, &stab->sock_map[i]);
smap_release_sock(opsock, osock);
write_unlock_bh(&osock->sk_callback_lock);
@@ -832,6 +1504,8 @@ out_progs:
bpf_prog_put(verdict);
if (parse)
bpf_prog_put(parse);
+ if (tx_msg)
+ bpf_prog_put(tx_msg);
write_unlock_bh(&sock->sk_callback_lock);
kfree(e);
return err;
@@ -846,6 +1520,9 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
return -EINVAL;
switch (type) {
+ case BPF_SK_MSG_VERDICT:
+ orig = xchg(&stab->bpf_tx_msg, prog);
+ break;
case BPF_SK_SKB_STREAM_PARSER:
orig = xchg(&stab->bpf_parse, prog);
break;
@@ -907,6 +1584,10 @@ static void sock_map_release(struct bpf_map *map, struct file *map_file)
orig = xchg(&stab->bpf_verdict, NULL);
if (orig)
bpf_prog_put(orig);
+
+ orig = xchg(&stab->bpf_tx_msg, NULL);
+ if (orig)
+ bpf_prog_put(orig);
}
const struct bpf_map_ops sock_map_ops = {
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b0ecf43f5894..57eeb1234b67 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -9,16 +9,19 @@
#include <linux/filter.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
#include "percpu_freelist.h"
-#define STACK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define STACK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
+ BPF_F_STACK_BUILD_ID)
struct stack_map_bucket {
struct pcpu_freelist_node fnode;
u32 hash;
u32 nr;
- u64 ip[];
+ u64 data[];
};
struct bpf_stack_map {
@@ -29,6 +32,17 @@ struct bpf_stack_map {
struct stack_map_bucket *buckets[];
};
+static inline bool stack_map_use_build_id(struct bpf_map *map)
+{
+ return (map->map_flags & BPF_F_STACK_BUILD_ID);
+}
+
+static inline int stack_map_data_size(struct bpf_map *map)
+{
+ return stack_map_use_build_id(map) ?
+ sizeof(struct bpf_stack_build_id) : sizeof(u64);
+}
+
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
@@ -68,8 +82,16 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- value_size < 8 || value_size % 8 ||
- value_size / 8 > sysctl_perf_event_max_stack)
+ value_size < 8 || value_size % 8)
+ return ERR_PTR(-EINVAL);
+
+ BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
+ if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
+ if (value_size % sizeof(struct bpf_stack_build_id) ||
+ value_size / sizeof(struct bpf_stack_build_id)
+ > sysctl_perf_event_max_stack)
+ return ERR_PTR(-EINVAL);
+ } else if (value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
/* hash table size must be power of 2 */
@@ -114,13 +136,184 @@ free_smap:
return ERR_PTR(err);
}
+#define BPF_BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ */
+static inline int stack_map_parse_build_id(void *page_addr,
+ unsigned char *build_id,
+ void *note_start,
+ Elf32_Word note_size)
+{
+ Elf32_Word note_offs = 0, new_offs;
+
+ /* check for overflow */
+ if (note_start < page_addr || note_start + note_size < note_start)
+ return -EINVAL;
+
+ /* only supports note that fits in the first page */
+ if (note_start + note_size > page_addr + PAGE_SIZE)
+ return -EINVAL;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+ if (nhdr->n_type == BPF_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU") &&
+ nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+ memcpy(build_id,
+ note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
+ BPF_BUILD_ID_SIZE);
+ return 0;
+ }
+ new_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ if (new_offs <= note_offs) /* overflow */
+ break;
+ note_offs = new_offs;
+ }
+ return -EINVAL;
+}
+
+/* Parse build ID from 32-bit ELF */
+static int stack_map_get_build_id_32(void *page_addr,
+ unsigned char *build_id)
+{
+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+ Elf32_Phdr *phdr;
+ int i;
+
+ /* only supports phdr that fits in one page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i)
+ if (phdr[i].p_type == PT_NOTE)
+ return stack_map_parse_build_id(page_addr, build_id,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz);
+ return -EINVAL;
+}
+
+/* Parse build ID from 64-bit ELF */
+static int stack_map_get_build_id_64(void *page_addr,
+ unsigned char *build_id)
+{
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+ Elf64_Phdr *phdr;
+ int i;
+
+ /* only supports phdr that fits in one page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i)
+ if (phdr[i].p_type == PT_NOTE)
+ return stack_map_parse_build_id(page_addr, build_id,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz);
+ return -EINVAL;
+}
+
+/* Parse build ID of ELF file mapped to vma */
+static int stack_map_get_build_id(struct vm_area_struct *vma,
+ unsigned char *build_id)
+{
+ Elf32_Ehdr *ehdr;
+ struct page *page;
+ void *page_addr;
+ int ret;
+
+ /* only works for page backed storage */
+ if (!vma->vm_file)
+ return -EINVAL;
+
+ page = find_get_page(vma->vm_file->f_mapping, 0);
+ if (!page)
+ return -EFAULT; /* page not mapped */
+
+ ret = -EINVAL;
+ page_addr = page_address(page);
+ ehdr = (Elf32_Ehdr *)page_addr;
+
+ /* compare against the ELF magic "\x7fELF" */
+ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+ goto out;
+
+ /* only support executable files and shared object files */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+ goto out;
+
+ if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+ ret = stack_map_get_build_id_32(page_addr, build_id);
+ else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+ ret = stack_map_get_build_id_64(page_addr, build_id);
+out:
+ put_page(page);
+ return ret;
+}
+
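+/* Convert the raw instruction pointers in "ips" into (build_id, file
+ * offset) pairs by walking the current task's VMAs; entries fall back to
+ * plain IPs when the mm is unavailable (e.g. in NMI context).
+ */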
+static void stack_map_get_build_id_offset(struct bpf_map *map,
+ struct stack_map_bucket *bucket,
+ u64 *ips, u32 trace_nr, bool user)
+{
+ int i;
+ struct vm_area_struct *vma;
+ struct bpf_stack_build_id *id_offs;
+
+ bucket->nr = trace_nr;
+ id_offs = (struct bpf_stack_build_id *)bucket->data;
+
+ /*
+ * We cannot do up_read() in nmi context, so build_id lookup is
+ * only supported for non-nmi events. If at some point it becomes
+ * possible to run find_vma() without taking the semaphore, we
+ * would like to allow build_id lookup in nmi context.
+ *
+ * Same fallback is used for kernel stack (!user) on a stackmap
+ * with build_id.
+ */
+ if (!user || !current || !current->mm || in_nmi() ||
+ down_read_trylock(&current->mm->mmap_sem) == 0) {
+ /* cannot access current->mm, fall back to ips */
+ for (i = 0; i < trace_nr; i++) {
+ id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+ id_offs[i].ip = ips[i];
+ }
+ return;
+ }
+
+ for (i = 0; i < trace_nr; i++) {
+ vma = find_vma(current->mm, ips[i]);
+ if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+ /* per entry fall back to ips */
+ id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+ id_offs[i].ip = ips[i];
+ continue;
+ }
+ id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
+ - vma->vm_start;
+ id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+ }
+ up_read(&current->mm->mmap_sem);
+}
+
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
- u32 max_depth = map->value_size / 8;
+ u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
@@ -128,6 +321,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
bool user = flags & BPF_F_USER_STACK;
bool kernel = !user;
u64 *ips;
+ bool hash_matches;
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -156,24 +350,43 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
id = hash & (smap->n_buckets - 1);
bucket = READ_ONCE(smap->buckets[id]);
- if (bucket && bucket->hash == hash) {
- if (flags & BPF_F_FAST_STACK_CMP)
+ hash_matches = bucket && bucket->hash == hash;
+ /* fast cmp */
+ if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
+ return id;
+
+ if (stack_map_use_build_id(map)) {
+ /* for build_id+offset, pop a bucket before slow cmp */
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
+ if (unlikely(!new_bucket))
+ return -ENOMEM;
+ stack_map_get_build_id_offset(map, new_bucket, ips,
+ trace_nr, user);
+ trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
+ if (hash_matches && bucket->nr == trace_nr &&
+ memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
+ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
return id;
- if (bucket->nr == trace_nr &&
- memcmp(bucket->ip, ips, trace_len) == 0)
+ }
+ if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
+ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
+ return -EEXIST;
+ }
+ } else {
+ if (hash_matches && bucket->nr == trace_nr &&
+ memcmp(bucket->data, ips, trace_len) == 0)
return id;
+ if (bucket && !(flags & BPF_F_REUSE_STACKID))
+ return -EEXIST;
+
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
+ if (unlikely(!new_bucket))
+ return -ENOMEM;
+ memcpy(new_bucket->data, ips, trace_len);
}
- /* this call stack is not in the map, try to add it */
- if (bucket && !(flags & BPF_F_REUSE_STACKID))
- return -EEXIST;
-
- new_bucket = (struct stack_map_bucket *)
- pcpu_freelist_pop(&smap->freelist);
- if (unlikely(!new_bucket))
- return -ENOMEM;
-
- memcpy(new_bucket->ip, ips, trace_len);
new_bucket->hash = hash;
new_bucket->nr = trace_nr;
@@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
if (!bucket)
return -ENOENT;
- trace_len = bucket->nr * sizeof(u64);
- memcpy(value, bucket->ip, trace_len);
+ trace_len = bucket->nr * stack_map_data_size(map);
+ memcpy(value, bucket->data, trace_len);
memset(value + trace_len, 0, map->value_size - trace_len);
old_bucket = xchg(&smap->buckets[id], bucket);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 43f95d190eea..dd172ee16716 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1315,7 +1315,8 @@ static int bpf_obj_get(const union bpf_attr *attr)
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
-static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
+static int sockmap_get_from_fd(const union bpf_attr *attr,
+ int type, bool attach)
{
struct bpf_prog *prog = NULL;
int ufd = attr->target_fd;
@@ -1329,8 +1330,7 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
return PTR_ERR(map);
if (attach) {
- prog = bpf_prog_get_type(attr->attach_bpf_fd,
- BPF_PROG_TYPE_SK_SKB);
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
if (IS_ERR(prog)) {
fdput(f);
return PTR_ERR(prog);
@@ -1382,9 +1382,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_SK_MSG_VERDICT:
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, true);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
default:
return -EINVAL;
}
@@ -1437,9 +1439,11 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
+ case BPF_SK_MSG_VERDICT:
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, false);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
default:
return -EINVAL;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6eff108aa99..e9f7c20691c1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -508,10 +508,6 @@ err:
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
- BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
static void __mark_reg_not_init(struct bpf_reg_state *reg);
@@ -1252,6 +1248,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SK_SKB:
+ case BPF_PROG_TYPE_SK_MSG:
if (meta)
return meta->pkt_access;
@@ -2075,7 +2072,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKMAP:
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
- func_id != BPF_FUNC_map_delete_elem)
+ func_id != BPF_FUNC_map_delete_elem &&
+ func_id != BPF_FUNC_msg_redirect_map)
goto error;
break;
default:
@@ -2113,6 +2111,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
goto error;
break;
case BPF_FUNC_sk_redirect_map:
+ case BPF_FUNC_msg_redirect_map:
if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
goto error;
break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 01e6b3a38871..7f9691c86b6e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -738,8 +738,7 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
- sample_period);
+ const int size_u64 = sizeof(u64);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
@@ -750,8 +749,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
switch (off) {
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
- bpf_ctx_record_field_size(info, size_sp);
- if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+ return false;
+ break;
+ case bpf_ctx_range(struct bpf_perf_event_data, addr):
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
default:
@@ -778,6 +782,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct perf_sample_data, period, 8,
target_size));
break;
+ case offsetof(struct bpf_perf_event_data, addr):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, addr, 8,
+ target_size));
+ break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), si->dst_reg, si->src_reg,
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..fa10ad8e9b17 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,6 +25,7 @@
#include <linux/uuid.h>
#include <linux/ctype.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <net/net_namespace.h>
@@ -32,11 +33,13 @@ u64 uevent_seqnum;
#ifdef CONFIG_UEVENT_HELPER
char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
#endif
-#ifdef CONFIG_NET
+
struct uevent_sock {
struct list_head list;
struct sock *sk;
};
+
+#ifdef CONFIG_NET
static LIST_HEAD(uevent_sock_list);
#endif
@@ -602,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
EXPORT_SYMBOL_GPL(add_uevent_var);
#if defined(CONFIG_NET)
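+/* Stamp a userspace-supplied uevent with the next SEQNUM and broadcast it
+ * on the given kernel uevent netlink socket.
+ */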
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+ struct netlink_ext_ack *extack)
+{
+ /* u64 to chars: 2^64 - 1 needs at most 20 chars; 21 leaves slack */
+ char buf[sizeof("SEQNUM=") + 21];
+ struct sk_buff *skbc;
+ int ret;
+
+ /* bump and prepare sequence number */
+ ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+ if (ret < 0 || (size_t)ret >= sizeof(buf))
+ return -ENOMEM;
+ ret++;
+
+ /* verify message does not overflow */
+ if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+ NL_SET_ERR_MSG(extack, "uevent message too big");
+ return -EINVAL;
+ }
+
+ /* copy skb and extend to accommodate sequence number */
+ skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+ if (!skbc)
+ return -ENOMEM;
+
+ /* append sequence number */
+ skb_put_data(skbc, buf, ret);
+
+ /* remove msg header */
+ skb_pull(skbc, NLMSG_HDRLEN);
+
+ /* set portid 0 to inform userspace that the message comes from the kernel */
+ NETLINK_CB(skbc).portid = 0;
+ NETLINK_CB(skbc).dst_group = 1;
+
+ ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+ /* ENOBUFS should be handled in userspace */
+ if (ret == -ENOBUFS || ret == -ESRCH)
+ ret = 0;
+
+ return ret;
+}
+
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net;
+ int ret;
+
+ if (!nlmsg_data(nlh))
+ return -EINVAL;
+
+ /*
+ * Verify that we are allowed to send messages to the target
+ * network namespace. The caller must have CAP_SYS_ADMIN in the
+ * owning user namespace of the target network namespace.
+ */
+ net = sock_net(NETLINK_CB(skb).sk);
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+ NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+ return -EPERM;
+ }
+
+ mutex_lock(&uevent_sock_mutex);
+ ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+ mutex_unlock(&uevent_sock_mutex);
+
+ return ret;
+}
+
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+ netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
static int uevent_net_init(struct net *net)
{
struct uevent_sock *ue_sk;
struct netlink_kernel_cfg cfg = {
.groups = 1,
- .flags = NL_CFG_F_NONROOT_RECV,
+ .input = uevent_net_rcv,
+ .flags = NL_CFG_F_NONROOT_RECV
};
ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
@@ -621,6 +700,9 @@ static int uevent_net_init(struct net *net)
kfree(ue_sk);
return -ENODEV;
}
+
+ net->uevent_sock = ue_sk;
+
mutex_lock(&uevent_sock_mutex);
list_add_tail(&ue_sk->list, &uevent_sock_list);
mutex_unlock(&uevent_sock_mutex);
@@ -629,17 +711,9 @@ static int uevent_net_init(struct net *net)
static void uevent_net_exit(struct net *net)
{
- struct uevent_sock *ue_sk;
+ struct uevent_sock *ue_sk = net->uevent_sock;
mutex_lock(&uevent_sock_mutex);
- list_for_each_entry(ue_sk, &uevent_sock_list, list) {
- if (sock_net(ue_sk->sk) == net)
- goto found;
- }
- mutex_unlock(&uevent_sock_mutex);
- return;
-
-found:
list_del(&ue_sk->list);
mutex_unlock(&uevent_sock_mutex);
@@ -650,6 +724,7 @@ found:
static struct pernet_operations uevent_net_ops = {
.init = uevent_net_init,
.exit = uevent_net_exit,
+ .async = true,
};
static int __init kobject_uevent_init(void)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..bd0ed39f65fb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
.exit = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct vlan_net),
+ .async = true,
};
static int __init vlan_proto_init(void)
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
* fields into the sockaddr.
*/
static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_at sat;
struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
if (atalk_autobind(sk) < 0)
goto out;
- *uaddr_len = sizeof(struct sockaddr_at);
memset(&sat, 0, sizeof(sat));
if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
sat.sat_port = at->src_port;
}
- err = 0;
sat.sat_family = AF_APPLETALK;
memcpy(uaddr, &sat, sizeof(sat));
+ err = sizeof(struct sockaddr_at);
out:
release_sock(sk);
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
}
static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmpvc *addr;
struct atm_vcc *vcc = ATM_SD(sock);
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
- *sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
addr->sap_addr.vci = vcc->vci;
- return 0;
+ return sizeof(struct sockaddr_atmpvc);
}
static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
}
static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmsvc *addr;
- *sockaddr_len = sizeof(struct sockaddr_atmsvc);
addr = (struct sockaddr_atmsvc *) sockaddr;
memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
sizeof(struct sockaddr_atmsvc));
- return 0;
+ return sizeof(struct sockaddr_atmsvc);
}
int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..c8319ed48485 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
}
static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_digipeater[0] = null_ax25_address;
}
}
- *uaddr_len = sizeof (struct full_sockaddr_ax25);
+ err = sizeof (struct full_sockaddr_ax25);
out:
release_sock(sk);
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c44f6515be5e..e4e2e02b7380 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 022f6e77307b..b97ba6fb8353 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 80c72c7d3cad..ea309ad06175 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 029221615ba3..534b790c3753 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 99abeadf416e..be09a9883825 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 9dc0dd5c83df..317cafd302cf 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index c74f81341dab..ec93337ee259 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index a17ab68bbce8..ec4a2a569750 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a83478c46597..28687493599f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 5e39d0588a48..e8c7b7fd290d 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index ba59b77c605d..2948b41b06d4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 6a4c14ccc3c6..ed36c5e79fde 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bdc1ef06e05b..a296a4d851f5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ca9d0753dd6b..48f683289531 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index b1a08374088b..a2de5a44bd41 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index b27571abcd2f..71f95a3e4d3f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 21d1189957a7..4229b01ac7b5 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 90a08d35c501..37b069698b04 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 87cd962d28d5..a60bacf7120b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -43,13 +44,19 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "send.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
* the one with the lowest address
*/
if (tmp_max == max && max_orig_node &&
- batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+ batadv_compare_eth(candidate->orig, max_orig_node->orig))
goto out;
ret = true;
@@ -852,6 +859,151 @@ out:
#endif
/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ * netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_dat_entry *dat_entry)
+{
+ int msecs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ if (!hdr)
+ return -ENOBUFS;
+
+ msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+ if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+ dat_entry->ip) ||
+ nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+ dat_entry->mac_addr) ||
+ nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ * a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_dat_entry *dat_entry;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_dat_cache_dump_entry(msg, portid, seq,
+ dat_entry)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->dat.hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_dat_cache_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq, head,
+ &idx))
+ break;
+
+ bucket++;
+ idx = 0;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+/**
* batadv_arp_get_type() - parse an ARP packet and get the type
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to analyse
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 12897eb46268..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -28,6 +28,7 @@
#include "originator.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
int batadv_dat_init(struct batadv_priv *bat_priv);
void batadv_dat_free(struct batadv_priv *bat_priv);
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
/**
* batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
{
}
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
u8 subtype)
{
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 5afe641ee4b0..0fddc17106bd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 138b22a1836a..944512e07782 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 37fe9a644f22..c294f6fd43e0 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 981f58421a32..f0b86fcb2493 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b3e156af2256..936c107f3199 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index afebd9c7edf4..80afb2793687 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 68b54a39c51d..c405d15befd6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index de5e9a374ece..d1c0f6189301 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 04d964358c98..7b49e4001778 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 4ce1b6d3ad5c..9490a7ca2ba6 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 5daa3d50da17..55c358ad3331 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 84cddd01eeab..958be22beda9 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index cdbe0e5e208b..853773e45f79 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35e02b2b9e72..35f4f397ed57 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d31c8266e244..69c0d85bceb3 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f7ba3f96d8f3..057a28a9fe88 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
*
* Return: true when x is a predecessor of y, false otherwise
*/
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
- typeof(y)_d2 = (y); \
- typeof(x)_dummy = (_d1 - _d2); \
- (void)(&_d1 == &_d2); \
- _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+ typeof(x)_d1 = (x); \
+ typeof(y)_d2 = (y); \
+ typeof(x)_dummy = (_d1 - _d2); \
+ (void)(&_d1 == &_d2); \
+ _dummy > batadv_smallest_signed_int(_dummy); \
+})
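The rewrapped macro is the usual serial-number comparison: x precedes y when the unsigned difference x - y, reinterpreted as a signed value of the same width, is negative. A standalone sketch for 8-bit sequence numbers (the width is illustrative; the kernel macro works on whatever type x has):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* x precedes y if (x - y) mod 256 falls in the "negative" half */
	static bool seq_before_u8(uint8_t x, uint8_t y)
	{
		uint8_t diff = (uint8_t)(x - y);

		return diff > 0x80;	/* 0x80: smallest signed value for 8 bits */
	}

	int main(void)
	{
		printf("%d\n", seq_before_u8(250, 5));	/* 1: 250 precedes 5 across the wrap */
		printf("%d\n", seq_before_u8(5, 250));	/* 0 */
		printf("%d\n", seq_before_u8(7, 7));	/* 0: equal is not "before" */
		return 0;
	}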
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d70640135e3a..de3a055f7dd8 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -40,6 +40,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
+#include <net/genetlink.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+ static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+ static const u8 prefix[] = {0x33, 0x33};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
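Both helpers rely on the fixed multicast MAC prefixes: IPv4 groups map onto 01:00:5e:xx:xx:xx and IPv6 groups onto 33:33:xx:xx:xx:xx, so a prefix compare is enough to classify a listener address. A standalone sketch with illustrative names and two example mappings:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static bool mcast_mac_is_ipv4(const uint8_t *addr)
	{
		static const uint8_t prefix[] = { 0x01, 0x00, 0x5e };

		return memcmp(prefix, addr, sizeof(prefix)) == 0;
	}

	static bool mcast_mac_is_ipv6(const uint8_t *addr)
	{
		static const uint8_t prefix[] = { 0x33, 0x33 };

		return memcmp(prefix, addr, sizeof(prefix)) == 0;
	}

	int main(void)
	{
		const uint8_t v4[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb }; /* 224.0.0.251 */
		const uint8_t v6[6] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0xfb }; /* ff02::fb    */

		printf("%d %d\n", mcast_mac_is_ipv4(v4), mcast_mac_is_ipv6(v6)); /* 1 1 */
		return 0;
	}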
+/**
* batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
netif_addr_lock_bh(bridge ? bridge : dev);
netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+ if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ continue;
+
+ if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ continue;
+
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+ continue;
+
+ if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+ continue;
+
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
if (!batadv_mcast_mla_tvlv_update(bat_priv))
goto update;
- ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
@@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv)
+{
+ u32 flags = bat_priv->mcast.flags;
+ u32 flags_priv = BATADV_NO_FLAGS;
+
+ if (bat_priv->mcast.bridged) {
+ flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+ if (bat_priv->mcast.querier_ipv4.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+ if (bat_priv->mcast.querier_ipv6.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+ if (bat_priv->mcast.querier_ipv4.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+ if (bat_priv->mcast.querier_ipv6.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+ nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_orig_node *orig_node)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig_node->orig)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capabilities)) {
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+ orig_node->mcast_flags)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ * table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, long *idx_skip)
+{
+ struct batadv_orig_node *orig_node;
+ long idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capa_initialized))
+ continue;
+
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+ orig_node)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ long bucket_tmp = *bucket;
+ struct hlist_head *head;
+ long idx_tmp = *idx;
+
+ while (bucket_tmp < hash->size) {
+ head = &hash->table[bucket_tmp];
+
+ if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+ &idx_tmp))
+ break;
+
+ bucket_tmp++;
+ idx_tmp = 0;
+ }
+
+ *bucket = bucket_tmp;
+ *idx = idx_tmp;
+
+ return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ * callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+ struct batadv_hard_iface **primary_if)
+{
+ struct batadv_hard_iface *hard_iface = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ hard_iface = batadv_primary_if_get_selected(bat_priv);
+ if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (!ret && primary_if)
+ *primary_if = hard_iface;
+ else
+ batadv_hardif_put(hard_iface);
+
+ return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_priv *bat_priv;
+ long *bucket = &cb->args[0];
+ long *idx = &cb->args[1];
+ int ret;
+
+ ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+ if (ret)
+ return ret;
+
+ bat_priv = netdev_priv(primary_if->soft_iface);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, bucket, idx);
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+
+/**
* batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3ac06337ab71..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -21,6 +21,7 @@
#include "main.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
return 0;
}
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
{
}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a823d3899bad..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -45,8 +45,10 @@
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "multicast.h"
#include "originator.h"
#include "soft-interface.h"
#include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
};
static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
- [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
- [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
- [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
- [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
- [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
- [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
- [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
- [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
- [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
- [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
- [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TQ] = { .type = NLA_U8 },
- [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
- [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
- [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+ [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
+ [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
+ [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
+ [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
+ [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TQ] = { .type = NLA_U8 },
+ [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
+ [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 },
+ [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 },
};
/**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
goto out;
#endif
+ if (batadv_mcast_mesh_info_put(msg, bat_priv))
+ goto out;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_backbone_dump,
},
+ {
+ .cmd = BATADV_CMD_GET_DAT_CACHE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_dat_cache_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_mcast_flags_dump,
+ },
};
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 0e7e57b69b54..571d9a5ae7aa 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b48116bb24ef..c3578444f3cb 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index adaeafa4f71e..65c346812bc1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 74782426bb77..716e5b43acfa 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 15d896b2de6f..3b3f59b881e1 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e61dc1293bb5..cc3ed93a6d51 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index a1289bc5f115..db54c2d9b8bf 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 2a5ab6f1076d..4a35f5c2f52b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1e8c79093623..64cce07b8fe6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 367a81fb785f..edeffcb9f3a2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 075c5b5b2ce1..daf87f07fadd 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c1578fa0b952..f2eef43bd2ec 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index bbeee61221fa..c1e3fb69952d 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8b576712d0c1..11520de96ccb 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index c8b8f2cb2c2b..68e600974759 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7550a9ccd695..0225616d5771 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 8d9e3abec2c8..01b6c8eafaf9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 5ffcb45ac6ff..a637458205d1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index a74df33f446d..ef5867f49824 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a5aa6d61f4e2..476b052ad982 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Slave connection state and connectable mode bit 38
* and scannable bit 21.
*/
- if (connectable && (!(hdev->le_states[4] & 0x01) ||
- !(hdev->le_states[2] & 0x40)))
+ if (connectable && (!(hdev->le_states[4] & 0x40) ||
+ !(hdev->le_states[2] & 0x20)))
return false;
}
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Master connection state and connectable mode bit 35 and
* scannable 19.
*/
- if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ if (connectable && (!(hdev->le_states[4] & 0x08) ||
!(hdev->le_states[2] & 0x08)))
return false;
}
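The new masks simply follow byte/bit indexing of the LE supported-states field: bit N is le_states[N / 8] masked with 1 << (N % 8), which gives byte 4/0x40 for bit 38, byte 2/0x20 for bit 21, byte 4/0x08 for bit 35 and byte 2/0x08 for bit 19. A tiny standalone sketch of that indexing (illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* bit N of the LE supported-states bitmap */
	static int le_state_bit(const uint8_t *le_states, unsigned int bit)
	{
		return !!(le_states[bit / 8] & (1u << (bit % 8)));
	}

	int main(void)
	{
		uint8_t le_states[8] = { 0 };

		le_states[4] = 0x40;			/* set bit 38 only */
		printf("%d %d\n",
		       le_state_bit(le_states, 38),	/* 1 */
		       le_state_bit(le_states, 21));	/* 0 */
		return 0;
	}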
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
}
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
goto done;
}
- *addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
haddr->hci_channel = hci_pi(sk)->channel;
+ err = sizeof(*haddr);
done:
release_sock(sk);
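Here and in the getname() handlers below, the convention changes from writing the address length through an int * out-parameter to returning it (or a negative errno) directly. A sketch of a handler in the new style; the foo names and types are purely illustrative, not a real protocol:

	/* Sketch only: sockaddr_foo, AF_FOO and the foo_sk_* helpers are made up
	 * to illustrate the new contract, they are not a real protocol.
	 */
	static int foo_getname(struct socket *sock, struct sockaddr *addr, int peer)
	{
		struct sockaddr_foo *sfoo = (struct sockaddr_foo *)addr;

		if (peer && !foo_sk_connected(sock->sk))
			return -ENOTCONN;		/* errors stay negative */

		memset(sfoo, 0, sizeof(*sfoo));
		sfoo->sfoo_family = AF_FOO;
		sfoo->sfoo_id = foo_sk_id(sock->sk, peer);

		return sizeof(*sfoo);			/* address length replaces *len */
	}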
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
}
static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_l2);
la->l2_psm = chan->psm;
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
la->l2_bdaddr_type = chan->src_type;
}
- return 0;
+ return sizeof(struct sockaddr_l2);
}
static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..93a3b219db09 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -533,7 +533,7 @@ done:
return err;
}
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
@@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
else
bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
- *len = sizeof(struct sockaddr_rc);
- return 0;
+ return sizeof(struct sockaddr_rc);
}
static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
}
static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_sco);
if (peer)
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
- return 0;
+ return sizeof(struct sockaddr_sco);
}
static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..7770481a6506 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
static struct pernet_operations br_net_ops = {
.exit = br_net_exit,
+ .async = true,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 9b16eaf33819..c2120eb889a9 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -969,6 +969,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
.exit = brnf_exit_net,
.id = &brnf_net_id,
.size = sizeof(struct brnf_net),
+ .async = true,
};
static struct notifier_block brnf_notifier __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 276b60262981..f070b5e5b9dd 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -77,6 +77,7 @@ static void __net_exit broute_net_exit(struct net *net)
static struct pernet_operations broute_net_ops = {
.init = broute_net_init,
.exit = broute_net_exit,
+ .async = true,
};
static int __init ebtable_broute_init(void)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index c41da5fac84f..4151afc8efcc 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -105,6 +105,7 @@ static void __net_exit frame_filter_net_exit(struct net *net)
static struct pernet_operations frame_filter_net_ops = {
.init = frame_filter_net_init,
.exit = frame_filter_net_exit,
+ .async = true,
};
static int __init ebtable_filter_init(void)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 08df7406ecb3..b8da2dfe2ec5 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -105,6 +105,7 @@ static void __net_exit frame_nat_net_exit(struct net *net)
static struct pernet_operations frame_nat_net_ops = {
.init = frame_nat_net_init,
.exit = frame_nat_net_exit,
+ .async = true,
};
static int __init ebtable_nat_init(void)
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index bd2b3c78f59b..91bfc2ac055a 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -48,6 +48,7 @@ static void __net_exit nf_log_bridge_net_exit(struct net *net)
static struct pernet_operations nf_log_bridge_net_ops = {
.init = nf_log_bridge_net_init,
.exit = nf_log_bridge_net_exit,
+ .async = true,
};
static int __init nf_log_bridge_init(void)
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e0adcd123f48..7a78268cc572 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -544,6 +544,7 @@ static struct pernet_operations caif_net_ops = {
.exit = caif_exit_net,
.id = &caif_net_id,
.size = sizeof(struct caif_net),
+ .async = true,
};
/* Initialize Caif devices list */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6da324550eec..e899970398a1 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -954,6 +954,7 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
static struct pernet_operations can_pernet_ops __read_mostly = {
.init = can_pernet_init,
.exit = can_pernet_exit,
+ .async = true,
};
static __init int can_init(void)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e34fee3..26730d39e048 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
static struct pernet_operations canbcm_pernet_ops __read_mostly = {
.init = canbcm_pernet_init,
.exit = canbcm_pernet_exit,
+ .async = true,
};
static int __init bcm_module_init(void)
diff --git a/net/can/gw.c b/net/can/gw.c
index 398dd0395ad9..08e97668d5cf 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1010,6 +1010,7 @@ static void __net_exit cangw_pernet_exit(struct net *net)
static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
.exit = cangw_pernet_exit,
+ .async = true,
};
static __init int cgw_module_init(void)
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
addr->can_family = AF_CAN;
addr->can_ifindex = ro->ifindex;
- *len = sizeof(*addr);
-
- return 0;
+ return sizeof(*addr);
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/core/dev.c b/net/core/dev.c
index 12be20535714..f9c28f44286c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
@@ -4359,6 +4359,9 @@ int netdev_rx_handler_register(struct net_device *dev,
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -7554,6 +7557,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -8010,7 +8026,8 @@ int register_netdev(struct net_device *dev)
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
@@ -8153,8 +8170,9 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -8852,6 +8870,7 @@ static void __net_exit netdev_exit(struct net *net)
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
+ .async = true,
};
static void __net_exit default_device_exit(struct net *net)
@@ -8952,6 +8971,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
+ .async = true,
};
/*
diff --git a/net/core/devlink.c b/net/core/devlink.c
index effd4848c2b4..9236e421bd62 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2331,6 +2331,32 @@ out:
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 reminder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
+ if (reminder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
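A size is accepted only when it lies within [size_min, size_max] and is a multiple of size_granularity; div64_u64_rem() is used because a plain % on u64 operands is not available on all 32-bit targets. A standalone userspace sketch of the same check (illustrative names; unlike the kernel version it stops at the first failure instead of reporting all of them via extack):

	#include <stdint.h>
	#include <stdio.h>

	struct size_params {
		uint64_t size_min;
		uint64_t size_max;
		uint64_t size_granularity;
	};

	static int validate_size(const struct size_params *p, uint64_t size)
	{
		if (size < p->size_min || size > p->size_max)
			return -1;		/* out of range */
		if (size % p->size_granularity)
			return -1;		/* not a multiple of the granularity */
		return 0;
	}

	int main(void)
	{
		struct size_params p = {
			.size_min = 1024, .size_max = 1 << 20, .size_granularity = 1024,
		};

		printf("%d %d %d\n",
		       validate_size(&p, 4096),		/*  0: ok            */
		       validate_size(&p, 4095),		/* -1: granularity   */
		       validate_size(&p, 2 << 20));	/* -1: above maximum */
		return 0;
	}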
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2349,12 +2375,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
@@ -2714,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2749,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
@@ -3144,7 +3166,6 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
@@ -3153,8 +3174,11 @@ int devlink_resource_register(struct devlink *devlink,
{
struct devlink_resource *resource;
struct list_head *resource_list;
+ bool top_hierarchy;
int err = 0;
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource) {
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 554d36449231..64cef977484a 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr)
+ const struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
- idst->in6_saddr = *addr;
+ idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3f89c76d5c24..157cd9efa4be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1022,6 +1022,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* If FLOW_RSS was requested then user-space must be using the
+ * new definition, as FLOW_RSS is newer.
+ */
+ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+ info_size = sizeof(info);
+ if (copy_from_user(&info, useraddr, info_size))
+ return -EFAULT;
+ }
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1260,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
rxfh.indir_size = dev_indir_size;
rxfh.key_size = dev_key_size;
@@ -1276,7 +1287,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (rxfh.rss_context)
+ ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+ &dev_hfunc,
+ rxfh.rss_context);
+ else
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
if (ret)
goto out;
@@ -1306,6 +1322,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *hkey = NULL;
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+ bool delete = false;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1319,9 +1336,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->set_rxfh_context)
+ return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1365,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* rxfh.indir_size == 0 means reset the indir table to default.
+ /* rxfh.indir_size == 0 means reset the indir table to default (master
+ * context) or delete the context (other RSS contexts).
* rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
if (rxfh.indir_size &&
@@ -1359,9 +1379,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
} else if (rxfh.indir_size == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ if (rxfh.rss_context == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ delete = true;
+ }
}
if (rxfh.key_size) {
@@ -1374,15 +1398,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (rxfh.rss_context)
+ ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+ &rxfh.rss_context, delete);
+ else
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
if (ret)
goto out;
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+ &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ ret = -EFAULT;
+
+ if (!rxfh.rss_context) {
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ }
out:
kfree(rss_config);
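With RSS contexts, rss_context == 0 keeps the existing get_rxfh()/set_rxfh() path, while a non-zero context is routed to the new *_rxfh_context driver ops, and indir_size == 0 on a non-zero context deletes that context rather than resetting it. A driver-side sketch of the get hook, with the prototype inferred from the call site above and the foo_* names purely illustrative:

	/* Sketch of the driver-side hook; foo_priv/foo_rss_ctx_find are
	 * placeholders for however a driver stores one indirection table
	 * and hash key per context id.
	 */
	static int foo_get_rxfh_context(struct net_device *dev, u32 *indir, u8 *key,
					u8 *hfunc, u32 rss_context)
	{
		struct foo_priv *priv = netdev_priv(dev);
		struct foo_rss_ctx *ctx = foo_rss_ctx_find(priv, rss_context);

		if (!ctx)
			return -ENOENT;
		if (indir)
			memcpy(indir, ctx->indir, sizeof(ctx->indir));
		if (key)
			memcpy(key, ctx->key, sizeof(ctx->key));
		if (hfunc)
			*hfunc = ETH_RSS_HASH_TOP;	/* Toeplitz, for example */
		return 0;
	}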
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..5ace0705a3f9 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
.exit = fib_notifier_net_exit,
+ .async = true,
};
static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..f6f04fc0f629 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
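FRA_SPORT_RANGE and FRA_DPORT_RANGE carry a struct fib_rule_port_range (start, end) and only take effect when the range is considered set. The helpers used here (fib_rule_port_range_set/valid/compare) are defined elsewhere in this series; a plausible sketch of their semantics, with local names:

	/* Plausible semantics of the helpers referenced above; the exact bodies
	 * live in include/net/fib_rules.h in this series, these are sketches.
	 */
	static bool port_range_set(const struct fib_rule_port_range *r)
	{
		return r->start != 0 && r->end != 0;
	}

	static bool port_range_valid(const struct fib_rule_port_range *r)
	{
		return r->start != 0 && r->start <= r->end && r->end < 0xffff;
	}

	static bool port_range_compare(const struct fib_rule_port_range *a,
				       const struct fib_rule_port_range *b)
	{
		return a->start == b->start && a->end == b->end;
	}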
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
: fib_default_rule_pref(ops);
+ rule->proto = tb[FRA_PROTOCOL] ?
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -630,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -663,7 +721,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
+ if (tb[FRA_PROTOCOL] &&
+ (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+ continue;
+
if (frh->action && (frh->action != rule->action))
continue;
@@ -704,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -781,7 +869,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
- + nla_total_size(sizeof(struct fib_kuid_range));
+ + nla_total_size(sizeof(struct fib_kuid_range))
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -812,6 +904,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
+ if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+ goto nla_put_failure;
+
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
@@ -843,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
@@ -1030,6 +1130,7 @@ static void __net_exit fib_rules_net_exit(struct net *net)
static struct pernet_operations fib_rules_net_ops = {
.init = fib_rules_net_init,
.exit = fib_rules_net_exit,
+ .async = true,
};
static int __init fib_rules_init(void)
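
The new FRA_SPORT_RANGE/FRA_DPORT_RANGE attributes carry a fib_rule_port_range with start/end bounds, and fib4_rule_match() only consults a range when one was supplied. A minimal standalone sketch of that matching semantics follows; the struct and helper names below (port_range, port_range_set, port_inrange) are written here purely for illustration and mirror, rather than quote, the kernel's fib_rule_port_range helpers.

#include <stdbool.h>
#include <stdint.h>
#include <arpa/inet.h>	/* ntohs() */

struct port_range {	/* illustrative mirror of fib_rule_port_range */
	uint16_t start;
	uint16_t end;
};

/* Treat an all-zero range as "not configured", so rules without a
 * port range never filter on ports.
 */
static bool port_range_set(const struct port_range *r)
{
	return r->start != 0 && r->end != 0;
}

/* Flow ports (fl4_sport/fl4_dport) are big-endian; compare in host order. */
static bool port_inrange(const struct port_range *r, uint16_t be_port)
{
	uint16_t p = ntohs(be_port);

	return p >= r->start && p <= r->end;
}
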
diff --git a/net/core/filter.c b/net/core/filter.c
index 48aa7c7320db..00c711c5f1a2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1890,6 +1890,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+ struct bpf_map *, map, u32, key, u64, flags)
+{
+ /* If user passes invalid input drop the packet. */
+ if (unlikely(flags))
+ return SK_DROP;
+
+ msg->key = key;
+ msg->flags = flags;
+ msg->map = map;
+
+ return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+ struct sock *sk = NULL;
+
+ if (msg->map) {
+ sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+ msg->key = 0;
+ msg->map = NULL;
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+ .func = bpf_msg_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->apply_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+ .func = bpf_msg_apply_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->cork_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+ .func = bpf_msg_cork_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_pull_data,
+ struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+ unsigned int len = 0, offset = 0, copy = 0;
+ struct scatterlist *sg = msg->sg_data;
+ int first_sg, last_sg, i, shift;
+ unsigned char *p, *to, *from;
+ int bytes = end - start;
+ struct page *page;
+
+ if (unlikely(flags || end <= start))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg_start;
+ do {
+ len = sg[i].length;
+ offset += len;
+ if (start < offset + len)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != msg->sg_end);
+
+ if (unlikely(start >= offset + len))
+ return -EINVAL;
+
+ if (!msg->sg_copy[i] && bytes <= len)
+ goto out;
+
+ first_sg = i;
+
+ /* At this point we need to linearize multiple scatterlist
+ * elements or a single shared page. Either way we need to
+ * copy into a linear buffer exclusively owned by BPF. Then
+ * place the buffer in the scatterlist and fixup the original
+ * entries by removing the entries now in the linear buffer
+ * and shifting the remaining entries. For now we do not try
+ * to copy partial entries to avoid complexity of running out
+ * of sg_entry slots. The downside is reading a single byte
+ * will copy the entire sg entry.
+ */
+ do {
+ copy += sg[i].length;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ if (bytes < copy)
+ break;
+ } while (i != msg->sg_end);
+ last_sg = i;
+
+ if (unlikely(copy < end - start))
+ return -EINVAL;
+
+ page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+ if (unlikely(!page))
+ return -ENOMEM;
+ p = page_address(page);
+ offset = 0;
+
+ i = first_sg;
+ do {
+ from = sg_virt(&sg[i]);
+ len = sg[i].length;
+ to = p + offset;
+
+ memcpy(to, from, len);
+ offset += len;
+ sg[i].length = 0;
+ put_page(sg_page(&sg[i]));
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != last_sg);
+
+ sg[first_sg].length = copy;
+ sg_set_page(&sg[first_sg], page, copy, 0);
+
+ /* To repair sg ring we need to shift entries. If we only
+ * had a single entry though we can just replace it and
+ * be done. Otherwise walk the ring and shift the entries.
+ */
+ shift = last_sg - first_sg - 1;
+ if (!shift)
+ goto out;
+
+ i = first_sg + 1;
+ do {
+ int move_from;
+
+ if (i + shift >= MAX_SKB_FRAGS)
+ move_from = i + shift - MAX_SKB_FRAGS;
+ else
+ move_from = i + shift;
+
+ if (move_from == msg->sg_end)
+ break;
+
+ sg[i] = sg[move_from];
+ sg[move_from].length = 0;
+ sg[move_from].page_link = 0;
+ sg[move_from].offset = 0;
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (1);
+ msg->sg_end -= shift;
+ if (msg->sg_end < 0)
+ msg->sg_end += MAX_SKB_FRAGS;
+out:
+ msg->data = sg_virt(&sg[i]) + start - offset;
+ msg->data_end = msg->data + bytes;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+ .func = bpf_msg_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -2855,7 +3051,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta)
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data)
return true;
return false;
@@ -3015,7 +3212,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -3049,6 +3246,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3613,6 +3812,22 @@ static const struct bpf_func_proto *
}
}
+static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_msg_redirect_map:
+ return &bpf_msg_redirect_map_proto;
+ case BPF_FUNC_msg_apply_bytes:
+ return &bpf_msg_apply_bytes_proto;
+ case BPF_FUNC_msg_cork_bytes:
+ return &bpf_msg_cork_bytes_proto;
+ case BPF_FUNC_msg_pull_data:
+ return &bpf_msg_pull_data_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -4002,6 +4217,32 @@ static bool sk_skb_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, info);
}
+static bool sk_msg_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct sk_msg_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ if (off < 0 || off >= sizeof(struct sk_msg_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -4800,6 +5041,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sk_msg_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data));
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -4890,6 +5154,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
const struct bpf_prog_ops sk_skb_prog_ops = {
};
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+ .get_func_proto = sk_msg_func_proto,
+ .is_valid_access = sk_msg_is_valid_access,
+ .convert_ctx_access = sk_msg_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
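
The sk_msg helpers wired up above (msg_apply_bytes, msg_cork_bytes, msg_pull_data, msg_redirect_map) are intended to be called from BPF_PROG_TYPE_SK_MSG programs attached to a sockmap. A rough sketch of such a verdict program is shown below, in the hand-declared helper style used by the sockmap samples of this era; the map name, program name, and the bpf_helpers.h include are assumptions for illustration only.

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* assumed to provide SEC() and bpf_map_def */

/* Helper bindings by function id, as the samples did at the time. */
static int (*msg_apply_bytes)(void *msg, int bytes) =
	(void *) BPF_FUNC_msg_apply_bytes;
static int (*msg_redirect_map)(void *msg, void *map, int key, int flags) =
	(void *) BPF_FUNC_msg_redirect_map;

struct bpf_map_def SEC("maps") sock_map = {	/* hypothetical map */
	.type		= BPF_MAP_TYPE_SOCKMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 2,
};

SEC("sk_msg")
int msg_prog(struct sk_msg_md *msg)
{
	void *data	= (void *)(long)msg->data;
	void *data_end	= (void *)(long)msg->data_end;

	/* Bounded access, checked the same way as for skb programs. */
	if (data + 8 > data_end)
		return SK_DROP;

	/* Apply this verdict to the next 4k of sendmsg data, then
	 * redirect the message to the socket stored at key 0.
	 */
	msg_apply_bytes(msg, 4096);
	return msg_redirect_map(msg, &sock_map, 0, 0);
}

char _license[] SEC("license") = "GPL";
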
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..65b51e778782 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
static struct pernet_operations __net_initdata dev_proc_ops = {
.init = dev_proc_net_init,
.exit = dev_proc_net_exit,
+ .async = true,
};
static int dev_mc_seq_show(struct seq_file *seq, void *v)
@@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
static struct pernet_operations __net_initdata dev_mc_net_ops = {
.init = dev_mc_net_init,
.exit = dev_mc_net_exit,
+ .async = true,
};
int __init dev_proc_init(void)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..95ba2c53bd9a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,7 +29,6 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -41,6 +40,12 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+static unsigned nr_sync_pernet_ops;
+/*
+ * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops,
+ * init_net_initialized and first_device pointer.
+ */
+DECLARE_RWSEM(net_sem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +70,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
- BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
- lockdep_is_held(&net_mutex));
+ lockdep_is_held(&net_sem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
@@ -286,7 +290,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
- /* Must be called with net_mutex held */
+ /* Must be called with net_sem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
@@ -297,12 +301,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
if (error < 0)
goto out_undo;
}
+ rtnl_lock();
+ list_add_tail_rcu(&net->list, &net_namespace_list);
+ rtnl_unlock();
out:
return error;
@@ -331,6 +339,7 @@ static int __net_init net_defaults_init_net(struct net *net)
static struct pernet_operations net_defaults_ops = {
.init = net_defaults_init_net,
+ .async = true,
};
static __init int net_defaults_init(void)
@@ -354,7 +363,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
@@ -397,6 +406,7 @@ struct net *copy_net_ns(unsigned long flags,
{
struct ucounts *ucounts;
struct net *net;
+ unsigned write;
int rv;
if (!(flags & CLONE_NEWNET))
@@ -408,32 +418,38 @@ struct net *copy_net_ns(unsigned long flags,
net = net_alloc();
if (!net) {
- dec_net_namespaces(ucounts);
- return ERR_PTR(-ENOMEM);
+ rv = -ENOMEM;
+ goto dec_ucounts;
}
-
+ refcount_set(&net->passive, 1);
+ net->ucounts = ucounts;
get_user_ns(user_ns);
+again:
+ write = READ_ONCE(nr_sync_pernet_ops);
+ if (write)
+ rv = down_write_killable(&net_sem);
+ else
+ rv = down_read_killable(&net_sem);
+ if (rv < 0)
+ goto put_userns;
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0) {
- net_free(net);
- dec_net_namespaces(ucounts);
- put_user_ns(user_ns);
- return ERR_PTR(rv);
+ if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+ up_read(&net_sem);
+ goto again;
}
-
- net->ucounts = ucounts;
rv = setup_net(net, user_ns);
- if (rv == 0) {
- rtnl_lock();
- list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
- }
- mutex_unlock(&net_mutex);
+
+ if (write)
+ up_write(&net_sem);
+ else
+ up_read(&net_sem);
+
if (rv < 0) {
- dec_net_namespaces(ucounts);
+put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
+dec_ucounts:
+ dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
@@ -466,26 +482,33 @@ static void unhash_nsid(struct net *net, struct net *last)
spin_unlock_bh(&net->nsid_lock);
}
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp, *last;
- struct list_head net_kill_list;
+ struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
+ unsigned write;
/* Atomically snapshot the list of namespaces to cleanup */
- spin_lock_irq(&cleanup_list_lock);
- list_replace_init(&cleanup_list, &net_kill_list);
- spin_unlock_irq(&cleanup_list_lock);
+ net_kill_list = llist_del_all(&cleanup_list);
+again:
+ write = READ_ONCE(nr_sync_pernet_ops);
+ if (write)
+ down_write(&net_sem);
+ else
+ down_read(&net_sem);
- mutex_lock(&net_mutex);
+ if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+ up_read(&net_sem);
+ goto again;
+ }
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list)
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
* added to net_namespace_list can assign nsid pointer
@@ -500,7 +523,7 @@ static void cleanup_net(struct work_struct *work)
last = list_last_entry(&net_namespace_list, struct net, list);
rtnl_unlock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -520,7 +543,10 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- mutex_unlock(&net_mutex);
+ if (write)
+ up_write(&net_sem);
+ else
+ up_read(&net_sem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -547,8 +573,8 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
+ down_write(&net_sem);
+ up_write(&net_sem);
}
EXPORT_SYMBOL(net_ns_barrier);
@@ -557,13 +583,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
- unsigned long flags;
-
- spin_lock_irqsave(&cleanup_list_lock, flags);
- list_add(&net->cleanup_list, &cleanup_list);
- spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
- queue_work(netns_wq, &net_cleanup_work);
+ if (llist_add(&net->cleanup_list, &cleanup_list))
+ queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
@@ -633,6 +654,7 @@ static __net_exit void net_ns_net_exit(struct net *net)
static struct pernet_operations __net_initdata net_ns_ops = {
.init = net_ns_net_init,
.exit = net_ns_net_exit,
+ .async = true,
};
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
@@ -861,7 +883,7 @@ static int __init net_ns_init(void)
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
- SLAB_PANIC, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT, NULL);
/* Create workqueue for cleanup */
netns_wq = create_singlethread_workqueue("netns");
@@ -875,17 +897,12 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
-
- rtnl_lock();
- list_add_tail_rcu(&init_net.list, &net_namespace_list);
- rtnl_unlock();
-
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
register_pernet_subsys(&net_ns_ops);
@@ -989,6 +1006,9 @@ again:
rcu_barrier();
if (ops->id)
ida_remove(&net_generic_ids, *ops->id);
+ } else if (!ops->async) {
+ pr_info_once("Pernet operations %ps are sync.\n", ops);
+ nr_sync_pernet_ops++;
}
return error;
@@ -996,7 +1016,8 @@ again:
static void unregister_pernet_operations(struct pernet_operations *ops)
{
-
+ if (!ops->async)
+ BUG_ON(nr_sync_pernet_ops-- == 0);
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
@@ -1025,9 +1046,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1064,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -1071,11 +1092,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1112,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..545cf08cd558 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min_t(size_t, count, 1023);
- char tb[copy + 1];
- if (copy_from_user(tb, user_buffer, copy))
- return -EFAULT;
- tb[copy] = 0;
- pr_debug("%s,%lu buffer -:%s:-\n",
- name, (unsigned long)count, tb);
+ size_t copy = min_t(size_t, count + 1, 1024);
+ char *tp = strndup_user(user_buffer, copy);
+
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp);
+ kfree(tp);
}
if (!strcmp(name, "min_pkt_size")) {
@@ -3851,6 +3852,7 @@ static struct pernet_operations pg_net_ops = {
.exit = pg_net_exit,
.id = &pg_net_id,
.size = sizeof(struct pktgen_net),
+ .async = true,
};
static int __init pg_init(void)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..87079eaa871b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_killable(void)
+{
+ return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
@@ -454,11 +460,11 @@ static void rtnl_lock_unregistering_all(void)
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
/* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
@@ -4724,6 +4730,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
static struct pernet_operations rtnetlink_net_ops = {
.init = rtnetlink_net_init,
.exit = rtnetlink_net_exit,
+ .async = true,
};
void __init rtnetlink_init(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e7acdc30732..46cb22215ff4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index 85b0b64e7f9d..e689496dfd8a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,16 +1049,18 @@ set_rcvbuf:
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
break;
default:
@@ -1274,7 +1276,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
@@ -1773,7 +1776,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2234,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
EXPORT_SYMBOL(sk_page_frag_refill);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+ int first_coalesce)
+{
+ int sg_curr = *sg_curr_index, use = 0, rc = 0;
+ unsigned int size = *sg_curr_size;
+ struct page_frag *pfrag;
+ struct scatterlist *sge;
+
+ len -= size;
+ pfrag = sk_page_frag(sk);
+
+ while (len > 0) {
+ unsigned int orig_offset;
+
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ use = min_t(int, len, pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, use)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sk_mem_charge(sk, use);
+ size += use;
+ orig_offset = pfrag->offset;
+ pfrag->offset += use;
+
+ sge = sg + sg_curr - 1;
+ if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
+ sg->offset + sg->length == orig_offset) {
+ sg->length += use;
+ } else {
+ sge = sg + sg_curr;
+ sg_unmark_end(sge);
+ sg_set_page(sge, pfrag->page, use, orig_offset);
+ get_page(pfrag->page);
+ sg_curr++;
+
+ if (sg_curr == MAX_SKB_FRAGS)
+ sg_curr = 0;
+
+ if (sg_curr == sg_start) {
+ rc = -ENOSPC;
+ break;
+ }
+ }
+
+ len -= use;
+ }
+out:
+ *sg_curr_size = size;
+ *sg_curr_index = sg_curr;
+ return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -2497,7 +2561,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
@@ -3111,6 +3175,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
static struct pernet_operations net_inuse_ops = {
.init = sock_inuse_init_net,
.exit = sock_inuse_exit_net,
+ .async = true,
};
static __init int net_inuse_init(void)
@@ -3405,6 +3470,7 @@ static __net_exit void proto_exit_net(struct net *net)
static __net_initdata struct pernet_operations proto_net_ops = {
.init = proto_init_net,
.exit = proto_exit_net,
+ .async = true,
};
static int __init proto_init(void)
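
With the SO_ZEROCOPY check reshuffled above, the option is still enabled the same way from user space; the restructuring is what lets PF_RDS sockets pass the family check alongside TCP. A minimal user-space sketch (the enable_zerocopy() wrapper is a made-up name for illustration):

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif

static int enable_zerocopy(int fd)
{
	int one = 1;

	/* Fails on families/protocols that do not support zerocopy,
	 * per the checks in sock_setsockopt() above.
	 */
	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) < 0) {
		perror("setsockopt(SO_ZEROCOPY)");
		return -1;
	}
	return 0;
}

/* Afterwards the flag is opted into per call, e.g.
 * send(fd, buf, len, MSG_ZEROCOPY), with completions read back from the
 * socket error queue (MSG_ERRQUEUE), as described in msg_zerocopy.rst.
 */
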
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index c37b5be7c5e4..a3392a8f9276 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -324,6 +324,7 @@ static void __net_exit diag_net_exit(struct net *net)
static struct pernet_operations diag_net_ops = {
.init = diag_net_init,
.exit = diag_net_exit,
+ .async = true,
};
static int __init sock_diag_init(void)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..4f47f92459cc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
+ {
+ .procname = "fb_tunnels_only_for_init_net",
+ .data = &sysctl_fb_tunnels_only_for_init_net,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -572,6 +584,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
static __net_initdata struct pernet_operations sysctl_core_ops = {
.init = sysctl_core_net_init,
.exit = sysctl_core_net_exit,
+ .async = true,
};
static __init int sysctl_core_init(void)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e65fcb45c3f6..13ad28ab1e79 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1031,6 +1031,7 @@ static struct pernet_operations dccp_v4_ops = {
.init = dccp_v4_init_net,
.exit = dccp_v4_exit_net,
.exit_batch = dccp_v4_exit_batch,
+ .async = true,
};
static int __init dccp_v4_init(void)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5df7857fc0f3..2f48c020f8c3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1116,6 +1116,7 @@ static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
.exit_batch = dccp_v6_exit_batch,
+ .async = true,
};
static int __init dccp_v6_init(void)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 791aff68af88..2ee8306c23e3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
{
struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- *uaddr_len = sizeof(struct sockaddr_dn);
-
lock_sock(sk);
if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_dn);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
s->rx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
netif_receive_skb(skb);
return 0;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 00589147f042..90e6df0351eb 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
count += ops->get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, cpu_dp->index);
return count;
}
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
* constructed earlier
*/
ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds);
+ count = ds->ops->get_sset_count(ds, port);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..18561af7a8f1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
#include "dsa_priv.h"
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+ int port = p->dp->index;
+
+ /* Pass through to switch driver if it supports timestamping */
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ if (ds->ops->port_hwtstamp_get)
+ return ds->ops->port_hwtstamp_get(ds, port, ifr);
+ break;
+ case SIOCSHWTSTAMP:
+ if (ds->ops->port_hwtstamp_set)
+ return ds->ops->port_hwtstamp_set(ds, port, ifr);
+ break;
+ }
+
if (!dev->phydev)
return -ENODEV;
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
return NETDEV_TX_OK;
}
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ struct sk_buff *clone;
+ unsigned int type;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ if (!ds->ops->port_txtstamp)
+ return;
+
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+
+ if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+ return;
+
+ kfree_skb(clone);
+}
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ /* Identify PTP protocol packets, clone them, and pass them to the
+ * switch driver
+ */
+ dsa_skb_tx_timestamp(p, skb);
+
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
*/
@@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, dp->index);
return count;
}
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
+static int dsa_slave_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+
+ if (!ds->ops->get_ts_info)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
+ .get_ts_info = dsa_slave_get_ts_info,
};
/* legacy way, bypassing the bridge *****************************************/
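
The timestamping hooks added above are all optional dsa_switch_ops callbacks, and their shapes follow from the call sites in dsa.c and slave.c: port_txtstamp()/port_rxtstamp() return a bool meaning "the driver took ownership of the clone/skb and will deliver the timestamp later". A hedged sketch of how a switch driver could plug in; the foo_* names are hypothetical and only the signatures are taken from this patch.

static int foo_get_ts_info(struct dsa_switch *ds, int port,
			   struct ethtool_ts_info *info)
{
	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
				SOF_TIMESTAMPING_RX_HARDWARE |
				SOF_TIMESTAMPING_RAW_HARDWARE;
	info->phc_index = -1;	/* a real driver reports its PTP clock index */
	return 0;
}

static bool foo_port_txtstamp(struct dsa_switch *ds, int port,
			      struct sk_buff *clone, unsigned int type)
{
	/* true: keep the clone and attach the TX timestamp asynchronously;
	 * false: the caller frees the clone immediately.
	 */
	return false;
}

static bool foo_port_rxtstamp(struct dsa_switch *ds, int port,
			      struct sk_buff *skb, unsigned int type)
{
	/* true: defer netif_receive_skb() until the RX timestamp is known. */
	return false;
}

static const struct dsa_switch_ops foo_switch_ops = {
	/* ... the driver's existing ops ... */
	.get_ts_info	= foo_get_ts_info,
	.port_txtstamp	= foo_port_txtstamp,
	.port_rxtstamp	= foo_port_rxtstamp,
};
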
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index e9f0489e4229..275449b0d633 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 85bf86ad6b18..a9ccb1322f69 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -603,6 +603,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
static struct pernet_operations lowpan_frags_ops = {
.init = lowpan_frags_init_net,
.exit = lowpan_frags_exit_net,
+ .async = true,
};
int __init lowpan_net_frag_init(void)
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index cb7176cd4cd6..9104943c15ba 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
static struct pernet_operations cfg802154_pernet_ops = {
.exit = cfg802154_pernet_exit,
+ .async = true,
};
static int __init wpan_phy_class_init(void)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4329e161943..e8c7fad8c329 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept);
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_addr.s_addr = addr;
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
@@ -1736,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
static __net_initdata struct pernet_operations ipv4_mib_ops = {
.init = ipv4_mib_init_net,
.exit = ipv4_mib_exit_net,
+ .async = true,
};
static int __init init_ipv4_mibs(void)
@@ -1789,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net)
static __net_initdata struct pernet_operations af_inet_ops = {
.init = inet_init_net,
.exit = inet_exit_net,
+ .async = true,
};
static int __init init_inet_pernet_ops(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..7dc9de8444a9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net)
static struct pernet_operations arp_net_ops = {
.init = arp_net_init,
.exit = arp_net_exit,
+ .async = true,
};
static int __init arp_proc_init(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..5ae0d1f097ca 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net)
static __net_initdata struct pernet_operations devinet_ops = {
.init = devinet_init_net,
.exit = devinet_exit_net,
+ .async = true,
};
static struct rtnl_af_ops inet_af_ops __read_mostly = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f05afaf3235c..ac71c3d496c0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net)
static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
+ .async = true,
};
void __init ip_fib_init(void)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 7d36a950d961..e7c602c600ac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1765,14 +1765,12 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- bool oif_check;
-
- oif_check = (fl4->flowi4_oif == 0 ||
- fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+ if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && oif_check) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ if (res->fi->fib_nhs > 1) {
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
@@ -1780,10 +1778,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && oif_check)
+ res->type == RTN_UNICAST)
fib_select_default(fl4, res);
+check_saddr:
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, *res);
}
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..62243a8abf92 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..d3e1a9af478b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1081,6 +1081,7 @@ static struct pernet_operations fou_net_ops = {
.exit = fou_exit_net,
.id = &fou_net_id,
.size = sizeof(struct fou_net),
+ .async = true,
};
static int __init fou_init(void)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1617604c9284..cc56efa64d5c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1257,6 +1257,7 @@ fail:
static struct pernet_operations __net_initdata icmp_sk_ops = {
.init = icmp_sk_init,
.exit = icmp_sk_exit,
+ .async = true,
};
int __init icmp_init(void)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..c2743763777e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net)
static struct pernet_operations igmp_net_ops = {
.init = igmp_net_init,
.exit = igmp_net_exit,
+ .async = true,
};
#endif
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..1f04bd91fc2e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..5e843ae5e468 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net)
static struct pernet_operations ip4_frags_ops = {
.init = ipv4_frags_init_net,
.exit = ipv4_frags_exit_net,
+ .async = true,
};
void __init ipfrag_init(void)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0901de42ed85..9ab1aa2f7660 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@ err_free_skb:
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = tun_info->key.tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -1041,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = {
.exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1317,6 +1321,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1618,6 +1628,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
.exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __net_init erspan_init_net(struct net *net)
@@ -1636,6 +1647,7 @@ static struct pernet_operations erspan_net_ops = {
.exit_batch = erspan_exit_batch_net,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipgre_init(void)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 57fc13c6ab2b..7582713dd18f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+ for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
struct sock *sk = ra->sk;
/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
*/
if (sk && inet_sk(sk)->inet_num == protocol &&
(!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == dev->ifindex) &&
- net_eq(sock_net(sk), net)) {
+ sk->sk_bound_dev_if == dev->ifindex)) {
if (ip_is_fragment(ip_hdr(skb))) {
if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
return true;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 74c962b9b09c..5ad2d8ed3a3f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
return 0;
}
-
-/* Special input handler for packets caught by router alert option.
- They are selected only by protocol field, and then processed likely
- local ones; but only if someone wants them! Otherwise, router
- not running rsvpd will kill RSVP.
-
- It is user level problem, what it will make with them.
- I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
- but receiver should be enough clever f.e. to forward mtrace requests,
- sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
-
-
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
{
struct ip_ra_chain *ra, *new_ra;
struct ip_ra_chain __rcu **rap;
+ struct net *net = sock_net(sk);
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- for (rap = &ip_ra_chain;
- (ra = rtnl_dereference(*rap)) != NULL;
+ mutex_lock(&net->ipv4.ra_mutex);
+ for (rap = &net->ipv4.ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
+ mutex_unlock(&net->ipv4.ra_mutex);
kfree(new_ra);
return -EADDRINUSE;
}
/* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
+ mutex_unlock(&net->ipv4.ra_mutex);
if (ra->destructor)
ra->destructor(sk);
@@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
}
- if (!new_ra)
+ if (!new_ra) {
+ mutex_unlock(&net->ipv4.ra_mutex);
return -ENOBUFS;
+ }
new_ra->sk = sk;
new_ra->destructor = destructor;
RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
+ mutex_unlock(&net->ipv4.ra_mutex);
return 0;
}
@@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
- case IP_ROUTER_ALERT:
return true;
}
return false;
@@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
+ if (optname == IP_ROUTER_ALERT)
+ return ip_ra_control(sk, val ? 1 : 0, NULL);
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1149,9 +1144,6 @@ mc_msf_out:
goto e_inval;
inet->mc_all = val;
break;
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
case IP_FREEBIND:
if (optlen < 1)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6d21068f9b55..5fcb17cb426b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -363,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct net_device *dev;
int t_hlen;
- BUG_ON(!itn->fb_tunnel_dev);
- dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
if (IS_ERR(dev))
return ERR_CAST(dev);
@@ -581,8 +564,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -710,9 +693,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -838,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
struct net *net = t->net;
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
- BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == itn->fb_tunnel_dev) {
@@ -863,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
p->o_key = 0;
}
- t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ t = ip_tunnel_find(itn, p, itn->type);
if (cmd == SIOCADDTUNNEL) {
if (!t) {
@@ -1007,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct ip_tunnel_parm parms;
unsigned int i;
+ itn->rtnl_link_ops = ops;
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
INIT_HLIST_HEAD(&itn->tunnels[i]);
- if (!ops) {
+ if (!ops || !net_has_fallback_tunnels(net)) {
+ struct ip_tunnel_net *it_init_net;
+
+ it_init_net = net_generic(&init_net, ip_tnl_net_id);
+ itn->type = it_init_net->type;
itn->fb_tunnel_dev = NULL;
return 0;
}
@@ -1028,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ itn->type = itn->fb_tunnel_dev->type;
}
rtnl_unlock();
@@ -1035,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+ struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct net *net = dev_net(itn->fb_tunnel_dev);
struct net_device *dev, *aux;
int h;
@@ -1070,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(itn, &list, ops);
+ ip_tunnel_destroy(net, itn, &list, ops);
}
unregister_netdevice_many(&list);
rtnl_unlock();
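
Note on the ip_tunnel.c hunks above: the removed init_tunnel_flow() helper is not gone, only renamed and shared — every caller in this file now uses ip_tunnel_init_flow() with the same argument order. A minimal sketch of what that shared helper presumably looks like, assuming it keeps the body of the deleted static inline and moves to a common header such as include/net/ip_tunnels.h (the exact location is an assumption, it is not shown in these hunks):

/* Hedged sketch: body assumed identical to the deleted init_tunnel_flow(). */
static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
				       int proto,
				       __be32 daddr, __be32 saddr,
				       __be32 key, __u8 tos, int oif,
				       __u32 mark)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	fl4->flowi4_mark = mark;
}

Hoisting it into a header lets code outside ip_tunnel.c build the same tunnel flow without duplicating the field-by-field initialization.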
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 51b1669334fe..b10bf563afd9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
.exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
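
The single added ".async = true" line in the vti pernet_operations above is the recurring theme of many of the following hunks. A sketch of the pattern, with the flag's semantics stated as an assumption and example_net_* used as placeholder names:

static struct pernet_operations example_net_ops = {
	.init  = example_net_init,	/* per-netns setup */
	.exit  = example_net_exit,	/* per-netns teardown */
	.async = true,			/* assumed: init/exit safe to run without the
					 * global net-namespace serialization */
};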
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c891235b4966..9c5a4d164f09 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
.exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipip_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..f6be5db16da2 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,9 +28,9 @@
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -52,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
EXPORT_SYMBOL(ipmr_cache_free);
@@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +867,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
ipmr_cache_put(c);
@@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_cache_put(cache);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1420,7 +1399,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
- ASSERT_RTNL();
+ rtnl_lock();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1432,6 +1411,7 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
+ rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
@@ -1496,8 +1476,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
+ /* We need to unlock here because mrtsock_destruct takes
+ * care of rtnl itself and we can't change that due to
+ * the IP_ROUTER_ALERT setsockopt which runs without it.
+ */
+ rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
- goto out_unlock;
+ goto out;
}
break;
case MRT_ADD_VIF:
@@ -1609,6 +1594,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
+out:
return ret;
}
@@ -1698,9 +1684,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1758,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1984,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2038,7 +2024,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +2033,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2061,33 +2048,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2099,9 +2086,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2299,62 +2286,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2343,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2440,7 +2371,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2380,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2462,6 +2393,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2490,7 +2429,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2634,62 +2574,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2832,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2981,26 +2847,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2858,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2866,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2881,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3041,7 +2889,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2899,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3092,54 +2908,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2920,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3185,15 +2954,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3229,7 +2998,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
ipmr_for_each_table(mrt, net) {
struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
+ struct mr_mfc *mfc;
int vifi;
/* Notify on table VIF entries */
@@ -3246,7 +3015,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
/* Notify on table MFC entries */
list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
+ FIB_EVENT_ENTRY_ADD,
+ (struct mfc_cache *)mfc,
mrt->id);
}
@@ -3327,6 +3097,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
+ .async = true,
};
int __init ip_mr_init(void)
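
All of the new "_c" dereferences and (struct mfc_cache *) casts in the ipmr.c hunks above rely on one layout invariant: the generic multicast-route state is pulled out of struct mfc_cache into an embedded struct mr_mfc that sits first in the containing structure. A trimmed sketch of the assumed layout (field set reduced to what the hunks actually touch; the authoritative definitions live in the mroute headers, not in this file):

/* Assumed layout, trimmed to the fields referenced above. */
struct mr_mfc {
	struct rhlist_head mnode;		/* keyed by the family-specific cmparg */
	unsigned short mfc_parent;
	int mfc_flags;
	union {
		struct {
			unsigned long expires;
			struct sk_buff_head unresolved;
		} unres;
		struct {
			unsigned long last_assert;
			int minvif, maxvif;
			unsigned long bytes, pkt, wrong_if, lastuse;
			unsigned char ttls[MAXVIFS];
			refcount_t refcount;
		} res;
	} mfc_un;
	struct list_head list;
	struct rcu_head rcu;
};

struct mfc_cache {
	struct mr_mfc _c;			/* must stay first: enables the casts above */
	union {
		struct {
			__be32 mfc_mcastgrp;
			__be32 mfc_origin;
		};
		struct mfc_cache_cmp_arg cmparg;
	};
};

Because _c is the first member, a struct mr_mfc * taken from the shared lists can be cast straight back to struct mfc_cache * without container_of(), which is exactly what mroute_clean_tables() and ipmr_expire_process() do above.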
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..8ba55bfda817
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,323 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
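
The new ipmr_base.c factors table allocation, rhashtable lookups, the /proc seq_file iterators and the netlink dump loop out of ipmr.c so another family can reuse them. As a usage illustration only (a hypothetical "foo" family; none of the foo_* identifiers below exist in this patch), the adoption pattern mirrors ipmr_new_table() and ipmr_rtm_dumproute() above — the adopter supplies its rhashtable params, wildcard compare key, expire-timer callback, table-list hook, and iterator/fill callbacks:

/* Hypothetical adopter of the shared helpers; mirrors the ipmr usage above. */
static struct mfc_cache_cmp_arg foo_cmparg_any = {
	.mfc_mcastgrp = htonl(INADDR_ANY),
	.mfc_origin   = htonl(INADDR_ANY),
};

static struct mr_table_ops foo_mr_table_ops = {
	.rht_params = &foo_rht_params,		/* family-specific rhashtable params */
	.cmparg_any = &foo_cmparg_any,		/* wildcard key for (*,*) lookups */
};

static void foo_new_table_set(struct mr_table *mrt, struct net *net)
{
	/* hook the fresh table into the family's per-netns table list, if any */
}

static struct mr_table *foo_new_table(struct net *net, u32 id)
{
	return mr_table_alloc(net, id, &foo_mr_table_ops,
			      foo_expire_process, foo_new_table_set);
}

static int foo_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, foo_mr_table_iter,
				foo_fill_mroute, &foo_unres_lock);
}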
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e3e420f3ba7b..c36ffce3c812 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1635,6 +1635,7 @@ static void __net_exit arp_tables_net_exit(struct net *net)
static struct pernet_operations arp_tables_net_ops = {
.init = arp_tables_net_init,
.exit = arp_tables_net_exit,
+ .async = true,
};
static int __init arp_tables_init(void)
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 8f8713b4388f..49c2490193ae 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -65,6 +65,7 @@ static void __net_exit arptable_filter_net_exit(struct net *net)
static struct pernet_operations arptable_filter_net_ops = {
.exit = arptable_filter_net_exit,
+ .async = true,
};
static int __init arptable_filter_init(void)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e38395a8dcf2..d4f7584d2dbe 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1916,6 +1916,7 @@ static void __net_exit ip_tables_net_exit(struct net *net)
static struct pernet_operations ip_tables_net_ops = {
.init = ip_tables_net_init,
.exit = ip_tables_net_exit,
+ .async = true,
};
static int __init ip_tables_init(void)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8a8ae61cea71..0fc88fa7a4dc 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -845,6 +845,7 @@ static struct pernet_operations clusterip_net_ops = {
.exit = clusterip_net_exit,
.id = &clusterip_net_id,
.size = sizeof(struct clusterip_net),
+ .async = true,
};
static int __init clusterip_tg_init(void)
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 9ac92ea7b93c..c1c136a93911 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -87,6 +87,7 @@ static void __net_exit iptable_filter_net_exit(struct net *net)
static struct pernet_operations iptable_filter_net_ops = {
.init = iptable_filter_net_init,
.exit = iptable_filter_net_exit,
+ .async = true,
};
static int __init iptable_filter_init(void)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index dea138ca8925..f6074059531a 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -113,6 +113,7 @@ static void __net_exit iptable_mangle_net_exit(struct net *net)
static struct pernet_operations iptable_mangle_net_ops = {
.exit = iptable_mangle_net_exit,
+ .async = true,
};
static int __init iptable_mangle_init(void)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 0f7255cc65ee..b771af74be79 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -129,6 +129,7 @@ static void __net_exit iptable_nat_net_exit(struct net *net)
static struct pernet_operations iptable_nat_net_ops = {
.exit = iptable_nat_net_exit,
+ .async = true,
};
static int __init iptable_nat_init(void)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 960625aabf04..963753e50842 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -76,6 +76,7 @@ static void __net_exit iptable_raw_net_exit(struct net *net)
static struct pernet_operations iptable_raw_net_ops = {
.exit = iptable_raw_net_exit,
+ .async = true,
};
static int __init iptable_raw_init(void)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index e5379fe57b64..c40d6b3d8b6a 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -76,6 +76,7 @@ static void __net_exit iptable_security_net_exit(struct net *net)
static struct pernet_operations iptable_security_net_ops = {
.exit = iptable_security_net_exit,
+ .async = true,
};
static int __init iptable_security_init(void)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b50721d9d30e..6531f69db010 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -399,6 +399,7 @@ static struct pernet_operations ipv4_net_ops = {
.exit = ipv4_net_exit,
.id = &conntrack4_net_id,
.size = sizeof(struct conntrack4_net),
+ .async = true,
};
static int __init nf_conntrack_l3proto_ipv4_init(void)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a0d3ad60a411..57244b62a4fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index df5c2a2061a4..162293469ac2 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -122,6 +122,7 @@ static void __net_exit nf_log_arp_net_exit(struct net *net)
static struct pernet_operations nf_log_arp_net_ops = {
.init = nf_log_arp_net_init,
.exit = nf_log_arp_net_exit,
+ .async = true,
};
static int __init nf_log_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..7a06de140f3c 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -358,6 +358,7 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net)
static struct pernet_operations nf_log_ipv4_net_ops = {
.init = nf_log_ipv4_net_init,
.exit = nf_log_ipv4_net_exit,
+ .async = true,
};
static int __init nf_log_ipv4_init(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..0164def9c808 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
static struct pernet_operations ping_v4_net_ops = {
.init = ping_v4_proc_init_net,
.exit = ping_v4_proc_exit_net,
+ .async = true,
};
int __init ping_proc_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..d97e83b2dd33 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -549,10 +549,10 @@ static __net_exit void ip_proc_exit_net(struct net *net)
static __net_initdata struct pernet_operations ip_proc_ops = {
.init = ip_proc_init_net,
.exit = ip_proc_exit_net,
+ .async = true,
};
int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..720bef7da2f6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
- rtnl_lock();
ip_ra_control(sk, 0, NULL);
- rtnl_unlock();
sk_common_release(sk);
}
@@ -1156,6 +1154,7 @@ static __net_exit void raw_exit_net(struct net *net)
static __net_initdata struct pernet_operations raw_net_ops = {
.init = raw_init_net,
.exit = raw_exit_net,
+ .async = true,
};
int __init raw_proc_init(void)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 299e247b2032..4ac5728689f5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -418,6 +418,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
static struct pernet_operations ip_rt_proc_ops __net_initdata = {
.init = ip_rt_do_proc_init,
.exit = ip_rt_do_proc_exit,
+ .async = true,
};
static int __init ip_rt_proc_init(void)
@@ -1532,7 +1533,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@@ -1668,19 +1668,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1763,15 +1750,13 @@ rt_cache:
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1787,44 +1772,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
@@ -1848,15 +1834,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1872,17 +1863,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1908,13 +1899,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1973,6 +1965,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1998,7 +1994,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
@@ -2040,8 +2036,6 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
@@ -2270,8 +2264,6 @@ add:
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2293,7 +2285,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
@@ -2804,7 +2796,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
@@ -3025,6 +3017,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
static __net_initdata struct pernet_operations sysctl_route_ops = {
.init = sysctl_route_net_init,
.exit = sysctl_route_net_exit,
+ .async = true,
};
#endif
@@ -3038,6 +3031,7 @@ static __net_init int rt_genid_init(struct net *net)
static __net_initdata struct pernet_operations rt_genid_ops = {
.init = rt_genid_init,
+ .async = true,
};
static int __net_init ipv4_inetpeer_init(struct net *net)
@@ -3063,6 +3057,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
.init = ipv4_inetpeer_init,
.exit = ipv4_inetpeer_exit,
+ .async = true,
};
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..5b72d97693f8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {
- .procname = "udp_rmem_min",
- .data = &sysctl_udp_rmem_min,
- .maxlen = sizeof(sysctl_udp_rmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
- {
- .procname = "udp_wmem_min",
- .data = &sysctl_udp_wmem_min,
- .maxlen = sizeof(sysctl_udp_wmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
{ }
};
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "udp_rmem_min",
+ .data = &init_net.ipv4.sysctl_udp_rmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "udp_wmem_min",
+ .data = &init_net.ipv4.sysctl_udp_wmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
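/* Editorial note, not part of the patch: udp_rmem_min and udp_wmem_min move
 * from the global ipv4_table to the per-netns ipv4_net_table.  The entries
 * reference init_net fields; for other namespaces the per-net sysctl init is
 * expected to duplicate the table and rebase each .data pointer into that
 * namespace's struct net (the usual ipv4_sysctl_init_net() pattern), roughly:
 *
 *	table[i].data += (void *)net - (void *)&init_net;
 */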
@@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
.init = ipv4_sysctl_init_net,
.exit = ipv4_sysctl_exit_net,
+ .async = true,
};
static __init int sysctl_ipv4_init(void)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8b8059b7af4d..0c31be306572 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
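/* Editorial note, not part of the patch: setting sk_route_forced_caps to
 * NETIF_F_GSO means every TCP socket is treated as GSO-capable (software GSO
 * is always available), so segmentation and checksumming can always be
 * deferred to the stack.  Assuming sk_setup_caps() ORs the forced caps into
 * sk_route_caps (as done elsewhere in this series), this is what lets the
 * later hunks drop the sk_can_gso() and sk_check_csum_caps() special cases
 * and mark all transmit skbs CHECKSUM_PARTIAL unconditionally.
 */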
@@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
@@ -993,7 +994,9 @@ new_segment:
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -1062,8 +1065,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@@ -1102,27 +1104,11 @@ static int linear_payload_sz(bool first_skb)
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1173,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
@@ -1205,7 +1191,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
@@ -1268,18 +1254,12 @@ restart:
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
@@ -1297,22 +1277,17 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
@@ -1343,7 +1318,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1396,7 +1371,7 @@ new_segment:
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
@@ -3058,8 +3033,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 3 * nla_total_size(sizeof(u32)) +
- 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+ 5 * nla_total_size(sizeof(u32)) +
+ 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3088,6 +3063,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats;
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Like tcp_tso_autosize(), but ignoring the
+ * driver-provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
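/* Editorial sketch with illustrative numbers, not from the patch: BBR now
 * derives its TSO burst from the pacing rate alone.  With the default
 * sk_pacing_shift of 10 and a pacing rate of 125 MB/s (~1 Gbit/s):
 *
 *	bytes = min(125000000 >> 10, GSO_MAX_SIZE - 1 - MAX_TCP_HEADER)
 *	      = min(~122 KB, ~64 KB) = ~64 KB
 *	segs  = max(bytes / mss, bbr_min_tso_segs(sk))
 *	      = ~64 KB / 1448  ->  roughly 45 segments, capped at 0x7F
 *
 * At pacing rates below bbr_min_tso_rate / 8 the floor drops to 1 segment.
 */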
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
- return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
+ return tcp_sk(sk)->snd_ssthresh;
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
@@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9a1b3c1c1c14..451ef3012636 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
int len;
int in_sack;
- if (!sk_can_gso(sk))
- goto fallback;
-
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -5862,10 +5859,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
+ bool req_stolen;
+
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..2c6aec2643e8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v4_check(skb->len, saddr, daddr,
- csum_partial(th,
- th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1665,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1694,10 +1688,20 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another CPU got exclusive access to req
+ * and created a full-blown socket.
+ * Try to feed this packet to that socket
+ * instead of discarding it.
+ */
+ tcp_v4_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
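/* Editorial note, not part of the patch: tcp_check_req() gained a req_stolen
 * out-parameter.  A NULL return used to mean "drop"; now, if another CPU won
 * the race and already promoted this request to a full socket, the caller
 * restores the control block, drops its listener reference and jumps back to
 * the lookup so the segment is delivered to the newly created socket instead
 * of being discarded.
 */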
@@ -2387,6 +2391,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
static struct pernet_operations tcp4_net_ops = {
.init = tcp4_proc_init_net,
.exit = tcp4_proc_exit_net,
+ .async = true,
};
int __init tcp4_proc_init(void)
@@ -2573,6 +2578,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
.init = tcp_sk_init,
.exit = tcp_sk_exit,
.exit_batch = tcp_sk_exit_batch,
+ .async = true,
};
void __init tcp_v4_init(void)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03b51cdcc731..aa6fea9f3328 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
.init = tcp_net_metrics_init,
.exit_batch = tcp_net_metrics_exit_batch,
+ .async = true,
};
void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..e7e36433cdb5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- bool fastopen)
+ bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
struct sock *child;
@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
+ *req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6818042cd8a9..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+ if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
tcp_skb_fragment_eor(skb, buff);
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len,
- skb_put(buff, nsize),
- nsize, 0);
-
- skb_trim(skb, len);
-
- skb->csum = csum_block_sub(skb->csum, buff->csum, len);
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_split(skb, buff, len);
- }
+ skb_split(skb, buff, len);
- buff->ip_summed = skb->ip_summed;
+ buff->ip_summed = CHECKSUM_PARTIAL;
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1732,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1740,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
- if (!tso_segs)
- tso_segs = tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
@@ -1902,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+ buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2135,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- nskb->ip_summed = skb->ip_summed;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -2143,14 +2132,7 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed) {
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- } else {
- __wsum csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, 0);
- nskb->csum = csum_block_add(nskb->csum, csum, len);
- }
+ skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
@@ -2167,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data,
- skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
@@ -2747,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
tcp_highest_sack_replace(sk, next_skb, skb);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e5ef7c38c934..908fc02fb4f8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
@@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err)
EXPORT_SYMBOL_GPL(udp_abort);
struct proto udp_prot = {
- .name = "UDP",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udp_destroy_sock,
- .setsockopt = udp_setsockopt,
- .getsockopt = udp_getsockopt,
- .sendmsg = udp_sendmsg,
- .recvmsg = udp_recvmsg,
- .sendpage = udp_sendpage,
- .release_cb = ip4_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v4_rehash,
- .get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .name = "UDP",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .release_cb = ip4_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
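/* Editorial sketch, not part of this diff (the helper shown is the generic
 * socket accessor these offsets feed, assumed from include/net/sock.h of the
 * same era): with sysctl_{r,w}mem_offset set, the core can resolve the
 * per-netns minimum instead of a global:
 *
 *	static inline int sk_get_rmem0(const struct sock *sk,
 *				       const struct proto *proto)
 *	{
 *		if (proto->sysctl_rmem_offset)
 *			return *(int *)((void *)sock_net(sk) +
 *					proto->sysctl_rmem_offset);
 *		return *proto->sysctl_rmem;
 *	}
 */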
@@ -2762,6 +2756,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
static struct pernet_operations udp4_net_ops = {
.init = udp4_proc_init_net,
.exit = udp4_proc_exit_net,
+ .async = true,
};
int __init udp4_proc_init(void)
@@ -2830,6 +2825,26 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
+static void __udp_sysctl_init(struct net *net)
+{
+ net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+ __udp_sysctl_init(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+ .init = udp_sysctl_init,
+};
+
void __init udp_init(void)
{
unsigned long limit;
@@ -2842,8 +2857,7 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ __udp_sysctl_init(&init_net);
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2853,4 +2867,7 @@ void __init udp_init(void)
panic("UDP: failed to alloc udp_busylocks\n");
for (i = 0; i < (1U << udp_busylocks_log); i++)
spin_lock_init(udp_busylocks + i);
+
+ if (register_pernet_subsys(&udp_sysctl_ops))
+ panic("UDP: failed to init sysctl parameters.\n");
}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f96614e9b9a5..72f2c3806408 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net)
static struct pernet_operations udplite4_net_ops = {
.init = udplite4_proc_init_net,
.exit = udplite4_proc_exit_net,
+ .async = true,
};
static __init int udplite4_proc_init(void)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fbebda67ac1b..6c76a757fa4a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -101,7 +101,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
rt_add_uncached_list(&xdst->u.rt);
@@ -368,6 +367,7 @@ static void __net_exit xfrm4_net_exit(struct net *net)
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
+ .async = true,
};
static void __init xfrm4_policy_init(void)
@@ -382,4 +382,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..6fd4bbdc444f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
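/* Editorial note, not part of the patch: with this helper,
 * inet6_rtm_newaddr() (see the hunk further down) now accepts
 * IFA_F_OPTIMISTIC from userspace, but only when optimistic DAD is enabled
 * on the device or globally, and never together with IFA_F_NODAD, since an
 * address that skips DAD cannot also be optimistic about DAD.
 */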
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1836,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* The device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, the ifp device is not checked against the
+ * passed-in dev argument. So the two cases for address checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
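/* Editorial sketch, not part of the patch (call sites follow in later hunks
 * of this series): skip_dev_check selects between a per-device and a
 * per-L3-domain address check:
 *
 *	on this exact device:
 *		ipv6_chk_addr_and_flags(net, addr, dev, false, strict,
 *					IFA_F_TENTATIVE);
 *
 *	anywhere in the L3/VRF domain dev belongs to:
 *		ipv6_chk_addr_and_flags(net, addr, dev, true, strict,
 *					IFA_F_TENTATIVE);
 */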
@@ -1968,6 +2003,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -4257,6 +4294,7 @@ static void __net_exit if6_proc_net_exit(struct net *net)
static struct pernet_operations if6_proc_net_ops = {
.init = if6_proc_net_init,
.exit = if6_proc_net_exit,
+ .async = true,
};
int __init if6_proc_init(void)
@@ -4500,6 +4538,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4614,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4606,7 +4648,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
@@ -6550,6 +6604,7 @@ static void __net_exit addrconf_exit_net(struct net *net)
static struct pernet_operations addrconf_ops = {
.init = addrconf_init_net,
.exit = addrconf_exit_net,
+ .async = true,
};
static struct rtnl_af_ops inet6_ops __read_mostly = {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced37ad71..ba2e63633370 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -344,6 +344,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
static struct pernet_operations ipv6_addr_label_ops = {
.init = ip6addrlbl_net_init,
.exit = ip6addrlbl_net_exit,
+ .async = true,
};
int __init ipv6_addr_label_init(void)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..dbbe04018813 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
@@ -858,6 +857,7 @@ static void __net_exit inet6_net_exit(struct net *net)
static struct pernet_operations inet6_net_ops = {
.init = inet6_net_init,
.exit = inet6_net_exit,
+ .async = true,
};
static const struct ipv6_stub ipv6_stub_impl = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..d580d4d456a5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a9f7eca0b6a3..88bc2ef7c7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -808,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..00ef9467f3c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
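/* Editorial note (assumption about code outside these hunks): the new
 * fib6_rules_require_fldissect counter mirrors the IPv4 one and lets
 * fib6_rules_early_flow_dissect() skip the flow dissector entirely when no
 * installed rule matches on ip_proto or sport/dport ranges, so the common
 * case pays nothing for the new L4 matching capability.
 */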
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
@@ -368,6 +397,7 @@ static void __net_exit fib6_rules_net_exit(struct net *net)
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
.exit = fib6_rules_net_exit,
+ .async = true,
};
int __init fib6_rules_init(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..6f84668be6ea 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -997,6 +998,7 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
static struct pernet_operations icmpv6_sk_ops = {
.init = icmpv6_sk_init,
.exit = icmpv6_sk_exit,
+ .async = true,
};
int __init icmpv6_init(void)
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 44c39c5f0638..e438699f000f 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
+ .async = true,
};
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..2f995e9e3050 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
@@ -2160,6 +2161,7 @@ static void fib6_net_exit(struct net *net)
static struct pernet_operations fib6_net_ops = {
.init = fib6_net_init,
.exit = fib6_net_exit,
+ .async = true,
};
int __init fib6_init(void)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..6ddf52282894 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -873,6 +873,7 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
static struct pernet_operations ip6_flowlabel_net_ops = {
.init = ip6_flowlabel_proc_init,
.exit = ip6_flowlabel_net_exit,
+ .async = true,
};
int ip6_flowlabel_init(void)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1bbd0930063e..3a98c694da5f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -237,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -696,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -721,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
@@ -1059,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1475,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1523,6 +1528,7 @@ static struct pernet_operations ip6gre_net_ops = {
.exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
+ .async = true,
};
static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1790,6 +1796,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a8a919520090..2c7f09c3c39e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6e0f21eed88a..456fcf942f95 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
@@ -2254,6 +2260,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
.exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
+ .async = true,
};
/**
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..a482b854eeea 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
.exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
+ .async = true,
};
static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..7345bd6c4b7d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
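/* Editorial note, not part of the patch: the remainder of this file converts
 * ip6mr from its private mr6_table (fixed-size mfc6_cache_array plus
 * hand-rolled /proc iterators) to the generic struct mr_table / struct mr_mfc
 * infrastructure shared with IPv4 ipmr: the MFC cache becomes an rhashtable
 * keyed by (mf6c_origin, mf6c_mcastgrp) via ip6mr_rht_params, and the
 * seq_file iterators are replaced by the common mr_vif_seq_*() and
 * mr_mfc_seq_*() helpers.
 */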
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
}
#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- struct mr6_table *mrt;
- unsigned int i;
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
-
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
-
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
}
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
-
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
+ struct mr_table *mrt;
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -773,17 +657,17 @@ failure:
* Delete a VIF entry
*/
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
return c;
}
@@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ ip6mr_cache_free(c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1397,6 +1251,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
static struct pernet_operations ip6mr_net_ops = {
.init = ip6mr_net_init,
.exit = ip6mr_net_exit,
+ .async = true,
};
int __init ip6_mr_init(void)
@@ -1452,14 +1307,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1468,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
- }
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
}
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
@@ -1504,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1544,61 +1396,55 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_cache_free((struct mfc6_cache *)c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1616,7 +1462,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1470,13 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+ /* Note that mroute_sk had SOCK_RCU_FREE set,
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1644,9 +1494,9 @@ int ip6mr_sk_done(struct sock *sk)
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1656,8 +1506,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1524,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1535,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1706,7 +1558,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1741,7 +1594,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1793,7 +1648,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1824,7 +1679,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1727,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1888,8 +1743,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1906,19 +1761,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1946,10 +1801,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1962,8 +1817,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1980,19 +1835,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2013,11 +1868,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2087,46 +1942,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +1994,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2197,7 +2059,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2247,66 +2109,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2367,15 +2174,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2397,7 +2201,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2210,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2419,6 +2223,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2440,14 +2252,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2482,7 +2294,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2532,65 +2344,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
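
The ip6mr.c hunks above replace the per-table mfc6_cache_array hash buckets with the shared rhltable-backed cache from the common mroute code (struct mr_mfc embedded as _c inside struct mfc6_cache) and turn mroute6_sk into an RCU-protected pointer. Resolved-entry lookups now go through ip6mr_cache_find()/ip6mr_cache_find_parent() under rcu_read_lock() instead of scanning a bucket list under mrt_lock, and entries are freed through call_rcu(). A minimal caller-side sketch of the new pattern, modelled on the SIOCGETSGCNT_IN6 path above (a fragment, not a standalone program; names and fields are taken from the hunks):

	struct mfc6_cache *c;

	rcu_read_lock();
	/* keyed lookup in mrt->mfc_hash via the (origin, group) cmp-arg */
	c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
	if (c) {
		sr.pktcnt  = c->_c.mfc_un.res.pkt;
		sr.bytecnt = c->_c.mfc_un.res.bytes;
	}
	rcu_read_unlock();
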
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..d1a0cefac273 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
@@ -2997,6 +2997,7 @@ static void __net_exit igmp6_net_exit(struct net *net)
static struct pernet_operations igmp6_net_ops = {
.init = igmp6_net_init,
.exit = igmp6_net_exit,
+ .async = true,
};
int __init igmp6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ba5e04c6ae17..d1d0b2fa7a07 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
@@ -1883,6 +1883,7 @@ static void __net_exit ndisc_net_exit(struct net *net)
static struct pernet_operations ndisc_net_ops = {
.init = ndisc_net_init,
.exit = ndisc_net_exit,
+ .async = true,
};
int __init ndisc_init(void)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62358b93bbac..4de8ac1e5af4 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1928,6 +1928,7 @@ static void __net_exit ip6_tables_net_exit(struct net *net)
static struct pernet_operations ip6_tables_net_ops = {
.init = ip6_tables_net_init,
.exit = ip6_tables_net_exit,
+ .async = true,
};
static int __init ip6_tables_init(void)
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 91ed25a24b79..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -49,7 +49,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1343077dde93..06561c84c0bc 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -87,6 +87,7 @@ static void __net_exit ip6table_filter_net_exit(struct net *net)
static struct pernet_operations ip6table_filter_net_ops = {
.init = ip6table_filter_net_init,
.exit = ip6table_filter_net_exit,
+ .async = true,
};
static int __init ip6table_filter_init(void)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b0524b18c4fb..a11e25936b45 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -107,6 +107,7 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net)
static struct pernet_operations ip6table_mangle_net_ops = {
.exit = ip6table_mangle_net_exit,
+ .async = true,
};
static int __init ip6table_mangle_init(void)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 47306e45a80a..4475fd300bb6 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -131,6 +131,7 @@ static void __net_exit ip6table_nat_net_exit(struct net *net)
static struct pernet_operations ip6table_nat_net_ops = {
.exit = ip6table_nat_net_exit,
+ .async = true,
};
static int __init ip6table_nat_init(void)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 710fa0806c37..a88f3b1995b1 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -75,6 +75,7 @@ static void __net_exit ip6table_raw_net_exit(struct net *net)
static struct pernet_operations ip6table_raw_net_ops = {
.exit = ip6table_raw_net_exit,
+ .async = true,
};
static int __init ip6table_raw_init(void)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index cf26ccb04056..320048c008dc 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -74,6 +74,7 @@ static void __net_exit ip6table_security_net_exit(struct net *net)
static struct pernet_operations ip6table_security_net_ops = {
.exit = ip6table_security_net_exit,
+ .async = true,
};
static int __init ip6table_security_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 663827ee3cf8..ba54bb3bd1e4 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,6 +401,7 @@ static struct pernet_operations ipv6_net_ops = {
.exit = ipv6_net_exit,
.id = &conntrack6_net_id,
.size = sizeof(struct conntrack6_net),
+ .async = true,
};
static int __init nf_conntrack_l3proto_ipv6_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..34136fe80ed5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -646,6 +646,7 @@ static void nf_ct_net_exit(struct net *net)
static struct pernet_operations nf_ct_net_ops = {
.init = nf_ct_net_init,
.exit = nf_ct_net_exit,
+ .async = true,
};
int nf_ct_frag6_init(void)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index c87b48359e8f..32f98bc06900 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index b397a8fe88b9..0220e584589c 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -390,6 +390,7 @@ static void __net_exit nf_log_ipv6_net_exit(struct net *net)
static struct pernet_operations nf_log_ipv6_net_ops = {
.init = nf_log_ipv6_net_init,
.exit = nf_log_ipv6_net_exit,
+ .async = true,
};
static int __init nf_log_ipv6_init(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 62fc84d7bdff..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,7 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
*dest = 0;
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d12c55dad7d1..318c6e914234 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -240,6 +240,7 @@ static void __net_init ping_v6_proc_exit_net(struct net *net)
static struct pernet_operations ping_v6_net_ops = {
.init = ping_v6_proc_init_net,
.exit = ping_v6_proc_exit_net,
+ .async = true,
};
#endif
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..1678cf037688 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -343,6 +343,7 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
static struct pernet_operations ipv6_proc_ops = {
.init = ipv6_proc_init_net,
.exit = ipv6_proc_exit_net,
+ .async = true,
};
int __init ipv6_misc_proc_init(void)
@@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..10a4ac4933b7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1332,6 +1332,7 @@ static void __net_exit raw6_exit_net(struct net *net)
static struct pernet_operations raw6_net_ops = {
.init = raw6_init_net,
.exit = raw6_exit_net,
+ .async = true,
};
int __init raw6_proc_init(void)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..b5da69c83123 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -733,6 +733,7 @@ static void __net_exit ipv6_frags_exit_net(struct net *net)
static struct pernet_operations ip6_frags_ops = {
.init = ipv6_frags_init_net,
.exit = ipv6_frags_exit_net,
+ .async = true,
};
int __init ipv6_frag_init(void)
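
Several of the hunks above only add .async = true to an existing struct pernet_operations, opting those per-netns subsystems in to init/exit that may run without the global net_mutex serialization. A minimal sketch of the resulting shape (hypothetical foo_* names, not from this patch; registration still goes through the existing register_pernet_subsys()):

	static int __net_init foo_net_init(struct net *net)
	{
		/* per-netns allocation/registration would go here */
		return 0;
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		/* per-netns cleanup */
	}

	static struct pernet_operations foo_net_ops = {
		.init  = foo_net_init,
		.exit  = foo_net_exit,
		.async = true,	/* init/exit safe without net_mutex */
	};

register_pernet_subsys(&foo_net_ops) and unregister_pernet_subsys(&foo_net_ops) are then called from module init/exit as before; only the .async marking changes.
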
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b0d5c64e1978..a2ed9fdd58d4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
@@ -929,8 +933,8 @@ restart:
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +958,15 @@ restart:
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
@@ -975,7 +980,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -1672,7 +1677,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
@@ -1694,7 +1700,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -1793,28 +1799,35 @@ uncached_rt_out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
@@ -1836,26 +1849,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (ip6_multipath_hash_policy(net)) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
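The rewritten rt6_multipath_hash() above picks its inputs according to the new policy value: policy 0 hashes the L3 fields (addresses, flow label, next header), while policy 1 hashes the L4 five-tuple (addresses, ports, protocol) and ignores the flow label, preferring an existing skb L4 hash when one is present. The userspace sketch below only illustrates that field selection; the struct and the mixer are invented for the example, the kernel itself uses struct flow_keys and flow_hash_from_keys(), and the top bit of the result stays reserved, hence the final ">> 1".

	/* Illustrative field-selection sketch, not kernel code. */
	#include <stdint.h>
	#include <stddef.h>
	#include <stdio.h>

	struct v6_flow {
		uint8_t  saddr[16], daddr[16];
		uint32_t flow_label;
		uint16_t sport, dport;
		uint8_t  proto;
	};

	static uint32_t mix(uint32_t h, const void *p, size_t len)
	{
		const uint8_t *b = p;

		while (len--)
			h = h * 31 + *b++;	/* stand-in for flow_hash_from_keys() */
		return h;
	}

	static uint32_t mpath_hash(const struct v6_flow *f, int policy)
	{
		uint32_t h = 0;

		h = mix(h, f->saddr, sizeof(f->saddr));
		h = mix(h, f->daddr, sizeof(f->daddr));
		h = mix(h, &f->proto, sizeof(f->proto));
		if (policy == 0) {		/* L3: include the flow label */
			h = mix(h, &f->flow_label, sizeof(f->flow_label));
		} else {			/* L4: include the ports instead */
			h = mix(h, &f->sport, sizeof(f->sport));
			h = mix(h, &f->dport, sizeof(f->dport));
		}
		return h >> 1;			/* top bit reserved, as in the patch */
	}

	int main(void)
	{
		struct v6_flow f = { .proto = 6, .sport = 1234, .dport = 443 };

		printf("L3 hash 0x%x, L4 hash 0x%x\n",
		       (unsigned)mpath_hash(&f, 0), (unsigned)mpath_hash(&f, 1));
		return 0;
	}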
void ip6_route_input(struct sk_buff *skb)
@@ -1872,20 +1935,29 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1913,7 +1985,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@ -2162,6 +2234,7 @@ struct ip6rd_flowi {
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2235,8 +2308,9 @@ out:
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
@@ -2244,7 +2318,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
@@ -2264,7 +2338,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2286,7 +2360,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2488,7 +2562,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
@@ -2501,7 +2575,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
- struct net_device *dev,
+ const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2551,7 +2625,7 @@ static int ip6_route_check_nh(struct net *net,
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
@@ -2577,6 +2651,79 @@ out:
return err;
}
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+ struct net_device **_dev, struct inet6_dev **idev,
+ struct netlink_ext_ack *extack)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ int gwa_type = ipv6_addr_type(gw_addr);
+ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+ const struct net_device *dev = *_dev;
+ bool need_addr_check = !dev;
+ int err = -EINVAL;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ if (dev &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+ /* IPv6 strictly inhibits using non-link-local
+ * addresses as the nexthop address.
+ * Otherwise, the router will not be able to send redirects.
+ * It is very good, but in some (rare!) circumstances
+ * (SIT, PtP, NBMA NOARP links) it is handy to allow
+ * some exceptions. --ANK
+ * We allow IPv4-mapped nexthops to support RFC4798-type
+ * addressing
+ */
+ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK)
+ err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+ else
+ err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+ if (err)
+ goto out;
+ }
+
+ /* reload in case device was changed */
+ dev = *_dev;
+
+ err = -EINVAL;
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
+
+ /* if we did not check gw_addr above, do so now that the
+ * egress device has been resolved.
+ */
+ if (need_addr_check &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2696,14 +2843,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_output = rt->dst.output;
- rt->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_input = rt->dst.input;
- rt->dst.input = lwtunnel_input;
- }
+ lwtunnel_set_redirect(&rt->dst);
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
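The two removed if-blocks above are folded into the new lwtunnel_set_redirect() helper. Judging from the code it replaces, the helper presumably reduces to the sketch below; the actual definition lives in include/net/lwtunnel.h and is not part of this hunk, so treat the exact form as an assumption.

	/* Presumed shape of lwtunnel_set_redirect(), inferred from the
	 * replaced lines rather than copied from the header. */
	static inline void lwtunnel_set_redirect(struct dst_entry *dst)
	{
		if (lwtunnel_output_redirect(dst->lwtstate)) {
			dst->lwtstate->orig_output = dst->output;
			dst->output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(dst->lwtstate)) {
			dst->lwtstate->orig_input = dst->input;
			dst->input = lwtunnel_input;
		}
	}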
@@ -2766,61 +2906,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- const struct in6_addr *gw_addr;
- int gwa_type;
-
- gw_addr = &cfg->fc_gateway;
- gwa_type = ipv6_addr_type(gw_addr);
-
- /* if gw_addr is local we will fail to detect this in case
- * address is still TENTATIVE (DAD in progress). rt6_lookup()
- * will return already-added prefix route via interface that
- * prefix route was assigned to, which might be non-loopback.
- */
- err = -EINVAL;
- if (ipv6_chk_addr_and_flags(net, gw_addr,
- gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0)) {
- NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
goto out;
- }
- rt->rt6i_gateway = *gw_addr;
-
- if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- /* IPv6 strictly inhibits using not link-local
- addresses as nexthop address.
- Otherwise, router will not able to send redirects.
- It is very good, but in some (rare!) circumstances
- (SIT, PtP, NBMA NOARP links) it is handy to allow
- some exceptions. --ANK
- We allow IPv4-mapped nexthops to support RFC4798-type
- addressing
- */
- if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED))) {
- NL_SET_ERR_MSG(extack,
- "Invalid gateway address");
- goto out;
- }
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- err = ip6_route_check_nh_onlink(net, cfg, dev,
- extack);
- } else {
- err = ip6_route_check_nh(net, cfg, &dev, &idev);
- }
- if (err)
- goto out;
- }
- err = -EINVAL;
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Egress device not specified");
- goto out;
- } else if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack,
- "Egress device can not be loopback device for this route");
- goto out;
- }
+ rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
@@ -4612,7 +4702,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
@@ -4993,6 +5083,7 @@ static void __net_exit ip6_route_net_exit_late(struct net *net)
static struct pernet_operations ip6_route_net_ops = {
.init = ip6_route_net_init,
.exit = ip6_route_net_exit,
+ .async = true,
};
static int __net_init ipv6_inetpeer_init(struct net *net)
@@ -5018,11 +5109,13 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
static struct pernet_operations ipv6_inetpeer_ops = {
.init = ipv6_inetpeer_init,
.exit = ipv6_inetpeer_exit,
+ .async = true,
};
static struct pernet_operations ip6_route_net_late_ops = {
.init = ip6_route_net_init_late,
.exit = ip6_route_net_exit_late,
+ .async = true,
};
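The .async = true flag added here, and to most of the pernet_operations in the files below, marks the operations' per-namespace init/exit as safe to run without serializing on the global net_mutex, so network namespace setup and teardown can proceed in parallel. A minimal, self-contained example of the pattern, with illustrative names:

	#include <net/net_namespace.h>

	static int __net_init example_net_init(struct net *net)
	{
		/* allocate and initialise per-namespace state here */
		return 0;
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		/* release per-namespace state here */
	}

	static struct pernet_operations example_net_ops = {
		.init  = example_net_init,
		.exit  = example_net_exit,
		.async = true,	/* init/exit do not depend on net_mutex */
	};

	/* registered as usual, e.g. register_pernet_subsys(&example_net_ops); */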
static struct notifier_block ip6_route_dev_notifier = {
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 7f5621d09571..c3f13c3bd8a9 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -395,6 +395,7 @@ static void __net_exit seg6_net_exit(struct net *net)
static struct pernet_operations ip6_segments_ops = {
.init = seg6_net_init,
.exit = seg6_net_exit,
+ .async = true,
};
static const struct genl_ops seg6_genl_ops[] = {
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ba3767ef5e93..45722327375a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) {
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else {
struct fib6_table *table;
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0195598f7bb5..8a4f8fddd812 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel *t = netdev_priv(dev);
- if (dev == sitn->fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
t->ip6rd.relay_prefix = 0;
t->ip6rd.prefixlen = 16;
@@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[2] = sitn->tunnels_r;
sitn->tunnels[3] = sitn->tunnels_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
+
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
@@ -1885,6 +1888,7 @@ static struct pernet_operations sit_net_ops = {
.exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
+ .async = true,
};
static void __exit sit_cleanup(void)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8ac6a64..966c42af92f4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/netevent.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+static int zero;
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_policy);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
static struct ctl_table ipv6_table_template[] = {
{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv6.sysctl.multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_policy,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+ ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
@@ -251,6 +278,7 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
static struct pernet_operations ipv6_sysctl_net_ops = {
.init = ipv6_sysctl_net_init,
.exit = ipv6_sysctl_net_exit,
+ .async = true,
};
static struct ctl_table_header *ip6_header;
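The new net.ipv6.fib_multipath_hash_policy sysctl added above is clamped to 0 or 1, and every successful write fires a NETEVENT_IPV6_MPATH_HASH_UPDATE notification through proc_rt6_multipath_hash_policy(). A hedged sketch of how another kernel component could observe those writes; the module and callback names are invented for the example, only the netevent API and event name come from this patch set.

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <net/netevent.h>

	static int mpath_hash_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
	{
		if (event == NETEVENT_IPV6_MPATH_HASH_UPDATE)
			pr_info("IPv6 multipath hash policy changed\n");
		return NOTIFY_DONE;
	}

	static struct notifier_block mpath_hash_nb = {
		.notifier_call = mpath_hash_event,
	};

	static int __init mpath_hash_watch_init(void)
	{
		return register_netevent_notifier(&mpath_hash_nb);
	}

	static void __exit mpath_hash_watch_exit(void)
	{
		unregister_netevent_notifier(&mpath_hash_nb);
	}

	module_init(mpath_hash_watch_init);
	module_exit(mpath_hash_watch_exit);
	MODULE_LICENSE("GPL");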
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 412139f4eccd..5425d7b100ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1451,6 +1451,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1470,10 +1471,20 @@ process:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v6_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -1996,6 +2007,7 @@ static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
.exit_batch = tcpv6_net_exit_batch,
+ .async = true,
};
int __init tcpv6_init(void)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52e3ea0e6f50..ad30f5e31969 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net)
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
- .name = "UDPv6",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip6_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udpv6_destroy_sock,
- .setsockopt = udpv6_setsockopt,
- .getsockopt = udpv6_getsockopt,
- .sendmsg = udpv6_sendmsg,
- .recvmsg = udpv6_recvmsg,
- .release_cb = ip6_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v6_rehash,
- .get_port = udp_v6_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp6_sock),
- .h.udp_table = &udp_table,
+ .name = "UDPv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .release_cb = ip6_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp6_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 14ae32bb1f3d..f3839780dc31 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -123,6 +123,7 @@ static void __net_exit udplite6_proc_exit_net(struct net *net)
static struct pernet_operations udplite6_net_ops = {
.init = udplite6_proc_init_net,
.exit = udplite6_proc_exit_net,
+ .async = true,
};
int __init udplite6_proc_init(void)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 416fe67271a9..cbb270bd81b0 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -400,6 +400,7 @@ static void __net_exit xfrm6_net_exit(struct net *net)
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
+ .async = true,
};
int __init xfrm6_init(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f85f0d7480ac..a9673619e0e9 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
.exit = xfrm6_tunnel_net_exit,
.id = &xfrm6_tunnel_net_id,
.size = sizeof(struct xfrm6_tunnel_net),
+ .async = true,
};
static int __init xfrm6_tunnel_init(void)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9e2643ab4ccb..893a022f9620 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -989,14 +989,13 @@ done:
}
static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
addr->sa_family = AF_IUCV;
- *len = sizeof(struct sockaddr_iucv);
if (peer) {
memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
@@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
- return 0;
+ return sizeof(struct sockaddr_iucv);
}
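This hunk, together with the l2tp, llc and pppol2tp changes further down, follows a tree-wide change to the getname() contract: handlers now return the sockaddr length (or a negative errno) instead of returning 0 and writing the length through an int pointer. A sketch of how a caller then consumes the result; the function name is illustrative, not the actual core socket code.

	#include <linux/net.h>
	#include <linux/socket.h>

	/* Illustrative caller of the new getname() convention. */
	static int example_query_peer(struct socket *sock,
				      struct sockaddr_storage *addr)
	{
		int len;

		len = sock->ops->getname(sock, (struct sockaddr *)addr,
					 1 /* peer */);
		if (len < 0)
			return len;	/* negative errno from the handler */

		return len;		/* number of valid bytes in *addr */
	}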
/**
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 9d5649e4e8b7..2c1c8b3e4452 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
static struct pernet_operations kcm_net_ops = {
.init = kcm_proc_init_net,
.exit = kcm_proc_exit_net,
+ .async = true,
};
int __init kcm_proc_init(void)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 34355fd19f27..516cfad71b85 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1425,6 +1425,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
+ strp_stop(&psock->strp);
strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
err = -EALREADY;
@@ -2027,6 +2028,7 @@ static struct pernet_operations kcm_net_ops = {
.exit = kcm_exit_net,
.id = &kcm_net_id,
.size = sizeof(struct kcm_net),
+ .async = true,
};
static int __init kcm_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7e2e7188e7f4..3ac08ab26207 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
.exit = pfkey_net_exit,
.id = &pfkey_net_id,
.size = sizeof(struct netns_pfkey),
+ .async = true,
};
static void __exit ipsec_pfkey_exit(void)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 14b67dfacc4b..b86868da50d4 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1789,6 +1789,7 @@ static struct pernet_operations l2tp_net_ops = {
.exit = l2tp_exit_net,
.id = &l2tp_net_id,
.size = sizeof(struct l2tp_net),
+ .async = true,
};
static int __init l2tp_init(void)
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3428fba6f2b7..a9c05b2bc1b0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -345,7 +345,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
}
static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -366,8 +366,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_conn_id = lsk->conn_id;
lsa->l2tp_addr.s_addr = addr;
}
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 6f009eaa5fbe..957369192ca1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -419,7 +419,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
}
static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
struct sock *sk = sock->sk;
@@ -447,8 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
}
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = sk->sk_bound_dev_if;
- *uaddr_len = sizeof(*lsa);
- return 0;
+ return sizeof(*lsa);
}
static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 3b02f24ea9ec..977bca659787 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -862,7 +862,7 @@ err:
/* getname() support.
*/
static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *usockaddr_len, int peer)
+ int peer)
{
int len = 0;
int error = 0;
@@ -961,8 +961,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(uaddr, &sp, len);
}
- *usockaddr_len = len;
- error = 0;
+ error = len;
sock_put(sk);
end:
@@ -1763,6 +1762,7 @@ static struct pernet_operations pppol2tp_net_ops = {
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
.id = &pppol2tp_net_id,
+ .async = true,
};
/*****************************************************************************
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c38d16f22d2a..01dcc0823d1f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -971,7 +971,7 @@ release:
* Return the address information of a socket.
*/
static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddrlen, int peer)
+ int peer)
{
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
@@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
- *uaddrlen = sizeof(sllc);
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
@@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
IFHWADDRLEN);
}
}
- rc = 0;
sllc.sllc_family = AF_LLC;
memcpy(uaddr, &sllc, sizeof(sllc));
+ rc = sizeof(sllc);
out:
release_sock(sk);
return rc;
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d90928f50226..a7f7b8ff4729 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
const struct llc_addr *laddr,
struct sk_buff *skb)
{
- int i = 0, count = 256 / sizeof(struct sock *);
- struct sock *sk, *stack[count];
+ int i = 0;
+ struct sock *sk;
+ struct sock *stack[256 / sizeof(struct sock *)];
struct llc_sock *llc;
struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
@@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
continue;
sock_hold(sk);
- if (i < count)
+ if (i < ARRAY_SIZE(stack))
stack[i++] = sk;
else {
llc_do_mcast(sap, skb, stack, i);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 1f3188d03840..e83c19d4c292 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -298,13 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+ struct tid_ampdu_rx *tid_rx;
+
ht_dbg_ratelimited(sta->sdata,
"updated AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
/* We have no API to update the timeout value in the
- * driver so reject the timeout update.
+ * driver so reject the timeout update if the timeout
+ * changed. If it did not change, i.e., no real update,
+ * just reply with success.
*/
- status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_lock();
+ tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+ if (tid_rx && tid_rx->timeout == timeout)
+ status = WLAN_STATUS_SUCCESS;
+ else
+ status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_unlock();
goto end;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f4195a0f0279..fd68f6fb02d7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
ieee80211_recalc_ps(local);
ieee80211_recalc_ps_vif(sdata);
+ ieee80211_check_fast_rx_iface(sdata);
return 0;
}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 94c7ee9df33b..b5adf3625d16 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -212,6 +212,7 @@ static const char *hw_flag_names[] = {
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
FLAG(SUPPORTS_TDLS_BUFFER_STA),
+ FLAG(DEAUTH_NEED_MGD_TX_PREP),
FLAG(DOESNT_SUPPORT_QOS_NDP),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 444ea8d127fe..4105081dc1df 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -160,12 +160,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
sta->cparams.ecn ? "yes" : "no");
p += scnprintf(p,
bufsz+buf-p,
- "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+ "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz+buf-p,
- "%d %d %u %u %u %u %u %u %u %u %u\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -176,7 +176,11 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->tin.overlimit,
txqi->tin.collisions,
txqi->tin.tx_bytes,
- txqi->tin.tx_packets);
+ txqi->tin.tx_packets,
+ txqi->flags,
+ txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
+ txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
+ txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5fe01f82df12..d13ba064951f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1324,8 +1324,7 @@ static void ieee80211_iface_work(struct work_struct *work)
mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta) {
- u16 tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
__ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 408649bd4702..37e172701a63 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -35,7 +35,7 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
da = ieee80211_get_DA(hdr);
sa = ieee80211_get_SA(hdr);
if (ieee80211_is_data_qos(hdr->frame_control))
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
else
tid = 0;
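Several files in this diff (iface.c, michael.c, mlme.c, rc80211_minstrel_ht.c, rx.c, tx.c, wpa.c) replace the open-coded '*ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK' expression with a new ieee80211_get_tid() helper. The helper itself is not part of these hunks; from the expression it replaces, it presumably amounts to:

	#include <linux/ieee80211.h>

	/* Presumed shape of the helper, inferred from the replaced code;
	 * the real definition lives in include/linux/ieee80211.h. */
	static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
	{
		u8 *qc = ieee80211_get_qos_ctl(hdr);

		return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
	}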
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5f303abac5ad..fe4aefb06d9f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7,6 +7,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -2009,9 +2010,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_flush_queues(local, sdata, true);
/* deauthenticate/disassociate now */
- if (tx || frame_buf)
+ if (tx || frame_buf) {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+ /*
+ * In multi channel scenarios guarantee that the virtual
+ * interface is granted immediate airtime to transmit the
+ * deauthentication frame by calling mgd_prepare_tx, if the
+ * driver requested so.
+ */
+ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+ !ifmgd->have_beacon)
+ drv_mgd_prepare_tx(sdata->local, sdata);
+
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
reason, tx, frame_buf);
+ }
/* flush out frame - make sure the deauth was actually sent */
if (tx)
@@ -2152,7 +2166,7 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
u16 tx_time)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u16 tid = ieee80211_get_tid(hdr);
int ac = ieee80211_ac_from_tid(tid);
struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
unsigned long now = jiffies;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 4a5bdad9f303..fb586b6e5d49 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -669,7 +669,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
return;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 56fe16b07538..9c898a3688c6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -439,6 +439,10 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
+ flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
put_unaligned_le16(flags, pos);
pos += 2;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
@@ -1185,7 +1189,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
ack_policy = *ieee80211_get_qos_ctl(hdr) &
IEEE80211_QOS_CTL_ACK_POLICY_MASK;
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx) {
@@ -1524,9 +1528,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
ieee80211_has_pm(hdr->frame_control) &&
(ieee80211_is_data_qos(hdr->frame_control) ||
ieee80211_is_qos_nullfunc(hdr->frame_control))) {
- u8 tid;
-
- tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ u8 tid = ieee80211_get_tid(hdr);
ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid);
}
@@ -2351,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
}
static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
{
struct net_device *dev = rx->sdata->dev;
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
- if (unlikely(!ieee80211_is_data(fc)))
- return RX_CONTINUE;
-
- if (unlikely(!ieee80211_is_data_present(fc)))
- return RX_DROP_MONITOR;
-
- if (!(status->rx_flags & IEEE80211_RX_AMSDU))
- return RX_CONTINUE;
-
if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
- switch (rx->sdata->vif.type) {
- case NL80211_IFTYPE_AP_VLAN:
- if (!rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
- break;
- case NL80211_IFTYPE_STATION:
- if (!rx->sdata->u.mgd.use_4addr)
- return RX_DROP_UNUSABLE;
- break;
- default:
- return RX_DROP_UNUSABLE;
- }
check_da = NULL;
check_sa = NULL;
} else switch (rx->sdata->vif.type) {
@@ -2403,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
break;
}
- if (is_multicast_ether_addr(hdr->addr1))
- return RX_DROP_UNUSABLE;
-
skb->dev = dev;
__skb_queue_head_init(&frame_list);
if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
rx->sdata->vif.addr,
- rx->sdata->vif.type))
+ rx->sdata->vif.type,
+ data_offset))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2433,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
return RX_QUEUED;
}
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+ struct sk_buff *skb = rx->skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ __le16 fc = hdr->frame_control;
+
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data(fc)))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data_present(fc)))
+ return RX_DROP_MONITOR;
+
+ if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+ switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ if (!rx->sdata->u.vlan.sta)
+ return RX_DROP_UNUSABLE;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sdata->u.mgd.use_4addr)
+ return RX_DROP_UNUSABLE;
+ break;
+ default:
+ return RX_DROP_UNUSABLE;
+ }
+ }
+
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_UNUSABLE;
+
+ return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
#ifdef CONFIG_MAC80211_MESH
static ieee80211_rx_result
ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -2533,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
sdata->encrypt_headroom, 0, GFP_ATOMIC);
- if (!fwd_skb) {
- net_info_ratelimited("%s: failed to clone mesh frame\n",
- sdata->name);
+ if (!fwd_skb)
goto out;
- }
fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
@@ -2848,6 +2861,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
case WLAN_HT_ACTION_SMPS: {
struct ieee80211_supported_band *sband;
enum ieee80211_smps_mode smps_mode;
+ struct sta_opmode_info sta_opmode = {};
/* convert to HT capability */
switch (mgmt->u.action.u.ht_smps.smps_control) {
@@ -2868,17 +2882,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
if (rx->sta->sta.smps_mode == smps_mode)
goto handled;
rx->sta->sta.smps_mode = smps_mode;
+ sta_opmode.smps_mode = smps_mode;
+ sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
sband = rx->local->hw.wiphy->bands[status->band];
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_SMPS_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
struct ieee80211_supported_band *sband;
u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+ struct sta_opmode_info sta_opmode = {};
/* If it doesn't support 40 MHz it can't change ... */
if (!(rx->sta->sta.ht_cap.cap &
@@ -2899,9 +2920,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
rx->sta->sta.bandwidth = new_bw;
sband = rx->local->hw.wiphy->bands[status->band];
+ sta_opmode.bw = new_bw;
+ sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
rate_control_rate_update(local, sband, rx->sta,
IEEE80211_RC_BW_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ rx->sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
goto handled;
}
default:
@@ -3731,15 +3758,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- /* 4-addr is harder to deal with, later maybe */
- if (sdata->u.mgd.use_4addr)
- goto clear;
- /* software powersave is a huge mess, avoid all of it */
- if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
- goto clear;
- if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
- !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
- goto clear;
if (sta->sta.tdls) {
fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3751,6 +3769,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
fastrx.expected_ds_bits =
cpu_to_le16(IEEE80211_FCTL_FROMDS);
}
+
+ if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_TODS);
+ fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ }
+
+ if (!sdata->u.mgd.powersave)
+ break;
+
+ /* software powersave is a huge mess, avoid all of it */
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+ goto clear;
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+ goto clear;
break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
@@ -3767,6 +3802,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
!sdata->u.vlan.sta);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ sdata->u.vlan.sta) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ fastrx.internal_forward = 0;
+ }
+
break;
default:
goto clear;
@@ -3865,7 +3909,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
int orig_len = skb->len;
- int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+ int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ int snap_offs = hdrlen;
struct {
u8 snap[sizeof(rfc1042_header)];
__be16 proto;
@@ -3896,10 +3941,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
(status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
return false;
- /* we don't deal with A-MSDU deaggregation here */
- if (status->rx_flags & IEEE80211_RX_AMSDU)
- return false;
-
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return false;
@@ -3931,21 +3972,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
snap_offs += IEEE80211_CCMP_HDR_LEN;
}
- if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
- goto drop;
- payload = (void *)(skb->data + snap_offs);
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+ if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+ goto drop;
- if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
- return false;
+ payload = (void *)(skb->data + snap_offs);
- /* Don't handle these here since they require special code.
- * Accept AARP and IPX even though they should come with a
- * bridge-tunnel header - but if we get them this way then
- * there's little point in discarding them.
- */
- if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
- payload->proto == fast_rx->control_port_protocol))
- return false;
+ if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+ return false;
+
+ /* Don't handle these here since they require special code.
+ * Accept AARP and IPX even though they should come with a
+ * bridge-tunnel header - but if we get them this way then
+ * there's little point in discarding them.
+ */
+ if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+ payload->proto == fast_rx->control_port_protocol))
+ return false;
+ }
/* after this point, don't punt to the slowpath! */
@@ -3959,12 +4003,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
}
/* statistics part of ieee80211_rx_h_sta_process() */
- stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
-
- stats->fragments++;
- stats->packets++;
-
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
@@ -3993,6 +4031,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if (rx->key && !ieee80211_has_protected(hdr->frame_control))
goto drop;
+ if (status->rx_flags & IEEE80211_RX_AMSDU) {
+ if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+ RX_QUEUED)
+ goto drop;
+
+ return true;
+ }
+
+ stats->last_rx = jiffies;
+ stats->last_rate = sta_stats_encode_rate(status);
+
+ stats->fragments++;
+ stats->packets++;
+
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index af0b608ee8ed..655c3d8b0d80 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2288,6 +2288,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
sinfo->expected_throughput = thr;
}
+
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+ sta->status_stats.ack_signal_filled) {
+ sinfo->ack_signal = sta->status_stats.last_ack_signal;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+ }
}
u32 sta_get_expected_throughput(struct sta_info *sta)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index cd53619435b6..f64eb86ca64b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -548,6 +548,8 @@ struct sta_info {
u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
unsigned long last_ack;
+ s8 last_ack_signal;
+ bool ack_signal_filled;
} status_stats;
/* Updated from TX path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index da7427a41529..743e89c5926c 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -187,9 +187,16 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_mgmt *mgmt = (void *) skb->data;
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
- if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
sta->status_stats.last_ack = jiffies;
+ if (txinfo->status.is_valid_ack_signal) {
+ sta->status_stats.last_ack_signal =
+ (s8)txinfo->status.ack_signal;
+ sta->status_stats.ack_signal_filled = true;
+ }
+ }
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -487,6 +494,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
ieee80211_is_qos_nullfunc(hdr->frame_control))
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
+ info->status.ack_signal,
+ info->status.is_valid_ack_signal,
GFP_ATOMIC);
else
cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 69722504e3e1..933c67b5f845 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -797,7 +797,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- u8 *qc;
int tid;
/*
@@ -844,9 +843,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
return TX_CONTINUE;
/* include per-STA, per-TID sequence counter */
-
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tx->sta->tx_stats.msdu[tid]++;
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -1158,7 +1155,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
int tid;
- u8 *qc;
memset(tx, 0, sizeof(*tx));
tx->skb = skb;
@@ -1198,8 +1194,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
!ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
struct tid_ampdu_tx *tid_tx;
- qc = ieee80211_get_qos_ctl(hdr);
- tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+ tid = ieee80211_get_tid(hdr);
tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
if (tid_tx) {
@@ -1921,7 +1916,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_hdr *hdr;
int headroom;
bool may_encrypt;
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b9276ac849fa..5714dee76b12 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -447,6 +447,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
enum nl80211_band band)
{
enum ieee80211_sta_rx_bandwidth new_bw;
+ struct sta_opmode_info sta_opmode = {};
u32 changed = 0;
u8 nss;
@@ -460,7 +461,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
if (sta->sta.rx_nss != nss) {
sta->sta.rx_nss = nss;
+ sta_opmode.rx_nss = nss;
changed |= IEEE80211_RC_NSS_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
}
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
@@ -481,9 +484,15 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
new_bw = ieee80211_sta_cur_vht_bw(sta);
if (new_bw != sta->sta.bandwidth) {
sta->sta.bandwidth = new_bw;
+ sta_opmode.bw = new_bw;
changed |= IEEE80211_RC_BW_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
}
+ if (sta_opmode.changed)
+ cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+ &sta_opmode, GFP_KERNEL);
+
return changed;
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 785056cb76f6..58d0b258b684 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -340,7 +340,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
a4_included = ieee80211_has_a4(hdr->frame_control);
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
@@ -601,8 +601,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
aad[23] = 0;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
@@ -867,8 +866,7 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = *ieee80211_get_qos_ctl(hdr) &
- IEEE80211_QOS_CTL_TID_MASK;
+ qos_tid = ieee80211_get_tid(hdr);
else
qos_tid = 0;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7a4de6d618b1..d4a89a8be013 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2488,6 +2488,7 @@ static void mpls_net_exit(struct net *net)
static struct pernet_operations mpls_net_ops = {
.init = mpls_net_init,
.exit = mpls_net_exit,
+ .async = true,
};
static struct rtnl_af_ops mpls_af_ops __read_mostly = {
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile
index dd12b564f2e7..436ef68331f2 100644
--- a/net/ncsi/Makefile
+++ b/net/ncsi/Makefile
@@ -1,4 +1,4 @@
#
# Makefile for NCSI API
#
-obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o
+obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o ncsi-netlink.o
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index d30f7bd741d0..8da84312cd3b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -276,6 +276,8 @@ struct ncsi_dev_priv {
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
+ struct ncsi_package *force_package; /* Force a specific package */
+ struct ncsi_channel *force_channel; /* Force a specific channel */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
#define NCSI_REQ_START_IDX 1
@@ -318,6 +320,7 @@ extern spinlock_t ncsi_dev_lock;
list_for_each_entry_rcu(nc, &np->channels, node)
/* Resources */
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index c989211bbabc..c3695ba0cf94 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <net/ncsi.h>
#include <net/net_namespace.h>
@@ -23,6 +22,7 @@
#include "internal.h"
#include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
LIST_HEAD(ncsi_dev_list);
DEFINE_SPINLOCK(ncsi_dev_lock);
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
}
-static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
{
struct ncsi_channel_filter *ncf;
int size;
@@ -965,20 +965,37 @@ error:
static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_package *np;
- struct ncsi_channel *nc, *found, *hot_nc;
+ struct ncsi_package *np, *force_package;
+ struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
struct ncsi_channel_mode *ncm;
unsigned long flags;
spin_lock_irqsave(&ndp->lock, flags);
hot_nc = ndp->hot_channel;
+ force_channel = ndp->force_channel;
+ force_package = ndp->force_package;
spin_unlock_irqrestore(&ndp->lock, flags);
+ /* Force a specific channel whether or not it has link if we have been
+ * configured to do so
+ */
+ if (force_package && force_channel) {
+ found = force_channel;
+ ncm = &found->modes[NCSI_MODE_LINK];
+ if (!(ncm->data[2] & 0x1))
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Channel %u forced, but it is link down\n",
+ found->id);
+ goto out;
+ }
+
/* The search is done once an inactive channel with up
* link is found.
*/
found = NULL;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (ndp->force_package && np != ndp->force_package)
+ continue;
NCSI_FOR_EACH_CHANNEL(np, nc) {
spin_lock_irqsave(&nc->lock, flags);
@@ -1594,6 +1611,9 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
ndp->ptype.dev = dev;
dev_add_pack(&ndp->ptype);
+ /* Set up generic netlink interface */
+ ncsi_init_netlink(dev);
+
return nd;
}
EXPORT_SYMBOL_GPL(ncsi_register_dev);
@@ -1673,6 +1693,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
#endif
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
+ ncsi_unregister_netlink(nd->dev);
+
kfree(ndp);
}
EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
new file mode 100644
index 000000000000..05fcfb4fbe1d
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.c
@@ -0,0 +1,423 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <net/genetlink.h>
+#include <net/ncsi.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <uapi/linux/ncsi.h>
+
+#include "internal.h"
+#include "ncsi-netlink.h"
+
+static struct genl_family ncsi_genl_family;
+
+static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
+ [NCSI_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [NCSI_ATTR_PACKAGE_LIST] = { .type = NLA_NESTED },
+ [NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 },
+ [NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 },
+};
+
+static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
+{
+ struct ncsi_dev_priv *ndp;
+ struct net_device *dev;
+ struct ncsi_dev *nd;
+ struct ncsi_dev;
+
+ if (!net)
+ return NULL;
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev) {
+ pr_err("NCSI netlink: No device for ifindex %u\n", ifindex);
+ return NULL;
+ }
+
+ nd = ncsi_find_dev(dev);
+ ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+
+ dev_put(dev);
+ return ndp;
+}
+
+static int ncsi_write_channel_info(struct sk_buff *skb,
+ struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *nc)
+{
+ struct nlattr *vid_nest;
+ struct ncsi_channel_filter *ncf;
+ struct ncsi_channel_mode *m;
+ u32 *data;
+ int i;
+
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
+ m = &nc->modes[NCSI_MODE_LINK];
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
+ if (nc->state == NCSI_CHANNEL_ACTIVE)
+ nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
+ if (ndp->force_channel == nc)
+ nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
+
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
+
+ vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
+ if (!vid_nest)
+ return -ENOMEM;
+ ncf = nc->filters[NCSI_FILTER_VLAN];
+ i = -1;
+ if (ncf) {
+ while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
+ i + 1)) < ncf->total) {
+ data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
+ /* Uninitialised channels will have 'zero' vlan ids */
+ if (!data || !*data)
+ continue;
+ nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
+ *(u16 *)data);
+ }
+ }
+ nla_nest_end(skb, vid_nest);
+
+ return 0;
+}
+
+static int ncsi_write_package_info(struct sk_buff *skb,
+ struct ncsi_dev_priv *ndp, unsigned int id)
+{
+ struct nlattr *pnest, *cnest, *nest;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ bool found;
+ int rc;
+
+ if (id > ndp->package_num) {
+ netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id);
+ return -ENODEV;
+ }
+
+ found = false;
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (np->id != id)
+ continue;
+ pnest = nla_nest_start(skb, NCSI_PKG_ATTR);
+ if (!pnest)
+ return -ENOMEM;
+ nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ if (ndp->force_package == np)
+ nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+ cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+ if (!cnest) {
+ nla_nest_cancel(skb, pnest);
+ return -ENOMEM;
+ }
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR);
+ if (!nest) {
+ nla_nest_cancel(skb, cnest);
+ nla_nest_cancel(skb, pnest);
+ return -ENOMEM;
+ }
+ rc = ncsi_write_channel_info(skb, ndp, nc);
+ if (rc) {
+ nla_nest_cancel(skb, nest);
+ nla_nest_cancel(skb, cnest);
+ nla_nest_cancel(skb, pnest);
+ return rc;
+ }
+ nla_nest_end(skb, nest);
+ }
+ nla_nest_end(skb, cnest);
+ nla_nest_end(skb, pnest);
+ found = true;
+ }
+
+ if (!found)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned int package_id;
+ struct sk_buff *skb;
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(genl_info_net(info),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ if (!hdr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+
+ attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ rc = ncsi_write_package_info(skb, ndp, package_id);
+
+ if (rc) {
+ nla_nest_cancel(skb, attr);
+ goto err;
+ }
+
+ nla_nest_end(skb, attr);
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, info);
+
+err:
+ genlmsg_cancel(skb, hdr);
+ kfree_skb(skb);
+ return rc;
+}
+
+static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *attrs[NCSI_ATTR_MAX + 1];
+ struct ncsi_package *np, *package;
+ struct ncsi_dev_priv *ndp;
+ unsigned int package_id;
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
+ ncsi_genl_policy, NULL);
+ if (rc)
+ return rc;
+
+ if (!attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(sock_net(skb->sk),
+ nla_get_u32(attrs[NCSI_ATTR_IFINDEX]));
+
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = cb->args[0];
+ package = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id)
+ package = np;
+
+ if (!package)
+ return 0; /* done */
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+ if (!hdr) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
+
+ attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ rc = ncsi_write_package_info(skb, ndp, package->id);
+ if (rc) {
+ nla_nest_cancel(skb, attr);
+ goto err;
+ }
+
+ nla_nest_end(skb, attr);
+ genlmsg_end(skb, hdr);
+
+ cb->args[0] = package_id + 1;
+
+ return skb->len;
+err:
+ genlmsg_cancel(skb, hdr);
+ return rc;
+}
+
+static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_package *np, *package;
+ struct ncsi_channel *nc, *channel;
+ u32 package_id, channel_id;
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(sock_net(msg->sk),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+ package = NULL;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id)
+ package = np;
+ if (!package) {
+ /* The user has set a package that does not exist */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return -ERANGE;
+ }
+
+ channel = NULL;
+ if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+ /* Allow any channel */
+ channel_id = NCSI_RESERVED_CHANNEL;
+ } else {
+ channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+ NCSI_FOR_EACH_CHANNEL(package, nc)
+ if (nc->id == channel_id)
+ channel = nc;
+ }
+
+ if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
+ /* The user has set a channel that does not exist on this
+ * package
+ */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
+ channel_id);
+ return -ERANGE;
+ }
+
+ ndp->force_package = package;
+ ndp->force_channel = channel;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
+ package_id, channel_id,
+ channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+
+ /* Bounce the NCSI channel to set changes */
+ ncsi_stop_dev(&ndp->ndev);
+ ncsi_start_dev(&ndp->ndev);
+
+ return 0;
+}
+
+static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(sock_net(msg->sk),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ /* Clear any override */
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->force_package = NULL;
+ ndp->force_channel = NULL;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
+
+ /* Bounce the NCSI channel to set changes */
+ ncsi_stop_dev(&ndp->ndev);
+ ncsi_start_dev(&ndp->ndev);
+
+ return 0;
+}
+
+static const struct genl_ops ncsi_ops[] = {
+ {
+ .cmd = NCSI_CMD_PKG_INFO,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_pkg_info_nl,
+ .dumpit = ncsi_pkg_info_all_nl,
+ .flags = 0,
+ },
+ {
+ .cmd = NCSI_CMD_SET_INTERFACE,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_interface_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = NCSI_CMD_CLEAR_INTERFACE,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_clear_interface_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static struct genl_family ncsi_genl_family __ro_after_init = {
+ .name = "NCSI",
+ .version = 0,
+ .maxattr = NCSI_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = ncsi_ops,
+ .n_ops = ARRAY_SIZE(ncsi_ops),
+};
+
+int ncsi_init_netlink(struct net_device *dev)
+{
+ int rc;
+
+ rc = genl_register_family(&ncsi_genl_family);
+ if (rc)
+ netdev_err(dev, "ncsi: failed to register netlink family\n");
+
+ return rc;
+}
+
+int ncsi_unregister_netlink(struct net_device *dev)
+{
+ int rc;
+
+ rc = genl_unregister_family(&ncsi_genl_family);
+ if (rc)
+ netdev_err(dev, "ncsi: failed to unregister netlink family\n");
+
+ return rc;
+}
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
new file mode 100644
index 000000000000..91a5c256f8c4
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_NETLINK_H__
+#define __NCSI_NETLINK_H__
+
+#include <linux/netdevice.h>
+
+#include "internal.h"
+
+int ncsi_init_netlink(struct net_device *dev);
+int ncsi_unregister_netlink(struct net_device *dev);
+
+#endif /* __NCSI_NETLINK_H__ */
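Once the "NCSI" generic netlink family above is registered, it can be exercised from userspace. Below is a minimal, untested sketch using libnl-3; the family name, NCSI_CMD_PKG_INFO and the NCSI_ATTR_* attributes come from this patch, while the build line, the ifindex value and the error handling are assumptions.

/* Hedged sketch: query package 0 on ifindex 2 via the "NCSI" genl family.
 * Assumed build line: gcc ncsi_query.c $(pkg-config --cflags --libs libnl-genl-3.0)
 * <linux/ncsi.h> is the uapi header added elsewhere in this series.
 */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/msg.h>
#include <netlink/attr.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/ncsi.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	if (!sk || genl_connect(sk))
		return 1;
	family = genl_ctrl_resolve(sk, "NCSI");	/* family name from this patch */
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NCSI_CMD_PKG_INFO, 0);
	nla_put_u32(msg, NCSI_ATTR_IFINDEX, 2);		/* ifindex is illustrative */
	nla_put_u32(msg, NCSI_ATTR_PACKAGE_ID, 0);	/* package 0 is illustrative */

	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);	/* reply parsing omitted for brevity */

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}

The reply carries a nested NCSI_ATTR_PACKAGE_LIST built by ncsi_write_package_info() above; walking those nests is left out of the sketch.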
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0f6b8172fb9a..d72cc786c7b7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net)
static struct pernet_operations netfilter_net_ops = {
.init = netfilter_net_init,
.exit = netfilter_net_exit,
+ .async = true,
};
int __init netfilter_init(void)
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 975a85a48d39..2523ebe2b3cc 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2094,7 +2094,8 @@ static struct pernet_operations ip_set_net_ops = {
.init = ip_set_net_init,
.exit = ip_set_net_exit,
.id = &ip_set_net_id,
- .size = sizeof(struct ip_set_net)
+ .size = sizeof(struct ip_set_net),
+ .async = true,
};
static int __init
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5f6f73cf2174..6a6cb9db030b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2289,10 +2289,12 @@ static struct pernet_operations ipvs_core_ops = {
.exit = __ip_vs_cleanup,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
+ .async = true,
};
static struct pernet_operations ipvs_core_dev_ops = {
.exit = __ip_vs_dev_cleanup,
+ .async = true,
};
/*
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 58d5d05aec24..8b25aab41928 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -479,6 +479,7 @@ static void __ip_vs_ftp_exit(struct net *net)
static struct pernet_operations ip_vs_ftp_ops = {
.init = __ip_vs_ftp_init,
.exit = __ip_vs_ftp_exit,
+ .async = true,
};
static int __init ip_vs_ftp_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index d625179de485..6a340c94c4b8 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
+ .async = true,
};
static int __init ip_vs_lblc_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 84c57b62a588..0627881128da 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
+ .async = true,
};
static int __init ip_vs_lblcr_init(void)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dd177ebee9aa..8884d302d33a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3417,6 +3417,7 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
static struct pernet_operations ctnetlink_net_ops = {
.init = ctnetlink_net_init,
.exit_batch = ctnetlink_net_exit_batch,
+ .async = true,
};
static int __init ctnetlink_init(void)
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d049ea5a3770..9bcd72fe91f9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -406,6 +406,7 @@ static struct pernet_operations proto_gre_net_ops = {
.exit = proto_gre_net_exit,
.id = &proto_gre_net_id,
.size = sizeof(struct netns_proto_gre),
+ .async = true,
};
static int __init nf_ct_proto_gre_init(void)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9123fdec5e14..3cdce391362e 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -705,6 +705,7 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
static struct pernet_operations nf_conntrack_net_ops = {
.init = nf_conntrack_pernet_init,
.exit_batch = nf_conntrack_pernet_exit,
+ .async = true,
};
static int __init nf_conntrack_standalone_init(void)
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index c2c1b16b7538..1ba3da51050d 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net)
static struct pernet_operations nf_log_net_ops = {
.init = nf_log_net_init,
.exit = nf_log_net_exit,
+ .async = true,
};
int __init netfilter_log_init(void)
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
index 350eb147754d..254c2c6bde48 100644
--- a/net/netfilter/nf_log_netdev.c
+++ b/net/netfilter/nf_log_netdev.c
@@ -47,6 +47,7 @@ static void __net_exit nf_log_netdev_net_exit(struct net *net)
static struct pernet_operations nf_log_netdev_net_ops = {
.init = nf_log_netdev_net_init,
.exit = nf_log_netdev_net_exit,
+ .async = true,
};
static int __init nf_log_netdev_init(void)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 92139a087260..64b875e452ca 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
.exit = synproxy_net_exit,
.id = &synproxy_net_id,
.size = sizeof(struct synproxy_net),
+ .async = true,
};
static int __init synproxy_core_init(void)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c4acc7340eb1..fd13d28e4ca7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6597,6 +6597,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.exit = nf_tables_exit_net,
+ .async = true,
};
static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 03ead8a9e90c..84fc4954862d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -566,6 +566,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
static struct pernet_operations nfnetlink_net_ops = {
.init = nfnetlink_net_init,
.exit_batch = nfnetlink_net_exit_batch,
+ .async = true,
};
static int __init nfnetlink_init(void)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 88d427f9f9e6..8d9f18bb8840 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -515,6 +515,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
static struct pernet_operations nfnl_acct_ops = {
.init = nfnl_acct_net_init,
.exit = nfnl_acct_net_exit,
+ .async = true,
};
static int __init nfnl_acct_init(void)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 95b04702a655..6819300f7fb7 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -586,6 +586,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
static struct pernet_operations cttimeout_ops = {
.init = cttimeout_net_init,
.exit = cttimeout_net_exit,
+ .async = true,
};
static int __init cttimeout_init(void)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 7b46aa4c478d..b21ef79849a1 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1108,6 +1108,7 @@ static struct pernet_operations nfnl_log_net_ops = {
.exit = nfnl_log_net_exit,
.id = &nfnl_log_net_id,
.size = sizeof(struct nfnl_log_net),
+ .async = true,
};
static int __init nfnetlink_log_init(void)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8bba23160a68..9f572ed56208 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
if (diff > skb_tailroom(e->skb)) {
nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
diff, GFP_ATOMIC);
- if (!nskb) {
- printk(KERN_WARNING "nf_queue: OOM "
- "in mangle, dropping packet\n");
+ if (!nskb)
return -ENOMEM;
- }
kfree_skb(e->skb);
e->skb = nskb;
}
@@ -1528,6 +1525,7 @@ static struct pernet_operations nfnl_queue_net_ops = {
.exit_batch = nfnl_queue_net_exit_batch,
.id = &nfnl_queue_net_id,
.size = sizeof(struct nfnl_queue_net),
+ .async = true,
};
static int __init nfnetlink_queue_init(void)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 4aa01c90e9d1..6de1f6a4cb80 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1789,6 +1789,7 @@ static void __net_exit xt_net_exit(struct net *net)
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
.exit = xt_net_exit,
+ .async = true,
};
static int __init xt_init(void)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 3360f13dc208..ef65b7a9173e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1349,6 +1349,7 @@ static struct pernet_operations hashlimit_net_ops = {
.exit = hashlimit_net_exit,
.id = &hashlimit_net_id,
.size = sizeof(struct hashlimit_net),
+ .async = true,
};
static int __init hashlimit_mt_init(void)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 81ee1d6543b2..486dd24da78b 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
.exit = recent_net_exit,
.id = &recent_net_id,
.size = sizeof(struct recent_net),
+ .async = true,
};
static struct xt_match recent_mt_reg[] __read_mostly = {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 07e8478068f0..5d10dcfe6411 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = {
.exit = netlink_tap_exit_net,
.id = &netlink_tap_net_id,
.size = sizeof(struct netlink_tap_net),
+ .async = true,
};
static bool netlink_filter_tap(const struct sk_buff *skb)
@@ -1105,7 +1106,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
}
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1113,7 +1114,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_family = AF_NETLINK;
nladdr->nl_pad = 0;
- *addr_len = sizeof(*nladdr);
if (peer) {
nladdr->nl_pid = nlk->dst_portid;
@@ -1124,7 +1124,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
}
- return 0;
+ return sizeof(*nladdr);
}
static int netlink_ioctl(struct socket *sock, unsigned int cmd,
@@ -2726,6 +2726,7 @@ static void __init netlink_add_usersock_entry(void)
static struct pernet_operations __net_initdata netlink_net_ops = {
.init = netlink_net_init,
.exit = netlink_net_exit,
+ .async = true,
};
static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
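This hunk also converts netlink_getname() to the new ->getname() calling convention: instead of writing the address length through an int pointer, the handler returns the length (or a negative errno). The same mechanical conversion is applied to netrom, nfc, packet, phonet, qrtr, rds, rose and the rds-tcp caller later in this diff. A minimal sketch of the convention, with made-up names, is:

/* Hedged sketch: ->getname() now returns the number of bytes written into
 * uaddr, or a negative errno.  example_getname/example_caller are hypothetical.
 */
#include <linux/net.h>
#include <linux/in.h>
#include <linux/socket.h>

static int example_getname(struct socket *sock, struct sockaddr *uaddr,
			   int peer)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

	sin->sin_family = AF_INET;
	sin->sin_port = 0;
	sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	return sizeof(*sin);		/* the length is now the return value */
}

/* A caller under the same convention (compare rds_tcp_tc_info() below,
 * which drops its local 'sinlen' variable):
 */
static void example_caller(struct socket *sock)
{
	struct sockaddr_in sin;
	int len = sock->ops->getname(sock, (struct sockaddr *)&sin, 0);

	if (len < 0)
		return;			/* negative errno on failure */
	/* the first 'len' bytes of &sin are now valid */
}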
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index b9ce82c9440f..af51b8c0a2cb 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net)
static struct pernet_operations genl_pernet_ops = {
.init = genl_pernet_init,
.exit = genl_pernet_exit,
+ .async = true,
};
static int __init genl_init(void)
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 9ba30c63be3d..35bb6807927f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -829,11 +829,12 @@ out_release:
}
static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
+ int uaddr_len;
memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
@@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
sax->fsa_ax25.sax25_call = nr->user_addr;
memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
sax->fsa_digipeater[0] = nr->dest_addr;
- *uaddr_len = sizeof(struct full_sockaddr_ax25);
+ uaddr_len = sizeof(struct full_sockaddr_ax25);
} else {
sax->fsa_ax25.sax25_family = AF_NETROM;
sax->fsa_ax25.sax25_ndigis = 0;
sax->fsa_ax25.sax25_call = nr->source_addr;
- *uaddr_len = sizeof(struct sockaddr_ax25);
+ uaddr_len = sizeof(struct sockaddr_ax25);
}
release_sock(sk);
- return 0;
+ return uaddr_len;
}
int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 376040092142..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -497,7 +497,7 @@ error:
}
static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
llcp_sock->dsap, llcp_sock->ssap);
memset(llcp_addr, 0, sizeof(*llcp_addr));
- *len = sizeof(struct sockaddr_nfc_llcp);
lock_sock(sk);
if (!llcp_sock->dev) {
@@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
llcp_addr->service_name_len);
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_nfc_llcp);
}
static inline __poll_t llcp_accept_poll(struct sock *parent)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ef38e5aecd28..100191df0371 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2384,6 +2384,7 @@ static struct pernet_operations ovs_net_ops = {
.exit = ovs_exit_net,
.id = &ovs_net_id,
.size = sizeof(struct ovs_net),
+ .async = true,
};
static int __init dp_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index b6c8524032a0..f81c1d0ddff4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
return 0;
}
-static unsigned int packet_length(const struct sk_buff *skb,
- struct net_device *dev)
+static int packet_length(const struct sk_buff *skb,
+ struct net_device *dev)
{
- unsigned int length = skb->len - dev->hard_header_len;
+ int length = skb->len - dev->hard_header_len;
if (!skb_vlan_tag_present(skb) &&
eth_type_vlan(skb->protocol))
@@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb,
* account for 802.1ad. e.g. is_skb_forwardable().
*/
- return length;
+ return length > 0 ? length : 0;
}
void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
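packet_length() is switched from unsigned int to int and clamped at zero so that subtracting the hard header and VLAN lengths from a short frame cannot wrap around and make the later MTU comparison treat the packet as enormous. A standalone illustration of the wraparound being avoided (the lengths are made up):

/* Hedged sketch of the unsigned underflow the signed change avoids. */
#include <stdio.h>

int main(void)
{
	unsigned int skb_len = 10, hard_header_len = 14;
	unsigned int u = skb_len - hard_header_len;		/* wraps to a huge value */
	int s = (int)skb_len - (int)hard_header_len;		/* -4 */

	printf("unsigned: %u, signed and clamped: %d\n", u, s > 0 ? s : 0);
	return 0;
}

With the old unsigned arithmetic, a frame shorter than the header would appear to exceed any MTU; with the signed, clamped version it simply reports length 0.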
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e0f3f4aeeb4f..2c5a6fe5d749 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3409,7 +3409,7 @@ out:
}
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
@@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
- *uaddr_len = sizeof(*uaddr);
- return 0;
+ return sizeof(*uaddr);
}
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct net_device *dev;
struct sock *sk = sock->sk;
@@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_halen = 0;
}
rcu_read_unlock();
- *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
- return 0;
+ return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
@@ -4559,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net)
static struct pernet_operations packet_net_ops = {
.init = packet_net_init,
.exit = packet_net_exit,
+ .async = true,
};
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 77787512fc32..9454e8393793 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
.exit = phonet_exit_net,
.id = &phonet_net_id,
.size = sizeof(struct phonet_net),
+ .async = true,
};
/* Initialize Phonet devices list */
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index fffcd69f63ff..f9b40e6a18a5 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
}
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
@@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
pn->sobject);
- *sockaddr_len = sizeof(struct sockaddr_pn);
- return 0;
+ return sizeof(struct sockaddr_pn);
}
static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5fb3929e3d7d..b33e5aeb4c06 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
}
static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sockaddr_qrtr qaddr;
@@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
}
release_sock(sk);
- *len = sizeof(qaddr);
qaddr.sq_family = AF_QIPCRTR;
memcpy(saddr, &qaddr, sizeof(qaddr));
- return 0;
+ return sizeof(qaddr);
}
static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 744c637c86b0..ab751a150f70 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
rds_notify_queue_get(rs, NULL);
+ rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);
spin_lock_bh(&rds_sock_lock);
list_del_init(&rs->rs_item);
@@ -110,7 +111,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
}
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
@@ -131,8 +132,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_family = AF_INET;
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
/*
@@ -145,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
- * update, or a RDMA completion).
+ * update, or a RDMA completion, or a MSG_ZEROCOPY completion).
*
* EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
@@ -179,10 +179,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
- !list_empty(&rs->rs_notify_queue))
+ !list_empty(&rs->rs_notify_queue) ||
+ !list_empty(&rs->rs_zcookie_queue.zcookie_head))
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
@@ -512,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
INIT_LIST_HEAD(&rs->rs_recv_queue);
INIT_LIST_HEAD(&rs->rs_notify_queue);
INIT_LIST_HEAD(&rs->rs_cong_list);
+ rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 2da3176bf792..abef75da89a7 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len)
{
- uint64_t buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_conn_path *, void *),
+ u64 *buffer,
size_t item_len)
{
- u64 buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8];
+
rds_walk_conn_path_info(sock, len, iter, lens,
rds_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_connection));
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 50a88f3e7e39..02deee29e7f1 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];
+
rds_for_each_conn_info(sock, len, iter, lens,
rds_ib_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_rdma_connection));
}
diff --git a/net/rds/message.c b/net/rds/message.c
index 4318cc9b78f7..a35f76971984 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -33,6 +33,9 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
#include "rds.h"
@@ -45,7 +48,6 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
};
-
void rds_message_addref(struct rds_message *rm)
{
rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
@@ -53,20 +55,107 @@ void rds_message_addref(struct rds_message *rm)
}
EXPORT_SYMBOL_GPL(rds_message_addref);
+static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
+{
+ struct rds_zcopy_cookies *ck = &info->zcookies;
+ int ncookies = ck->num;
+
+ if (ncookies == RDS_MAX_ZCOOKIES)
+ return false;
+ ck->cookies[ncookies] = cookie;
+ ck->num = ++ncookies;
+ return true;
+}
+
+static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+{
+ return container_of(znotif, struct rds_msg_zcopy_info, znotif);
+}
+
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
+{
+ unsigned long flags;
+ LIST_HEAD(copy);
+ struct rds_msg_zcopy_info *info, *tmp;
+
+ spin_lock_irqsave(&q->lock, flags);
+ list_splice(&q->zcookie_head, &copy);
+ INIT_LIST_HEAD(&q->zcookie_head);
+ spin_unlock_irqrestore(&q->lock, flags);
+
+ list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
+ list_del(&info->rs_zcookie_next);
+ kfree(info);
+ }
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+ struct rds_znotifier *znotif)
+{
+ struct rds_msg_zcopy_info *info;
+ struct rds_msg_zcopy_queue *q;
+ u32 cookie = znotif->z_cookie;
+ struct rds_zcopy_cookies *ck;
+ struct list_head *head;
+ unsigned long flags;
+
+ mm_unaccount_pinned_pages(&znotif->z_mmp);
+ q = &rs->rs_zcookie_queue;
+ spin_lock_irqsave(&q->lock, flags);
+ head = &q->zcookie_head;
+ if (!list_empty(head)) {
+ info = list_entry(head->prev, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (info && rds_zcookie_add(info, cookie)) {
+ spin_unlock_irqrestore(&q->lock, flags);
+ kfree(rds_info_from_znotifier(znotif));
+ /* caller invokes rds_wake_sk_sleep() */
+ return;
+ }
+ }
+
+ info = rds_info_from_znotifier(znotif);
+ ck = &info->zcookies;
+ memset(ck, 0, sizeof(*ck));
+ WARN_ON(!rds_zcookie_add(info, cookie));
+ list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
+
+ spin_unlock_irqrestore(&q->lock, flags);
+ /* caller invokes rds_wake_sk_sleep() */
+}
+
/*
* This relies on dma_map_sg() not touching sg[].page during merging.
*/
static void rds_message_purge(struct rds_message *rm)
{
- unsigned long i;
+ unsigned long i, flags;
+ bool zcopy = false;
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
+ if (rm->m_rs) {
+ struct rds_sock *rs = rm->m_rs;
+
+ if (rm->data.op_mmp_znotifier) {
+ zcopy = true;
+ rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ rds_wake_sk_sleep(rs);
+ rm->data.op_mmp_znotifier = NULL;
+ }
+ sock_put(rds_rs_to_sk(rs));
+ rm->m_rs = NULL;
+ }
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
for (i = 0; i < rm->data.op_nents; i++) {
- rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
/* XXX will have to put_page for page refs */
- __free_page(sg_page(&rm->data.op_sg[i]));
+ if (!zcopy)
+ __free_page(sg_page(&rm->data.op_sg[i]));
+ else
+ put_page(sg_page(&rm->data.op_sg[i]));
}
rm->data.op_nents = 0;
@@ -266,12 +355,13 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
return rm;
}
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
{
- unsigned long to_copy, nbytes;
- unsigned long sg_off;
struct scatterlist *sg;
int ret = 0;
+ int length = iov_iter_count(from);
+ int total_copied = 0;
+ struct rds_msg_zcopy_info *info;
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
@@ -279,8 +369,67 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
* now allocate and copy in the data payload.
*/
sg = rm->data.op_sg;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&info->rs_zcookie_next);
+ rm->data.op_mmp_znotifier = &info->znotif;
+ if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+ length)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ while (iov_iter_count(from)) {
+ struct page *pages;
+ size_t start;
+ ssize_t copied;
+
+ copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+ 1, &start);
+ if (copied < 0) {
+ struct mmpin *mmp;
+ int i;
+
+ for (i = 0; i < rm->data.op_nents; i++)
+ put_page(sg_page(&rm->data.op_sg[i]));
+ mmp = &rm->data.op_mmp_znotifier->z_mmp;
+ mm_unaccount_pinned_pages(mmp);
+ ret = -EFAULT;
+ goto err;
+ }
+ total_copied += copied;
+ iov_iter_advance(from, copied);
+ length -= copied;
+ sg_set_page(sg, pages, copied, start);
+ rm->data.op_nents++;
+ sg++;
+ }
+ WARN_ON_ONCE(length != 0);
+ return ret;
+err:
+ kfree(info);
+ rm->data.op_mmp_znotifier = NULL;
+ return ret;
+}
+
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy)
+{
+ unsigned long to_copy, nbytes;
+ unsigned long sg_off;
+ struct scatterlist *sg;
+ int ret = 0;
+
+ rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
+
+ /* now allocate and copy in the data payload. */
+ sg = rm->data.op_sg;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
+ if (zcopy)
+ return rds_message_zcopy_from_user(rm, from);
+
while (iov_iter_count(from)) {
if (!sg_page(sg)) {
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7301b9b01890..b04c333d9d1c 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -356,6 +356,30 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
#define RDS_MSG_PAGEVEC 7
#define RDS_MSG_FLUSH 8
+struct rds_znotifier {
+ struct mmpin z_mmp;
+ u32 z_cookie;
+};
+
+struct rds_msg_zcopy_info {
+ struct list_head rs_zcookie_next;
+ union {
+ struct rds_znotifier znotif;
+ struct rds_zcopy_cookies zcookies;
+ };
+};
+
+struct rds_msg_zcopy_queue {
+ struct list_head zcookie_head;
+ spinlock_t lock; /* protects zcookie_head queue */
+};
+
+static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
+{
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->zcookie_head);
+}
+
struct rds_message {
refcount_t m_refcount;
struct list_head m_sock_item;
@@ -436,6 +460,7 @@ struct rds_message {
unsigned int op_count;
unsigned int op_dmasg;
unsigned int op_dmaoff;
+ struct rds_znotifier *op_mmp_znotifier;
struct scatterlist *op_sg;
} data;
};
@@ -589,6 +614,7 @@ struct rds_sock {
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+ struct rds_msg_zcopy_queue rs_zcookie_queue;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -709,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len);
__printf(2, 3)
@@ -771,7 +798,8 @@ rds_conn_connecting(struct rds_connection *conn)
/* message.c */
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+ bool zcopy);
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
__be16 dport, u64 seq);
@@ -786,6 +814,7 @@ void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
void rds_message_unmapped(struct rds_message *rm);
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);
static inline void rds_message_make_checksum(struct rds_header *hdr)
{
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b25bcfe411ca..de50e2126e40 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -577,6 +577,41 @@ out:
return ret;
}
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+ struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue;
+ struct rds_msg_zcopy_info *info = NULL;
+ struct rds_zcopy_cookies *done;
+ unsigned long flags;
+
+ if (!msg->msg_control)
+ return false;
+
+ if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+ msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+ return false;
+
+ spin_lock_irqsave(&q->lock, flags);
+ if (!list_empty(&q->zcookie_head)) {
+ info = list_entry(q->zcookie_head.next,
+ struct rds_msg_zcopy_info, rs_zcookie_next);
+ list_del(&info->rs_zcookie_next);
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+ if (!info)
+ return false;
+ done = &info->zcookies;
+ if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+ done)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_add(&info->rs_zcookie_next, &q->zcookie_head);
+ spin_unlock_irqrestore(&q->lock, flags);
+ return false;
+ }
+ kfree(info);
+ return true;
+}
+
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags)
{
@@ -594,6 +629,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (msg_flags & MSG_OOB)
goto out;
+ if (msg_flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
while (1) {
/* If there are pending notifications, do those - and nothing else */
@@ -609,7 +646,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!rds_next_incoming(rs, &inc)) {
if (nonblock) {
- ret = -EAGAIN;
+ bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+ ret = reaped ? 0 : -EAGAIN;
break;
}
@@ -658,6 +697,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
ret = -EFAULT;
goto out;
}
+ rds_recvmsg_zcookie(rs, msg);
rds_stats_inc(s_recv_delivered);
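rds_recvmsg_zcookie() hands the batched zerocopy completion cookies to userspace as an RDS_CMSG_ZCOPY_COMPLETION control message, either alongside delivered data or on their own when a non-blocking recvmsg() finds nothing pending. A hedged userspace sketch of reaping them; SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION and struct rds_zcopy_cookies come from this series' uapi additions, everything else is illustrative:

/* Hedged sketch: reap zerocopy completion cookies on an RDS socket fd. */
#include <string.h>
#include <sys/socket.h>
#include <linux/rds.h>

static int reap_zcookies(int fd, struct rds_zcopy_cookies *done)
{
	char control[CMSG_SPACE(sizeof(*done))];
	struct msghdr msg = {
		.msg_control = control,
		.msg_controllen = sizeof(control),
	};
	struct cmsghdr *cmsg;

	if (recvmsg(fd, &msg, MSG_DONTWAIT) < 0)
		return -1;		/* -EAGAIN when neither data nor cookies are queued */

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_RDS &&
		    cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
			memcpy(done, CMSG_DATA(cmsg), sizeof(*done));
			return done->num;	/* number of cookies reaped */
		}
	}
	return 0;
}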
diff --git a/net/rds/send.c b/net/rds/send.c
index b1b0022b8370..acad04243b41 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
rm->rdma.op_notifier = NULL;
}
was_on_sock = 1;
- rm->m_rs = NULL;
}
spin_unlock(&rs->rs_lock);
@@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
*/
if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
spin_unlock_irqrestore(&cp->cp_lock, flags);
- spin_lock_irqsave(&rm->m_rs_lock, flags);
- rm->m_rs = NULL;
- spin_unlock_irqrestore(&rm->m_rs_lock, flags);
continue;
}
list_del_init(&rm->m_conn_item);
@@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
@@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
spin_unlock(&rs->rs_lock);
- rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
@@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
rds_message_addref(rm);
+ sock_hold(rds_rs_to_sk(rs));
rm->m_rs = rs;
/* The code ordering is a little weird, but we're
@@ -880,12 +875,13 @@ out:
* rds_message is getting to be quite complicated, and we'd like to allocate
* it all in one go. This figures out how big it needs to be up front.
*/
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
{
struct cmsghdr *cmsg;
int size = 0;
int cmsg_groups = 0;
int retval;
+ bool zcopy_cookie = false;
for_each_cmsghdr(cmsg, msg) {
if (!CMSG_OK(msg, cmsg))
@@ -904,6 +900,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ zcopy_cookie = true;
+ /* fall through */
+
case RDS_CMSG_RDMA_DEST:
case RDS_CMSG_RDMA_MAP:
cmsg_groups |= 2;
@@ -924,7 +924,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
}
- size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+ if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+ return -EINVAL;
+
+ size += num_sgs * sizeof(struct scatterlist);
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
if (cmsg_groups == 3)
@@ -933,6 +936,19 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
return size;
}
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+ struct cmsghdr *cmsg)
+{
+ u32 *cookie;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+ !rm->data.op_mmp_znotifier)
+ return -EINVAL;
+ cookie = CMSG_DATA(cmsg);
+ rm->data.op_mmp_znotifier->z_cookie = *cookie;
+ return 0;
+}
+
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
struct msghdr *msg, int *allocated_mr)
{
@@ -975,6 +991,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
ret = rds_cmsg_atomic(rs, rm, cmsg);
break;
+ case RDS_CMSG_ZCOPY_COOKIE:
+ ret = rds_cmsg_zcopy(rs, rm, cmsg);
+ break;
+
default:
return -EINVAL;
}
@@ -1045,10 +1065,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
long timeo = sock_sndtimeo(sk, nonblock);
struct rds_conn_path *cpath;
size_t total_payload_len = payload_len, rdma_payload_len = 0;
+ bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+ sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+ int num_sgs = ceil(payload_len, PAGE_SIZE);
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
- if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -1092,8 +1115,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
+ if (zcopy) {
+ if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+ }
/* size of rm including all sgs */
- ret = rds_rm_size(msg, payload_len);
+ ret = rds_rm_size(msg, num_sgs);
if (ret < 0)
goto out;
@@ -1105,12 +1135,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Attach data to the rm */
if (payload_len) {
- rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+ rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
if (!rm->data.op_sg) {
ret = -ENOMEM;
goto out;
}
- ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+ ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
if (ret)
goto out;
}
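On the transmit side, zerocopy is requested per sendmsg() with MSG_ZEROCOPY plus a mandatory RDS_CMSG_ZCOPY_COOKIE ancillary message (rds_rm_size() above rejects MSG_ZEROCOPY without the cookie), requires SOCK_ZEROCOPY to have been enabled via setsockopt(SO_ZEROCOPY), and is limited to the TCP transport. A hedged sketch under those assumptions, with socket/destination setup omitted and made-up names:

/* Hedged sketch: zerocopy send on an already-connected RDS-over-TCP socket.
 * Assumes SO_ZEROCOPY was enabled beforehand and that <linux/rds.h> provides
 * SOL_RDS and RDS_CMSG_ZCOPY_COOKIE (added in this series).
 */
#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <linux/rds.h>

#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000		/* for older libc headers */
#endif

static ssize_t rds_send_zcopy(int fd, void *buf, size_t len, uint32_t cookie)
{
	char control[CMSG_SPACE(sizeof(cookie))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = control,
		.msg_controllen = sizeof(control),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_RDS;
	cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;	/* required with MSG_ZEROCOPY */
	cmsg->cmsg_len = CMSG_LEN(sizeof(cookie));
	memcpy(CMSG_DATA(cmsg), &cookie, sizeof(cookie));

	/* the cookie is returned later via RDS_CMSG_ZCOPY_COMPLETION (see recv.c) */
	return sendmsg(fd, &msg, MSG_ZEROCOPY);
}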
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 44c4652721af..4f3a32c38bf5 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
struct rds_tcp_connection *tc;
unsigned long flags;
struct sockaddr_in sin;
- int sinlen;
struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
sock = tc->t_sock;
if (sock) {
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 0);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
tsinfo.local_addr = sin.sin_addr.s_addr;
tsinfo.local_port = sin.sin_port;
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 1);
+ sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
tsinfo.peer_addr = sin.sin_addr.s_addr;
tsinfo.peer_port = sin.sin_port;
}
@@ -275,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
@@ -311,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
tc = conn->c_path[i].cp_transport_data;
tc->t_tcp_node_detached = false;
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
fail:
if (ret) {
for (j = 0; j < i; j++)
@@ -487,39 +485,6 @@ fail:
return err;
}
-static void __net_exit rds_tcp_exit_net(struct net *net)
-{
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
-
- if (rtn->rds_tcp_sysctl)
- unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-
- if (net != &init_net && rtn->ctl_table)
- kfree(rtn->ctl_table);
-
- /* If rds_tcp_exit_net() is called as a result of netns deletion,
- * the rds_tcp_kill_sock() device notifier would already have cleaned
- * up the listen socket, thus there is no work to do in this function.
- *
- * If rds_tcp_exit_net() is called as a result of module unload,
- * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
- * we do need to clean up the listen socket here.
- */
- if (rtn->rds_tcp_listen_sock) {
- struct socket *lsock = rtn->rds_tcp_listen_sock;
-
- rtn->rds_tcp_listen_sock = NULL;
- rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- }
-}
-
-static struct pernet_operations rds_tcp_net_ops = {
- .init = rds_tcp_init_net,
- .exit = rds_tcp_exit_net,
- .id = &rds_tcp_netid,
- .size = sizeof(struct rds_tcp_net),
-};
-
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
@@ -529,7 +494,7 @@ static void rds_tcp_kill_sock(struct net *net)
rtn->rds_tcp_listen_sock = NULL;
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -542,45 +507,43 @@ static void rds_tcp_kill_sock(struct net *net)
tc->t_tcp_node_detached = true;
}
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
}
-void *rds_tcp_listen_sock_def_readable(struct net *net)
+static void __net_exit rds_tcp_exit_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
- struct socket *lsock = rtn->rds_tcp_listen_sock;
- if (!lsock)
- return NULL;
+ rds_tcp_kill_sock(net);
- return lsock->sk->sk_user_data;
+ if (rtn->rds_tcp_sysctl)
+ unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+
+ if (net != &init_net && rtn->ctl_table)
+ kfree(rtn->ctl_table);
}
-static int rds_tcp_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static struct pernet_operations rds_tcp_net_ops = {
+ .init = rds_tcp_init_net,
+ .exit = rds_tcp_exit_net,
+ .id = &rds_tcp_netid,
+ .size = sizeof(struct rds_tcp_net),
+ .async = true,
+};
+
+void *rds_tcp_listen_sock_def_readable(struct net *net)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- /* rds-tcp registers as a pernet subys, so the ->exit will only
- * get invoked after network acitivity has quiesced. We need to
- * clean up all sockets to quiesce network activity, and use
- * the unregistration of the per-net loopback device as a trigger
- * to start that cleanup.
- */
- if (event == NETDEV_UNREGISTER_FINAL &&
- dev->ifindex == LOOPBACK_IFINDEX)
- rds_tcp_kill_sock(dev_net(dev));
+ if (!lsock)
+ return NULL;
- return NOTIFY_DONE;
+ return lsock->sk->sk_user_data;
}
-static struct notifier_block rds_tcp_dev_notifier = {
- .notifier_call = rds_tcp_dev_event,
- .priority = -10, /* must be called after other network notifiers */
-};
-
/* when sysctl is used to modify some kernel socket parameters,this
* function resets the RDS connections in that netns so that we can
* restart with new parameters. The assumption is that such reset
@@ -590,7 +553,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -600,7 +563,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
/* reconnect with new parameters */
rds_conn_path_drop(tc->t_cpath, false);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
}
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -626,9 +589,7 @@ static void rds_tcp_exit(void)
rds_tcp_set_unloading();
synchronize_rcu();
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
- unregister_pernet_subsys(&rds_tcp_net_ops);
- if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
- pr_warn("could not unregister rds_tcp_dev_notifier\n");
+ unregister_pernet_device(&rds_tcp_net_ops);
rds_tcp_destroy_conns();
rds_trans_unregister(&rds_tcp_transport);
rds_tcp_recv_exit();
@@ -652,24 +613,15 @@ static int rds_tcp_init(void)
if (ret)
goto out_slab;
- ret = register_pernet_subsys(&rds_tcp_net_ops);
+ ret = register_pernet_device(&rds_tcp_net_ops);
if (ret)
goto out_recv;
- ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
- if (ret) {
- pr_warn("could not register rds_tcp_dev_notifier\n");
- goto out_pernet;
- }
-
rds_trans_register(&rds_tcp_transport);
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
-
-out_pernet:
- unregister_pernet_subsys(&rds_tcp_net_ops);
out_recv:
rds_tcp_recv_exit();
out_slab:
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 083bd251406f..5170373b797c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -938,7 +938,7 @@ out_release:
}
static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
struct sock *sk = sock->sk;
@@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
srose->srose_digis[n] = rose->source_digis[n];
}
- *uaddr_len = sizeof(struct full_sockaddr_rose);
- return 0;
+ return sizeof(struct full_sockaddr_rose);
}
int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 9d45d8b56744..7bff716e911e 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int *_offset, unsigned int *_len)
{
unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = *_len;
+ unsigned int len;
int ret;
u8 annotation = *_annotation;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f24a6ae6819a..a01169fb5325 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -658,6 +658,18 @@ config NET_EMATCH_IPSET
To compile this code as a module, choose M here: the
module will be called em_ipset.
+config NET_EMATCH_IPT
+ tristate "IPtables Matches"
+ depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+ ---help---
+ Say Y here to be able to classify packets based on iptables
+ matches.
+ The currently supported match is "policy", which allows packets to be
+ classified based on the IPsec policy that was used during decapsulation.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_ipt.
+
config NET_CLS_ACT
bool "Actions"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5b635447e3f8..8811d3804878 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o
obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT) += em_ipt.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eba6682727dd..57cf37145282 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
}
EXPORT_SYMBOL(__tcf_idr_release);
+static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+{
+ u32 cookie_len = 0;
+
+ if (act->act_cookie)
+ cookie_len = nla_total_size(act->act_cookie->len);
+
+ return nla_total_size(0) /* action number nested */
+ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ + cookie_len /* TCA_ACT_COOKIE */
+ + nla_total_size(0) /* TCA_ACT_STATS nested */
+ /* TCA_STATS_BASIC */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+ /* TCA_STATS_QUEUE */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+ + nla_total_size(0) /* TCA_OPTIONS nested */
+ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
+}
+
+static size_t tcf_action_full_attrs_size(size_t sz)
+{
+ return NLMSG_HDRLEN /* struct nlmsghdr */
+ + sizeof(struct tcamsg)
+ + nla_total_size(0) /* TCA_ACT_TAB nested */
+ + sz;
+}
+
+static size_t tcf_action_fill_size(const struct tc_action *act)
+{
+ size_t sz = tcf_action_shared_attrs_size(act);
+
+ if (act->ops->get_fill_size)
+ return act->ops->get_fill_size(act) + sz;
+ return sz;
+}
+
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -202,7 +238,8 @@ nla_put_failure:
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -211,7 +248,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
- WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+ WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+ NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
return -EINVAL;
}
}
@@ -605,7 +643,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind)
+ char *name, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action *a;
struct tc_action_ops *a_o;
@@ -616,31 +655,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
int err;
if (name == NULL) {
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- if (kind == NULL)
+ if (!kind) {
+ NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
- if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+ }
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
goto err_out;
+ }
if (tb[TCA_ACT_COOKIE]) {
int cklen = nla_len(tb[TCA_ACT_COOKIE]);
- if (cklen > TC_COOKIE_MAX_SIZE)
+ if (cklen > TC_COOKIE_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
goto err_out;
+ }
cookie = nla_memdup_cookie(tb);
if (!cookie) {
+ NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
err = -ENOMEM;
goto err_out;
}
}
} else {
- err = -EINVAL;
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+ if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
+ err = -EINVAL;
goto err_out;
+ }
}
a_o = tc_lookup_action_n(act_name);
@@ -663,15 +711,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
goto err_mod;
}
#endif
+ NL_SET_ERR_MSG(extack, "Failed to load TC action module");
err = -ENOENT;
goto err_out;
}
/* backward compatibility for policer */
if (name == NULL)
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+ extack);
else
- err = a_o->init(net, nla, est, &a, ovr, bind);
+ err = a_o->init(net, nla, est, &a, ovr, bind, extack);
if (err < 0)
goto err_mod;
@@ -697,6 +747,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
list_add_tail(&a->list, &actions);
tcf_action_destroy(&actions, bind);
+ NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
return ERR_PTR(err);
}
}
@@ -726,29 +777,35 @@ static void cleanup_a(struct list_head *actions, int ovr)
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions)
+ struct list_head *actions, size_t *attr_size,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t sz = 0;
int err;
int i;
- err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (err < 0)
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+ act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+ extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
}
act->order = i;
+ sz += tcf_action_fill_size(act);
if (ovr)
act->tcfa_refcnt++;
list_add_tail(&act->list, actions);
}
+ *attr_size = tcf_action_full_attrs_size(sz);
+
/* Remove the temp refcnt which was necessary to protect against
* destroying an existing action which was being replaced
*/
@@ -822,7 +879,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest)
goto out_nlmsg_trim;
if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -840,7 +897,8 @@ out_nlmsg_trim:
static int
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct list_head *actions, int event)
+ struct list_head *actions, int event,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
@@ -849,6 +907,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
@@ -857,7 +916,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
}
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
@@ -865,22 +925,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
int index;
int err;
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
if (tb[TCA_ACT_INDEX] == NULL ||
- nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+ nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+ NL_SET_ERR_MSG(extack, "Invalid TC action index value");
goto err_out;
+ }
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
- if (!ops) /* could happen in batch of actions */
+ if (!ops) { /* could happen in batch of actions */
+ NL_SET_ERR_MSG(extack, "Specified TC action not found");
goto err_out;
+ }
err = -ENOENT;
- if (ops->lookup(net, &a, index) == 0)
+ if (ops->lookup(net, &a, index, extack) == 0)
goto err_mod;
module_put(ops->owner);
@@ -893,7 +957,8 @@ err_out:
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid)
+ struct nlmsghdr *n, u32 portid,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
unsigned char *b;
@@ -907,39 +972,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
int err = -ENOMEM;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb) {
- pr_debug("tca_action_flush: failed skb alloc\n");
+ if (!skb)
return err;
- }
b = skb_tail_pointer(skb);
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
ops = tc_lookup_action(kind);
- if (!ops) /*some idjot trying to flush unknown action */
+ if (!ops) { /*some idjot trying to flush unknown action */
+ NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
goto err_out;
+ }
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
sizeof(*t), 0);
- if (!nlh)
+ if (!nlh) {
+ NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
goto out_module_put;
+ }
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
+ if (!nest) {
+ NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
goto out_module_put;
+ }
- err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
- if (err <= 0)
+ err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+ if (err <= 0) {
+ nla_nest_cancel(skb, nest);
goto out_module_put;
+ }
nla_nest_end(skb, nest);
@@ -950,6 +1021,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
return err;
@@ -962,17 +1035,19 @@ err_out:
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 1) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
}
@@ -980,6 +1055,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
/* now do the delete */
ret = tcf_action_destroy(actions, 0);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
return ret;
}
@@ -993,38 +1069,43 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 portid, int event)
+ u32 portid, int event, struct netlink_ext_ack *extack)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t attr_size = 0;
LIST_HEAD(actions);
- ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
- if (tb[1] != NULL)
- return tca_action_flush(net, tb[1], n, portid);
- else
- return -EINVAL;
+ if (tb[1])
+ return tca_action_flush(net, tb[1], n, portid, extack);
+
+ NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
+ return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_get_1(net, tb[i], n, portid);
+ act = tcf_action_get_1(net, tb[i], n, portid, extack);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
}
act->order = i;
+ attr_size += tcf_action_fill_size(act);
list_add_tail(&act->list, &actions);
}
+ attr_size = tcf_action_full_attrs_size(attr_size);
+
if (event == RTM_GETACTION)
- ret = tcf_get_notify(net, portid, n, &actions, event);
+ ret = tcf_get_notify(net, portid, n, &actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, &actions, portid);
+ ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
if (ret)
goto err;
return ret;
@@ -1037,17 +1118,19 @@ err:
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
int err = 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
}
@@ -1060,16 +1143,19 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr)
+ struct nlmsghdr *n, u32 portid, int ovr,
+ struct netlink_ext_ack *extack)
{
+ size_t attr_size = 0;
int ret = 0;
LIST_HEAD(actions);
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+ &attr_size, extack);
if (ret)
return ret;
- return tcf_add_notify(net, n, &actions, portid);
+ return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
@@ -1097,7 +1183,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
return ret;
if (tca[TCA_ACT_TAB] == NULL) {
- pr_notice("tc_ctl_action: received NO action attribs\n");
+ NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
return -EINVAL;
}
@@ -1113,17 +1199,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ extack);
if (ret == -EAGAIN)
goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_DELACTION);
+ portid, RTM_DELACTION, extack);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- portid, RTM_GETACTION);
+ portid, RTM_GETACTION, extack);
break;
default:
BUG();
@@ -1218,7 +1305,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto out_module_put;
- ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+ ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
if (ret < 0)
goto out_module_put;
@@ -1454,6 +1541,7 @@ static struct pernet_operations tcf_action_net_ops = {
.exit = tcf_action_net_exit,
.id = &tcf_action_net_id,
.size = sizeof(struct tcf_action_net),
+ .async = true,
};
static int __init tc_action_init(void)
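A minimal sketch (illustrative only, not part of the patch) of how the new size helpers are meant to be consumed: callers sum tcf_action_fill_size() over the actions they are about to dump, convert the total with tcf_action_full_attrs_size(), and size the notification skb from that instead of always assuming NLMSG_GOODSIZE. The helper name below is hypothetical.

	static int example_action_notify(struct list_head *actions)	/* hypothetical */
	{
		struct tc_action *act;
		size_t attr_size = 0;
		struct sk_buff *skb;

		list_for_each_entry(act, actions, list)
			attr_size += tcf_action_fill_size(act);
		attr_size = tcf_action_full_attrs_size(attr_size);

		/* Small messages still use the usual default size. */
		skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
				GFP_KERNEL);
		if (!skb)
			return -ENOBUFS;
		/* ... fill attributes and send, as tcf_add_notify()/tcf_del_notify() do ... */
		consume_skb(skb);
		return 0;
	}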
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9d2cabf1dc7e..5cb9b268e8ff 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind)
+ int replace, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -367,14 +367,16 @@ static void tcf_bpf_cleanup(struct tc_action *act)
static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
@@ -411,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
.exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init bpf_init_module(void)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b15ba84e0c8..371e5e4ab3e2 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
@@ -176,14 +177,16 @@ nla_put_failure:
static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -219,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
.exit_batch = connmark_exit_net,
.id = &connmark_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init connmark_init_module(void)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 2a5c8fd860cf..a527e287c086 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_old, *params_new;
@@ -632,14 +632,16 @@ static void tcf_csum_cleanup(struct tc_action *a)
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
@@ -676,6 +678,7 @@ static struct pernet_operations csum_net_ops = {
.exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Checksum updating actions");
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b56986d41c87..88fbb8403565 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -201,20 +201,35 @@ nla_put_failure:
static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_gact_get_fill_size(const struct tc_action *act)
+{
+ size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
+
+#ifdef CONFIG_GACT_PROB
+ if (to_gact(act)->tcfg_ptype)
+ /* TCA_GACT_PROB */
+ sz += nla_total_size(sizeof(struct tc_gact_p));
+#endif
+
+ return sz;
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -225,6 +240,7 @@ static struct tc_action_ops act_gact_ops = {
.init = tcf_gact_init,
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
+ .get_fill_size = tcf_gact_get_fill_size,
.size = sizeof(struct tcf_gact),
};
@@ -245,6 +261,7 @@ static struct pernet_operations gact_net_ops = {
.exit_batch = gact_exit_net,
.id = &gact_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
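As a hedged illustration of the new .get_fill_size hook (the action, struct name and attribute below are hypothetical), an action whose dump emits a single fixed-size parameter attribute would estimate its contribution like this, and wire the function into its tc_action_ops via the .get_fill_size field:

	static size_t tcf_foo_get_fill_size(const struct tc_action *act)	/* hypothetical */
	{
		return nla_total_size(sizeof(struct tc_foo));	/* TCA_FOO_PARMS */
	}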
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5954e992685a..555b1caeff72 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
@@ -868,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
.exit_batch = ife_exit_net,
.id = &ife_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init ife_init_module(void)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 7e06b9b62613..b5e8565b89c7 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -196,7 +196,7 @@ err1:
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
bind);
@@ -204,7 +204,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
bind);
@@ -306,14 +306,16 @@ nla_put_failure:
static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
@@ -350,18 +352,21 @@ static struct pernet_operations ipt_net_ops = {
.exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
@@ -398,6 +403,7 @@ static struct pernet_operations xt_net_ops = {
.exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e6ff88f72900..64c86579c3d9 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
bool exists = false;
int ret;
- if (nla == NULL)
+ if (!nla) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+ }
+ ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
if (ret < 0)
return ret;
- if (tb[TCA_MIRRED_PARMS] == NULL)
+ if (!tb[TCA_MIRRED_PARMS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
return -EINVAL;
+ }
parm = nla_data(tb[TCA_MIRRED_PARMS]);
exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
default:
if (exists)
tcf_idr_release(*a, bind);
+ NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
return -EINVAL;
}
if (parm->ifindex) {
@@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- if (dev == NULL)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
+ }
ret = tcf_idr_create(tn, parm->index, est, a,
&act_mirred_ops, bind, true);
if (ret)
@@ -265,14 +272,16 @@ nla_put_failure:
static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
@@ -344,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
.exit_batch = mirred_exit_net,
.id = &mirred_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
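The extack-aware init pattern applied to mirred above is the shape now expected from every action module; a hedged sketch of a converted callback (the "foo" action, its policy and attribute set are hypothetical):

	static int tcf_foo_init(struct net *net, struct nlattr *nla,
				struct nlattr *est, struct tc_action **a,
				int ovr, int bind, struct netlink_ext_ack *extack)
	{
		struct nlattr *tb[TCA_FOO_MAX + 1];	/* hypothetical attribute set */
		int err;

		if (!nla) {
			NL_SET_ERR_MSG_MOD(extack, "foo requires attributes to be passed");
			return -EINVAL;
		}
		err = nla_parse_nested(tb, TCA_FOO_MAX, nla, foo_policy, extack);
		if (err < 0)
			return err;
		if (!tb[TCA_FOO_PARMS]) {
			NL_SET_ERR_MSG_MOD(extack, "Missing required foo parameters");
			return -EINVAL;
		}
		/* ... create or update the action as before ... */
		return 0;
	}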
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 98c6a4b2f523..b1bc757f6491 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind)
+ struct tc_action **a, int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -277,14 +278,16 @@ nla_put_failure:
static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
@@ -320,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
.exit_batch = nat_exit_net,
.id = &nat_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Stateless NAT actions");
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index fef08835f26d..f392ccaaa0d8 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -419,14 +419,16 @@ nla_put_failure:
static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
@@ -463,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
.exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index faebf82b99f1..7081ec75e696 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops;
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_act_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind,
+ struct netlink_ext_ack *extack)
{
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
@@ -304,7 +306,8 @@ nla_put_failure:
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
@@ -344,6 +347,7 @@ static struct pernet_operations police_net_ops = {
.exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init police_init_module(void)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 74c5d7e6a0fa..3a89f98f17e6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind)
+ int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -203,14 +203,16 @@ nla_put_failure:
static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
@@ -247,6 +249,7 @@ static struct pernet_operations sample_net_ops = {
.exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init sample_init_module(void)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index b1f38063ada0..e84768ae610a 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -170,14 +170,16 @@ nla_put_failure:
static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
@@ -214,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
.exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 5a3f691bb545..7971510fe61b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
@@ -208,14 +208,16 @@ nla_put_failure:
static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
@@ -251,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
.exit_batch = skbedit_exit_net,
.id = &skbedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 7b0700f52b50..142a996ac776 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -233,14 +233,16 @@ nla_put_failure:
static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
@@ -277,6 +279,7 @@ static struct pernet_operations skbmod_net_ops = {
.exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 1281ca463727..a1c8dd406a04 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -293,14 +293,16 @@ nla_put_failure:
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
@@ -337,6 +339,7 @@ static struct pernet_operations tunnel_key_net_ops = {
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init tunnel_key_init_module(void)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index c49cb61adedf..4595391c2129 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind)
+ int ovr, int bind, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -268,14 +268,16 @@ nla_put_failure:
static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_generic_walker(tn, skb, cb, type, ops);
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
@@ -312,6 +314,7 @@ static struct pernet_operations vlan_net_ops = {
.exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init vlan_init_module(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 247b7cc20c13..ec5fe8ec0c3e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1433,11 +1433,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
+ size_t attr_size = 0;
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND);
+ TCA_ACT_BIND, extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions);
+ &actions, &attr_size, extack);
if (err)
return err;
list_for_each_entry(act, &actions, list)
@@ -1618,6 +1619,7 @@ static struct pernet_operations tcf_net_ops = {
.exit = tcf_net_exit,
.id = &tcf_net_id,
.size = sizeof(struct tcf_net),
+ .async = true,
};
static int __init tc_filter_init(void)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7d0ce2c40f93..d964e60c730e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb,
fl_set_key_flag(key, mask, flags_key, flags_mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
return 0;
}
@@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
fl_get_key_flag(flags_key, flags_mask, &key, &mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
_key = cpu_to_be32(key);
_mask = cpu_to_be32(mask);
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644
index 000000000000..a5f34e930eff
--- /dev/null
+++ b/net/sched/em_ipt.c
@@ -0,0 +1,257 @@
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+ const struct xt_match *match;
+ u32 hook;
+ u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+ char *match_name;
+ int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+ [TCA_EM_IPT_MATCH_NAME] = { .type = NLA_STRING,
+ .len = XT_EXTENSION_MAXNAMELEN },
+ [TCA_EM_IPT_MATCH_REVISION] = { .type = NLA_U8 },
+ [TCA_EM_IPT_HOOK] = { .type = NLA_U32 },
+ [TCA_EM_IPT_NFPROTO] = { .type = NLA_U8 },
+ [TCA_EM_IPT_MATCH_DATA] = { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+ struct xt_mtchk_param mtpar = {};
+ union {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+ } e = {};
+
+ mtpar.net = net;
+ mtpar.table = "filter";
+ mtpar.hook_mask = 1 << im->hook;
+ mtpar.family = im->match->family;
+ mtpar.match = im->match;
+ mtpar.entryinfo = &e;
+ mtpar.matchinfo = (void *)im->match_data;
+ return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+ if (mrev != 0) {
+ pr_err("only policy match revision 0 supported");
+ return -EINVAL;
+ }
+
+ if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+ pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+ {
+ .match_name = "policy",
+ .validate_match_data = policy_validate_match_data
+ },
+ {}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+ const struct em_ipt_xt_match *m;
+ struct nlattr *mname_attr;
+ u8 nfproto, mrev = 0;
+ int ret;
+
+ mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+ for (m = em_ipt_xt_matches; m->match_name; m++) {
+ if (!nla_strcmp(mname_attr, m->match_name))
+ break;
+ }
+
+ if (!m->match_name) {
+ pr_err("Unsupported xt match");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (tb[TCA_EM_IPT_MATCH_REVISION])
+ mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+ ret = m->validate_match_data(tb, mrev);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+ return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+ struct tcf_ematch *em)
+{
+ struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+ struct em_ipt_match *im = NULL;
+ struct xt_match *match;
+ int mdata_len, ret;
+
+ ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+ NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+ !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+ return -EINVAL;
+
+ match = get_xt_match(tb);
+ if (IS_ERR(match)) {
+ pr_err("unable to load match\n");
+ return PTR_ERR(match);
+ }
+
+ mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+ im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+ if (!im) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ im->match = match;
+ im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+ nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+ ret = check_match(net, im, mdata_len);
+ if (ret)
+ goto err;
+
+ em->datalen = sizeof(*im) + mdata_len;
+ em->data = (unsigned long)im;
+ return 0;
+
+err:
+ kfree(im);
+ module_put(match->me);
+ return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (!im)
+ return;
+
+ if (im->match->destroy) {
+ struct xt_mtdtor_param par = {
+ .net = em->net,
+ .match = im->match,
+ .matchinfo = im->match_data,
+ .family = im->match->family
+ };
+ im->match->destroy(&par);
+ }
+ module_put(im->match->me);
+ kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ const struct em_ipt_match *im = (const void *)em->data;
+ struct xt_action_param acpar = {};
+ struct net_device *indev = NULL;
+ struct nf_hook_state state;
+ int ret;
+
+ rcu_read_lock();
+
+ if (skb->skb_iif)
+ indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+ nf_hook_state_init(&state, im->hook, im->match->family,
+ indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+ acpar.match = im->match;
+ acpar.matchinfo = im->match_data;
+ acpar.state = &state;
+
+ ret = im->match->match(skb, &acpar);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+ struct em_ipt_match *im = (void *)em->data;
+
+ if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+ return -EMSGSIZE;
+ if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+ im->match->usersize ?: im->match->matchsize,
+ im->match_data) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+ .kind = TCF_EM_IPT,
+ .change = em_ipt_change,
+ .destroy = em_ipt_destroy,
+ .match = em_ipt_match,
+ .dump = em_ipt_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+ return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+ tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);
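Supporting another xt match in this ematch is intended to be a matter of adding one entry to em_ipt_xt_matches with its own revision/hook validation; a hedged sketch (the "foo" match is hypothetical):

	static int foo_validate_match_data(struct nlattr **tb, u8 mrev)	/* hypothetical */
	{
		if (mrev != 0) {
			pr_err("only foo match revision 0 supported");
			return -EINVAL;
		}
		return 0;
	}

	static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
		{ .match_name = "policy", .validate_match_data = policy_validate_match_data },
		{ .match_name = "foo", .validate_match_data = foo_validate_match_data },
		{}
	};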
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d512f49ee83c..68f9d942bed4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seen as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -2128,6 +2133,7 @@ static void __net_exit psched_net_exit(struct net *net)
static struct pernet_operations psched_net_ops = {
.init = psched_net_init,
.exit = psched_net_exit,
+ .async = true,
};
static int __init pktsched_init(void)
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report error if the graft is part of a destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
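On the driver side, the new TC_PRIO_GRAFT command would be handled alongside TC_PRIO_REPLACE in the ndo_setup_tc() path for TC_SETUP_QDISC_PRIO; a hedged sketch of such a handler (driver and function names are hypothetical):

	static int foo_setup_tc_prio(struct net_device *dev,
				     struct tc_prio_qopt_offload *p)	/* hypothetical */
	{
		switch (p->command) {
		case TC_PRIO_REPLACE:
			/* program p->replace_params.bands and .priomap into HW */
			return 0;
		case TC_PRIO_GRAFT:
			/* re-bind band p->graft_params.band to the child qdisc
			 * identified by p->graft_params.child_handle
			 */
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}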
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6776582ec449..e845e4588535 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
offload.o stream_sched.o stream_sched_prio.o \
stream_sched_rr.o stream_interleave.o
+sctp_diag-y := diag.o
+
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 00667c50efa7..e64630cd3331 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
return NULL;
INIT_LIST_HEAD(&new->key_list);
+ refcount_set(&new->refcnt, 1);
new->key_id = key_id;
return new;
}
/* Free the shared key structure */
-static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
+static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key)
{
BUG_ON(!list_empty(&sh_key->key_list));
sctp_auth_key_put(sh_key->key);
@@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
kfree(sh_key);
}
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key)
+{
+ if (refcount_dec_and_test(&sh_key->refcnt))
+ sctp_auth_shkey_destroy(sh_key);
+}
+
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key)
+{
+ refcount_inc(&sh_key->refcnt);
+}
+
/* Destroy the entire key list. This is done during the
* association and endpoint free process.
*/
@@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys)
key_for_each_safe(ep_key, tmp, keys) {
list_del_init(&ep_key->key_list);
- sctp_auth_shkey_free(ep_key);
+ sctp_auth_shkey_release(ep_key);
}
}
@@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
sctp_auth_key_put(asoc->asoc_shared_key);
asoc->asoc_shared_key = secret;
+ asoc->shkey = ep_key;
/* Update send queue in case any chunk already in there now
* needs authenticating
*/
list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
- if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+ if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) {
chunk->auth = 1;
+ if (!chunk->shkey) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
+ }
}
return 0;
@@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey(
/* First search associations set of endpoint pair shared keys */
key_for_each(key, &asoc->endpoint_shared_keys) {
- if (key->key_id == key_id)
- return key;
+ if (key->key_id == key_id) {
+ if (!key->deactivated)
+ return key;
+ break;
+ }
}
return NULL;
@@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
* after the AUTH chunk in the SCTP packet.
*/
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
- struct sk_buff *skb,
- struct sctp_auth_chunk *auth,
- gfp_t gfp)
+ struct sk_buff *skb, struct sctp_auth_chunk *auth,
+ struct sctp_shared_key *ep_key, gfp_t gfp)
{
- struct crypto_shash *tfm;
struct sctp_auth_bytes *asoc_key;
+ struct crypto_shash *tfm;
__u16 key_id, hmac_id;
- __u8 *digest;
unsigned char *end;
int free_key = 0;
+ __u8 *digest;
/* Extract the info we need:
* - hmac id
@@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
if (key_id == asoc->active_key_id)
asoc_key = asoc->asoc_shared_key;
else {
- struct sctp_shared_key *ep_key;
-
- ep_key = sctp_auth_get_shkey(asoc, key_id);
- if (!ep_key)
- return;
-
+ /* ep_key can't be NULL here */
asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp);
if (!asoc_key)
return;
@@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
struct sctp_association *asoc,
struct sctp_authkey *auth_key)
{
- struct sctp_shared_key *cur_key = NULL;
+ struct sctp_shared_key *cur_key, *shkey;
struct sctp_auth_bytes *key;
struct list_head *sh_keys;
int replace = 0;
@@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
else
sh_keys = &ep->endpoint_shared_keys;
- key_for_each(cur_key, sh_keys) {
- if (cur_key->key_id == auth_key->sca_keynumber) {
+ key_for_each(shkey, sh_keys) {
+ if (shkey->key_id == auth_key->sca_keynumber) {
replace = 1;
break;
}
}
- /* If we are not replacing a key id, we need to allocate
- * a shared key.
- */
- if (!replace) {
- cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber,
- GFP_KERNEL);
- if (!cur_key)
- return -ENOMEM;
- }
+ cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL);
+ if (!cur_key)
+ return -ENOMEM;
/* Create a new key data based on the info passed in */
key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
- if (!key)
- goto nomem;
+ if (!key) {
+ kfree(cur_key);
+ return -ENOMEM;
+ }
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
+ cur_key->key = key;
- /* If we are replacing, remove the old keys data from the
- * key id. If we are adding new key id, add it to the
- * list.
- */
- if (replace)
- sctp_auth_key_put(cur_key->key);
- else
- list_add(&cur_key->key_list, sh_keys);
+ if (replace) {
+ list_del_init(&shkey->key_list);
+ sctp_auth_shkey_release(shkey);
+ }
+ list_add(&cur_key->key_list, sh_keys);
- cur_key->key = key;
return 0;
-nomem:
- if (!replace)
- sctp_auth_shkey_free(cur_key);
-
- return -ENOMEM;
}
int sctp_auth_set_active_key(struct sctp_endpoint *ep,
@@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
}
}
- if (!found)
+ if (!found || key->deactivated)
return -EINVAL;
if (asoc) {
@@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
/* Delete the shared key */
list_del_init(&key->key_list);
- sctp_auth_shkey_free(key);
+ sctp_auth_shkey_release(key);
+
+ return 0;
+}
+
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+ struct sctp_association *asoc, __u16 key_id)
+{
+ struct sctp_shared_key *key;
+ struct list_head *sh_keys;
+ int found = 0;
+
+ /* The key identifier MUST NOT be the current active key
+ * The key identifier MUST correspond to an existing key
+ */
+ if (asoc) {
+ if (asoc->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &asoc->endpoint_shared_keys;
+ } else {
+ if (ep->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &ep->endpoint_shared_keys;
+ }
+
+ key_for_each(key, sh_keys) {
+ if (key->key_id == key_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return -EINVAL;
+
+ /* refcnt == 1 and !list_empty mean it's not being used anywhere
+ * and deactivated will be set, so it's time to notify userland
+ * that this shkey can be freed.
+ */
+ if (asoc && !list_empty(&key->key_list) &&
+ refcount_read(&key->refcnt) == 1) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, key->key_id,
+ SCTP_AUTH_FREE_KEY, GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+
+ key->deactivated = 1;
return 0;
}
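The refcounting introduced here follows the usual hold/release pairing; a minimal sketch of the pattern the patch applies when pinning a shared key to a chunk (illustrative only):

	/* while the chunk references the endpoint-pair shared key ... */
	chunk->shkey = asoc->shkey;
	sctp_auth_shkey_hold(chunk->shkey);

	/* ... and when the chunk no longer needs it: */
	sctp_auth_shkey_release(chunk->shkey);
	chunk->shkey = NULL;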
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 991a530c6b31..f889a84f264d 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
{
size_t len, first_len, max_data, remaining;
size_t msg_len = iov_iter_count(from);
+ struct sctp_shared_key *shkey = NULL;
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_datamsg *msg;
@@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (hmac_desc)
max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
hmac_desc->hmac_len);
+
+ if (sinfo->sinfo_tsn &&
+ sinfo->sinfo_ssn != asoc->active_key_id) {
+ shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn);
+ if (!shkey) {
+ err = -EINVAL;
+ goto errout;
+ }
+ } else {
+ shkey = asoc->shkey;
+ }
}
/* Check what's our max considering the above */
@@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (err < 0)
goto errout_chunk_free;
+ chunk->shkey = shkey;
+
/* Put the chunk->skb back into the form expected by send. */
__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
chunk->skb->data);
diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c
index a72a7d925d46..078f01a8d582 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/diag.c
@@ -1,3 +1,34 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ *    email address(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
#include <linux/module.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e35d4f73d2df..0d873c58e516 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
/* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
int rc;
- rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+ rc = inet6_getname(sock, uaddr, peer);
- if (rc != 0)
+ if (rc < 0)
return rc;
- *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+ rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
(union sctp_addr *)uaddr);
return rc;
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index aeea6da81441..fd2684ad94c8 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net)
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
-
-/* Cleanup the objcount entry in the proc filesystem. */
-void sctp_dbg_objcnt_exit(struct net *net)
-{
- remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
-}
-
-
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 01a26ee051e3..d6e1c90cc09a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
if (!chunk->auth)
return retval;
- auth = sctp_make_auth(asoc);
+ auth = sctp_make_auth(asoc, chunk->shkey->key_id);
if (!auth)
return retval;
+ auth->shkey = chunk->shkey;
+ sctp_auth_shkey_hold(auth->shkey);
+
retval = __sctp_packet_append_chunk(pkt, auth);
if (retval != SCTP_XMIT_OK)
@@ -490,7 +493,8 @@ merge:
}
if (auth) {
- sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+ sctp_auth_calculate_hmac(tp->asoc, nskb, auth,
+ packet->auth->shkey, gfp);
/* free auth if no more chunks, or add it back */
if (list_empty(&packet->chunk_list))
sctp_chunk_free(packet->auth);
@@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
enum sctp_xmit retval = SCTP_XMIT_OK;
size_t psize, pmtu, maxsize;
+ /* Don't bundle in this packet if this chunk's auth key doesn't
+ * match other chunks already enqueued on this packet. Also,
+ * don't bundle the chunk with auth key if other chunks in this
+ * packet don't have auth key.
+ */
+ if ((packet->auth && chunk->shkey != packet->auth->shkey) ||
+ (!packet->auth && chunk->shkey &&
+ chunk->chunk_hdr->type != SCTP_CID_AUTH))
+ return SCTP_XMIT_PMTU_FULL;
+
psize = packet->size;
if (packet->transport->asoc)
pmtu = packet->transport->asoc->pathmtu;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 537545ebcb0e..17d0155d9de3 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = {
.release = single_release_net,
};
-/* Set up the proc fs entry for 'snmp' object. */
-int __net_init sctp_snmp_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_snmp_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(struct net *net)
-{
- remove_proc_entry("snmp", net->sctp.proc_net_sctp);
-}
-
/* Dump local addresses of an association/endpoint. */
static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
{
@@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'eps' object. */
-int __net_init sctp_eps_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_eps_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(struct net *net)
-{
- remove_proc_entry("eps", net->sctp.proc_net_sctp);
-}
-
struct sctp_ht_iter {
struct seq_net_private p;
struct rhashtable_iter hti;
@@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'assocs' object. */
-int __net_init sctp_assocs_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_assocs_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(struct net *net)
-{
- remove_proc_entry("assocs", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
@@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = {
.show = sctp_remaddr_seq_show,
};
-/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(struct net *net)
-{
- remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_remaddr_ops,
@@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.release = seq_release_net,
};
-int __net_init sctp_remaddr_proc_init(struct net *net)
+/* Set up the proc fs entry for the SCTP protocol. */
+int __net_init sctp_proc_init(struct net *net)
{
- struct proc_dir_entry *p;
-
- p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_remaddr_seq_fops);
- if (!p)
+ net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+ if (!net->sctp.proc_net_sctp)
return -ENOMEM;
+ if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_snmp_seq_fops))
+ goto cleanup;
+ if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_eps_seq_fops))
+ goto cleanup;
+ if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_assocs_seq_fops))
+ goto cleanup;
+ if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_remaddr_seq_fops))
+ goto cleanup;
return 0;
+
+cleanup:
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+ return -ENOMEM;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91813e686c67..493b817f6a2a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -80,56 +80,6 @@ long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
-/* Set up the proc fs entry for the SCTP protocol. */
-static int __net_init sctp_proc_init(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
- if (!net->sctp.proc_net_sctp)
- goto out_proc_net_sctp;
- if (sctp_snmp_proc_init(net))
- goto out_snmp_proc_init;
- if (sctp_eps_proc_init(net))
- goto out_eps_proc_init;
- if (sctp_assocs_proc_init(net))
- goto out_assocs_proc_init;
- if (sctp_remaddr_proc_init(net))
- goto out_remaddr_proc_init;
-
- return 0;
-
-out_remaddr_proc_init:
- sctp_assocs_proc_exit(net);
-out_assocs_proc_init:
- sctp_eps_proc_exit(net);
-out_eps_proc_init:
- sctp_snmp_proc_exit(net);
-out_snmp_proc_init:
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-out_proc_net_sctp:
- return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
- return 0;
-}
-
-/* Clean up the proc fs entry for the SCTP protocol.
- * Note: Do not make this __exit as it is used in the init error
- * path.
- */
-static void sctp_proc_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- sctp_snmp_proc_exit(net);
- sctp_eps_proc_exit(net);
- sctp_assocs_proc_exit(net);
- sctp_remaddr_proc_exit(net);
-
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-#endif
-}
-
/* Private helper to extract ipv4 address and stash them in
* the protocol structure.
*/
@@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net)
if (status)
goto err_init_mibs;
+#ifdef CONFIG_PROC_FS
/* Initialize proc fs directory. */
status = sctp_proc_init(net);
if (status)
goto err_init_proc;
+#endif
sctp_dbg_objcnt_init(net);
@@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net)
sctp_free_addr_wq(net);
sctp_free_local_addr_list(net);
- sctp_dbg_objcnt_exit(net);
-
- sctp_proc_exit(net);
+#ifdef CONFIG_PROC_FS
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+#endif
cleanup_sctp_mibs(net);
sctp_sysctl_net_unregister(net);
}
@@ -1330,6 +1283,7 @@ static void __net_exit sctp_defaults_exit(struct net *net)
static struct pernet_operations sctp_defaults_ops = {
.init = sctp_defaults_init,
.exit = sctp_defaults_exit,
+ .async = true,
};
static int __net_init sctp_ctrlsock_init(struct net *net)
@@ -1353,6 +1307,7 @@ static void __net_init sctp_ctrlsock_exit(struct net *net)
static struct pernet_operations sctp_ctrlsock_ops = {
.init = sctp_ctrlsock_init,
.exit = sctp_ctrlsock_exit,
+ .async = true,
};
/* Initialize the universe into something sensible. */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d01475f5f710..cc20bc39ee7c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
/* Control chunk destructor */
static void sctp_control_release_owner(struct sk_buff *skb)
{
- /*TODO: do memory release */
+ struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
+
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+ struct sctp_association *asoc = chunk->asoc;
+
+ /* refcnt == 2 and !list_empty mean after this release, it's
+ * not being used anywhere, and it's time to notify userland
+ * that this shkey can be freed if it's been deactivated.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
}
static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
@@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
*
	 * For now, don't do anything.
*/
+ if (chunk->auth) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
skb->sk = asoc ? asoc->base.sk : NULL;
+ skb_shinfo(skb)->destructor_arg = chunk;
skb->destructor = sctp_control_release_owner;
}
@@ -1271,7 +1297,8 @@ nodata:
return retval;
}
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+ __u16 key_id)
{
struct sctp_authhdr auth_hdr;
struct sctp_hmac *hmac_desc;
@@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
return NULL;
auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
- auth_hdr.shkey_id = htons(asoc->active_key_id);
+ auth_hdr.shkey_id = htons(key_id);
retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
&auth_hdr);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b71e7fb0a20a..298112ca8c06 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
+static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands,
+ struct sctp_association *asoc)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+}
+
/* Helper function to generate an adaptation indication event */
static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
struct sctp_association *asoc)
@@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
case SCTP_CMD_ADAPTATION_IND:
sctp_cmd_adaptation_ind(commands, asoc);
break;
+ case SCTP_CMD_PEER_NO_AUTH:
+ sctp_cmd_peer_no_auth(commands, asoc);
+ break;
case SCTP_CMD_ASSOC_SHKEY:
error = sctp_auth_asoc_init_active_key(asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index eb7905ffe5f2..cc56a67dbb4d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
void *arg,
struct sctp_cmd_seq *commands)
{
- struct sctp_ulpevent *ev, *ai_ev = NULL;
+ struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_association *new_asoc;
struct sctp_init_chunk *peer_init;
struct sctp_chunk *chunk = arg;
@@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
goto nomem_aiev;
}
+ if (!new_asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem_authev;
+ }
+
/* Add all the state machine commands now since we've created
* everything. This way we don't introduce memory corruptions
	 * during side-effect processing and correctly count established
@@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
+
return SCTP_DISPOSITION_CONSUME;
+nomem_authev:
+ sctp_ulpevent_free(ai_ev);
nomem_aiev:
sctp_ulpevent_free(ev);
nomem_ev:
@@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
SCTP_ULPEVENT(ev));
}
+ if (!asoc->peer.auth_capable) {
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!ev)
+ goto nomem;
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
+ }
+
return SCTP_DISPOSITION_CONSUME;
nomem:
return SCTP_DISPOSITION_NOMEM;
@@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
if (asoc->peer.adaptation_ind)
sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
+ if (!asoc->peer.auth_capable)
+ sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL());
+
return SCTP_DISPOSITION_CONSUME;
nomem:
@@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
- struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
+ struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_chunk *repl;
/* Clarification from Implementor's Guide:
@@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
goto nomem;
}
+
+ if (!asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem;
+ }
}
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
if (ai_ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
return SCTP_DISPOSITION_CONSUME;
nomem:
+ if (auth_ev)
+ sctp_ulpevent_free(auth_ev);
if (ai_ev)
sctp_ulpevent_free(ai_ev);
if (ev)
@@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate(
const union sctp_subtype type,
struct sctp_chunk *chunk)
{
+ struct sctp_shared_key *sh_key = NULL;
struct sctp_authhdr *auth_hdr;
__u8 *save_digest, *digest;
struct sctp_hmac *hmac;
@@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate(
* configured
*/
key_id = ntohs(auth_hdr->shkey_id);
- if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id))
- return SCTP_IERROR_AUTH_BAD_KEYID;
-
+ if (key_id != asoc->active_key_id) {
+ sh_key = sctp_auth_get_shkey(asoc, key_id);
+ if (!sh_key)
+ return SCTP_IERROR_AUTH_BAD_KEYID;
+ }
/* Make sure that the length of the signature matches what
* we expect.
@@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate(
sctp_auth_calculate_hmac(asoc, chunk->skb,
(struct sctp_auth_chunk *)chunk->chunk_hdr,
- GFP_ATOMIC);
+ sh_key, GFP_ATOMIC);
/* Discard the packet if the digests do not match */
if (memcmp(save_digest, digest, sig_len)) {
@@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
struct sctp_ulpevent *ev;
ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id),
- SCTP_AUTH_NEWKEY, GFP_ATOMIC);
+ SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
return -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bf271f8c2dc9..7a10ae3c3d82 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
/* The sndbuf space is tracked per association. */
sctp_association_hold(asoc);
+ if (chunk->shkey)
+ sctp_auth_shkey_hold(chunk->shkey);
+
skb_set_owner_w(chunk->skb, sk);
chunk->skb->destructor = sctp_wfree;
@@ -1606,396 +1609,303 @@ static int sctp_error(struct sock *sk, int flags, int err)
static int sctp_msghdr_parse(const struct msghdr *msg,
struct sctp_cmsgs *cmsgs);
-static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+ struct sctp_sndrcvinfo *srinfo,
+ const struct msghdr *msg, size_t msg_len)
{
- struct net *net = sock_net(sk);
- struct sctp_sock *sp;
- struct sctp_endpoint *ep;
- struct sctp_association *new_asoc = NULL, *asoc = NULL;
- struct sctp_transport *transport, *chunk_tp;
- struct sctp_chunk *chunk;
- union sctp_addr to;
- struct sockaddr *msg_name = NULL;
- struct sctp_sndrcvinfo default_sinfo;
- struct sctp_sndrcvinfo *sinfo;
- struct sctp_initmsg *sinit;
- sctp_assoc_t associd = 0;
- struct sctp_cmsgs cmsgs = { NULL };
- enum sctp_scope scope;
- bool fill_sinfo_ttl = false, wait_connect = false;
- struct sctp_datamsg *datamsg;
- int msg_flags = msg->msg_flags;
- __u16 sinfo_flags = 0;
- long timeo;
+ __u16 sflags;
int err;
- err = 0;
- sp = sctp_sk(sk);
- ep = sp->ep;
-
- pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
- msg, msg_len, ep);
+ if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+ return -EPIPE;
- /* We cannot send a message over a TCP-style listening socket. */
- if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
- err = -EPIPE;
- goto out_nounlock;
- }
+ if (msg_len > sk->sk_sndbuf)
+ return -EMSGSIZE;
- /* Parse out the SCTP CMSGs. */
- err = sctp_msghdr_parse(msg, &cmsgs);
+ memset(cmsgs, 0, sizeof(*cmsgs));
+ err = sctp_msghdr_parse(msg, cmsgs);
if (err) {
pr_debug("%s: msghdr parse err:%x\n", __func__, err);
- goto out_nounlock;
+ return err;
}
- /* Fetch the destination address for this packet. This
- * address only selects the association--it is not necessarily
- * the address we will send to.
- * For a peeled-off socket, msg_name is ignored.
- */
- if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
- int msg_namelen = msg->msg_namelen;
+ memset(srinfo, 0, sizeof(*srinfo));
+ if (cmsgs->srinfo) {
+ srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+ srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+ srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+ srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+ srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+ srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+ }
- err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
- msg_namelen);
- if (err)
- return err;
+ if (cmsgs->sinfo) {
+ srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+ srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+ srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+ srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+ srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
+ }
- if (msg_namelen > sizeof(to))
- msg_namelen = sizeof(to);
- memcpy(&to, msg->msg_name, msg_namelen);
- msg_name = msg->msg_name;
+ if (cmsgs->prinfo) {
+ srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value;
+ SCTP_PR_SET_POLICY(srinfo->sinfo_flags,
+ cmsgs->prinfo->pr_policy);
}
- sinit = cmsgs.init;
- if (cmsgs.sinfo != NULL) {
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
- default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
- default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
- default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
- default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
+ sflags = srinfo->sinfo_flags;
+ if (!sflags && msg_len)
+ return 0;
- sinfo = &default_sinfo;
- fill_sinfo_ttl = true;
- } else {
- sinfo = cmsgs.srinfo;
- }
- /* Did the user specify SNDINFO/SNDRCVINFO? */
- if (sinfo) {
- sinfo_flags = sinfo->sinfo_flags;
- associd = sinfo->sinfo_assoc_id;
- }
+ if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+ return -EINVAL;
- pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
- msg_len, sinfo_flags);
+ if (((sflags & SCTP_EOF) && msg_len > 0) ||
+ (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+ return -EINVAL;
- /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
- if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+ return -EINVAL;
- /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
- * length messages when SCTP_EOF|SCTP_ABORT is not set.
- * If SCTP_ABORT is set, the message length could be non zero with
- * the msg_iov set to the user abort reason.
- */
- if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
- (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
- err = -EINVAL;
- goto out_nounlock;
- }
+ return 0;
+}
- /* If SCTP_ADDR_OVER is set, there must be an address
- * specified in msg_name.
- */
- if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
- err = -EINVAL;
- goto out_nounlock;
- }
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+ struct sctp_cmsgs *cmsgs,
+ union sctp_addr *daddr,
+ struct sctp_transport **tp)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct net *net = sock_net(sk);
+ struct sctp_association *asoc;
+ enum sctp_scope scope;
+ struct cmsghdr *cmsg;
+ int err;
- transport = NULL;
+ *tp = NULL;
- pr_debug("%s: about to look up association\n", __func__);
+ if (sflags & (SCTP_EOF | SCTP_ABORT))
+ return -EINVAL;
- lock_sock(sk);
+ if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+ sctp_sstate(sk, CLOSING)))
+ return -EADDRNOTAVAIL;
- /* If a msg_name has been specified, assume this is to be used. */
- if (msg_name) {
- /* Look for a matching association on the endpoint. */
- asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+ if (sctp_endpoint_is_peeled_off(ep, daddr))
+ return -EADDRNOTAVAIL;
- /* If we could not find a matching association on the
- * endpoint, make sure that it is not a TCP-style
- * socket that already has an association or there is
- * no peeled-off association on another socket.
- */
- if (!asoc &&
- ((sctp_style(sk, TCP) &&
- (sctp_sstate(sk, ESTABLISHED) ||
- sctp_sstate(sk, CLOSING))) ||
- sctp_endpoint_is_peeled_off(ep, &to))) {
- err = -EADDRNOTAVAIL;
- goto out_unlock;
- }
+ if (!ep->base.bind_addr.port) {
+ if (sctp_autobind(sk))
+ return -EAGAIN;
} else {
- asoc = sctp_id2assoc(sk, associd);
- if (!asoc) {
- err = -EPIPE;
- goto out_unlock;
- }
+ if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+ return -EACCES;
}
- if (asoc) {
- pr_debug("%s: just looked up association:%p\n", __func__, asoc);
+ scope = sctp_scope(daddr);
- /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
- * socket that has an association in CLOSED state. This can
- * happen when an accepted socket has an association that is
- * already CLOSED.
- */
- if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
- err = -EPIPE;
- goto out_unlock;
- }
+ asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+ if (!asoc)
+ return -ENOMEM;
+
+ if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+ err = -ENOMEM;
+ goto free;
+ }
- if (sinfo_flags & SCTP_EOF) {
- pr_debug("%s: shutting down association:%p\n",
- __func__, asoc);
+ if (cmsgs->init) {
+ struct sctp_initmsg *init = cmsgs->init;
- sctp_primitive_SHUTDOWN(net, asoc, NULL);
- err = 0;
- goto out_unlock;
+ if (init->sinit_num_ostreams) {
+ __u16 outcnt = init->sinit_num_ostreams;
+
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, need to re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0,
+ GFP_KERNEL);
+ if (err)
+ goto free;
}
- if (sinfo_flags & SCTP_ABORT) {
- chunk = sctp_make_abort_user(asoc, msg, msg_len);
- if (!chunk) {
- err = -ENOMEM;
- goto out_unlock;
- }
+ if (init->sinit_max_instreams)
+ asoc->c.sinit_max_instreams = init->sinit_max_instreams;
- pr_debug("%s: aborting association:%p\n",
- __func__, asoc);
+ if (init->sinit_max_attempts)
+ asoc->max_init_attempts = init->sinit_max_attempts;
- sctp_primitive_ABORT(net, asoc, chunk);
- err = 0;
- goto out_unlock;
- }
+ if (init->sinit_max_init_timeo)
+ asoc->max_init_timeo =
+ msecs_to_jiffies(init->sinit_max_init_timeo);
}
- /* Do we need to create the association? */
- if (!asoc) {
- pr_debug("%s: there is no association yet\n", __func__);
+ *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+ if (!*tp) {
+ err = -ENOMEM;
+ goto free;
+ }
- if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
- err = -EINVAL;
- goto out_unlock;
- }
+ if (!cmsgs->addrs_msg)
+ return 0;
- /* Check for invalid stream against the stream counts,
- * either the default or the user specified stream counts.
- */
- if (sinfo) {
- if (!sinit || !sinit->sinit_num_ostreams) {
- /* Check against the defaults. */
- if (sinfo->sinfo_stream >=
- sp->initmsg.sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- } else {
- /* Check against the requested. */
- if (sinfo->sinfo_stream >=
- sinit->sinit_num_ostreams) {
- err = -EINVAL;
- goto out_unlock;
- }
- }
- }
+ /* sendv addr list parse */
+ for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
+ struct sctp_transport *transport;
+ struct sctp_association *old;
+ union sctp_addr _daddr;
+ int dlen;
- /*
- * API 3.1.2 bind() - UDP Style Syntax
- * If a bind() or sctp_bindx() is not called prior to a
- * sendmsg() call that initiates a new association, the
- * system picks an ephemeral port and will choose an address
- * set equivalent to binding with a wildcard address.
- */
- if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk)) {
- err = -EAGAIN;
- goto out_unlock;
+ if (cmsg->cmsg_level != IPPROTO_SCTP ||
+ (cmsg->cmsg_type != SCTP_DSTADDRV4 &&
+ cmsg->cmsg_type != SCTP_DSTADDRV6))
+ continue;
+
+ daddr = &_daddr;
+ memset(daddr, 0, sizeof(*daddr));
+ dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
+ if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
+ if (dlen < sizeof(struct in_addr)) {
+ err = -EINVAL;
+ goto free;
}
+
+ dlen = sizeof(struct in_addr);
+ daddr->v4.sin_family = AF_INET;
+ daddr->v4.sin_port = htons(asoc->peer.port);
+ memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
} else {
- /*
- * If an unprivileged user inherits a one-to-many
- * style socket with open associations on a privileged
- * port, it MAY be permitted to accept new associations,
- * but it SHOULD NOT be permitted to open new
- * associations.
- */
- if (ep->base.bind_addr.port < inet_prot_sock(net) &&
- !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
- err = -EACCES;
- goto out_unlock;
+ if (dlen < sizeof(struct in6_addr)) {
+ err = -EINVAL;
+ goto free;
}
- }
- scope = sctp_scope(&to);
- new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
- if (!new_asoc) {
- err = -ENOMEM;
- goto out_unlock;
+ dlen = sizeof(struct in6_addr);
+ daddr->v6.sin6_family = AF_INET6;
+ daddr->v6.sin6_port = htons(asoc->peer.port);
+ memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
}
- asoc = new_asoc;
- err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
- if (err < 0) {
- err = -ENOMEM;
- goto out_free;
+ err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
+ if (err)
+ goto free;
+
+ old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (old && old != asoc) {
+ if (old->state >= SCTP_STATE_ESTABLISHED)
+ err = -EISCONN;
+ else
+ err = -EALREADY;
+ goto free;
}
- /* If the SCTP_INIT ancillary data is specified, set all
- * the association init values accordingly.
- */
- if (sinit) {
- if (sinit->sinit_num_ostreams) {
- __u16 outcnt = sinit->sinit_num_ostreams;
-
- asoc->c.sinit_num_ostreams = outcnt;
- /* outcnt has been changed, so re-init stream */
- err = sctp_stream_init(&asoc->stream, outcnt, 0,
- GFP_KERNEL);
- if (err)
- goto out_free;
- }
- if (sinit->sinit_max_instreams) {
- asoc->c.sinit_max_instreams =
- sinit->sinit_max_instreams;
- }
- if (sinit->sinit_max_attempts) {
- asoc->max_init_attempts
- = sinit->sinit_max_attempts;
- }
- if (sinit->sinit_max_init_timeo) {
- asoc->max_init_timeo =
- msecs_to_jiffies(sinit->sinit_max_init_timeo);
- }
+ if (sctp_endpoint_is_peeled_off(ep, daddr)) {
+ err = -EADDRNOTAVAIL;
+ goto free;
}
- /* Prime the peer's transport structures. */
- transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
+ transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
+ SCTP_UNKNOWN);
if (!transport) {
err = -ENOMEM;
- goto out_free;
+ goto free;
}
}
- /* ASSERT: we have a valid association at this point. */
- pr_debug("%s: we have a valid association\n", __func__);
+ return 0;
- if (!sinfo) {
- /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
- * one with some defaults.
- */
- memset(&default_sinfo, 0, sizeof(default_sinfo));
- default_sinfo.sinfo_stream = asoc->default_stream;
- default_sinfo.sinfo_flags = asoc->default_flags;
- default_sinfo.sinfo_ppid = asoc->default_ppid;
- default_sinfo.sinfo_context = asoc->default_context;
- default_sinfo.sinfo_timetolive = asoc->default_timetolive;
- default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
- sinfo = &default_sinfo;
- } else if (fill_sinfo_ttl) {
- /* In case SNDINFO was specified, we still need to fill
- * it with a default ttl from the assoc here.
- */
- sinfo->sinfo_timetolive = asoc->default_timetolive;
- }
+free:
+ sctp_association_free(asoc);
+ return err;
+}
- /* API 7.1.7, the sndbuf size per association bounds the
- * maximum size of data that can be sent in a single send call.
- */
- if (msg_len > sk->sk_sndbuf) {
- err = -EMSGSIZE;
- goto out_free;
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+ __u16 sflags, struct msghdr *msg,
+ size_t msg_len)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+
+ if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+ return -EPIPE;
+
+ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) &&
+ !sctp_state(asoc, ESTABLISHED))
+ return 0;
+
+ if (sflags & SCTP_EOF) {
+ pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
+
+ return 0;
}
- if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
+ if (sflags & SCTP_ABORT) {
+ struct sctp_chunk *chunk;
- /* If fragmentation is disabled and the message length exceeds the
- * association fragmentation point, return EMSGSIZE. The I-D
- * does not specify what this error is, but this looks like
- * a great fit.
- */
- if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
- err = -EMSGSIZE;
- goto out_free;
+ chunk = sctp_make_abort_user(asoc, msg, msg_len);
+ if (!chunk)
+ return -ENOMEM;
+
+ pr_debug("%s: aborting association:%p\n", __func__, asoc);
+ sctp_primitive_ABORT(net, asoc, chunk);
+
+ return 0;
}
- /* Check for invalid stream. */
+ return 1;
+}
+
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+ struct msghdr *msg, size_t msg_len,
+ struct sctp_transport *transport,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ struct sctp_datamsg *datamsg;
+ bool wait_connect = false;
+ struct sctp_chunk *chunk;
+ long timeo;
+ int err;
+
if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
err = -EINVAL;
- goto out_free;
+ goto err;
}
- /* Allocate sctp_stream_out_ext if not already done */
if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
if (err)
- goto out_free;
+ goto err;
}
+ if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+ err = -EMSGSIZE;
+ goto err;
+ }
+
+ if (asoc->pmtu_pending)
+ sctp_assoc_pending_pmtu(asoc);
+
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
- timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- /* sk can be changed by peel off when waiting for buf. */
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err) {
- if (err == -ESRCH) {
- /* asoc is already dead. */
- new_asoc = NULL;
- err = -EPIPE;
- }
- goto out_free;
- }
+ if (err)
+ goto err;
}
- /* If an address is passed with the sendto/sendmsg call, it is used
- * to override the primary destination address in the TCP model, or
- * when SCTP_ADDR_OVER flag is set in the UDP model.
- */
- if ((sctp_style(sk, TCP) && msg_name) ||
- (sinfo_flags & SCTP_ADDR_OVER)) {
- chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
- if (!chunk_tp) {
- err = -EINVAL;
- goto out_free;
- }
- } else
- chunk_tp = NULL;
-
- /* Auto-connect, if we aren't connected already. */
if (sctp_state(asoc, CLOSED)) {
err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
- if (err < 0)
- goto out_free;
+ if (err)
+ goto err;
- /* If stream interleave is enabled, wait_connect has to be
- * done earlier than data enqueue, as it needs to make data
- * or idata according to asoc->intl_enable which is set
- * after connection is done.
- */
- if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ if (sctp_sk(sk)->strm_interleave) {
timeo = sock_sndtimeo(sk, 0);
err = sctp_wait_for_connect(asoc, &timeo);
if (err)
- goto out_unlock;
+ goto err;
} else {
wait_connect = true;
}
@@ -2003,73 +1913,186 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
pr_debug("%s: we associated primitively\n", __func__);
}
- /* Break the message into multiple chunks of maximum size. */
datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
if (IS_ERR(datamsg)) {
err = PTR_ERR(datamsg);
- goto out_free;
+ goto err;
}
+
asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
- /* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
sctp_chunk_hold(chunk);
-
- /* Do accounting for the write space. */
sctp_set_owner_w(chunk);
-
- chunk->transport = chunk_tp;
+ chunk->transport = transport;
}
- /* Send it to the lower layers. Note: all chunks
- * must either fail or succeed. The lower layer
- * works that way today. Keep it that way or this
- * breaks.
- */
err = sctp_primitive_SEND(net, asoc, datamsg);
- /* Did the lower layer accept the chunk? */
if (err) {
sctp_datamsg_free(datamsg);
- goto out_free;
+ goto err;
}
pr_debug("%s: we sent primitively\n", __func__);
sctp_datamsg_put(datamsg);
- err = msg_len;
if (unlikely(wait_connect)) {
- timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
sctp_wait_for_connect(asoc, &timeo);
}
- /* If we are already past ASSOCIATE, the lower
- * layers are responsible for association cleanup.
- */
- goto out_unlock;
+ err = msg_len;
-out_free:
- if (new_asoc)
- sctp_association_free(asoc);
-out_unlock:
- release_sock(sk);
+err:
+ return err;
+}
-out_nounlock:
- return sctp_error(sk, msg_flags, err);
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+ const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs)
+{
+ union sctp_addr *daddr = NULL;
+ int err;
-#if 0
-do_sock_err:
- if (msg_len)
- err = msg_len;
- else
- err = sock_error(sk);
- goto out;
+ if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+ int len = msg->msg_namelen;
-do_interrupted:
- if (msg_len)
- err = msg_len;
- goto out;
-#endif /* 0 */
+ if (len > sizeof(*daddr))
+ len = sizeof(*daddr);
+
+ daddr = (union sctp_addr *)msg->msg_name;
+
+ err = sctp_verify_addr(sk, daddr, len);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return daddr;
+}
+
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct sctp_cmsgs *cmsgs)
+{
+ if (!cmsgs->srinfo && !cmsgs->sinfo) {
+ sinfo->sinfo_stream = asoc->default_stream;
+ sinfo->sinfo_ppid = asoc->default_ppid;
+ sinfo->sinfo_context = asoc->default_context;
+ sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+
+ if (!cmsgs->prinfo)
+ sinfo->sinfo_flags = asoc->default_flags;
+ }
+
+ if (!cmsgs->srinfo && !cmsgs->prinfo)
+ sinfo->sinfo_timetolive = asoc->default_timetolive;
+
+ if (cmsgs->authinfo) {
+ /* Reuse sinfo_tsn to indicate that authinfo was set and
+ * sinfo_ssn to save the keyid on tx path.
+ */
+ sinfo->sinfo_tsn = 1;
+ sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber;
+ }
+}
+
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_transport *transport = NULL;
+ struct sctp_sndrcvinfo _sinfo, *sinfo;
+ struct sctp_association *asoc;
+ struct sctp_cmsgs cmsgs;
+ union sctp_addr *daddr;
+ bool new = false;
+ __u16 sflags;
+ int err;
+
+ /* Parse and get snd_info */
+ err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+ if (err)
+ goto out;
+
+ sinfo = &_sinfo;
+ sflags = sinfo->sinfo_flags;
+
+ /* Get daddr from msg */
+ daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+ if (IS_ERR(daddr)) {
+ err = PTR_ERR(daddr);
+ goto out;
+ }
+
+ lock_sock(sk);
+
+ /* SCTP_SENDALL process */
+ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
+ list_for_each_entry(asoc, &ep->asocs, asocs) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err == 0)
+ continue;
+ if (err < 0)
+ goto out_unlock;
+
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len,
+ NULL, sinfo);
+ if (err < 0)
+ goto out_unlock;
+
+ iov_iter_revert(&msg->msg_iter, err);
+ }
+
+ goto out_unlock;
+ }
+
+ /* Get and check or create asoc */
+ if (daddr) {
+ asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (asoc) {
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+ msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ } else {
+ err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+ &transport);
+ if (err)
+ goto out_unlock;
+
+ asoc = transport->asoc;
+ new = true;
+ }
+
+ if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+ transport = NULL;
+ } else {
+ asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
+ if (!asoc) {
+ err = -EPIPE;
+ goto out_unlock;
+ }
+
+ err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
+ if (err <= 0)
+ goto out_unlock;
+ }
+
+ /* Update snd_info with the asoc */
+ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+ /* Send msg to the asoc */
+ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
+ if (err < 0 && err != -ESRCH && new)
+ sctp_association_free(asoc);
+
+out_unlock:
+ release_sock(sk);
+out:
+ return sctp_error(sk, msg->msg_flags, err);
}
/* This is an extended version of skb_pull() that removes the data from the
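
The SCTP_SENDALL path in the reworked sctp_sendmsg() above walks every association on a one-to-many socket and reverts the iov iterator between sends. From userspace the flag rides in the existing SCTP_SNDINFO ancillary data; a minimal sketch, assuming fd, data and datalen are already set up and <netinet/sctp.h>, <string.h> are included:

    /* Illustrative sketch only: send one message to every association on a
     * one-to-many socket using the new SCTP_SENDALL flag.
     */
    struct sctp_sndinfo snd = { .snd_flags = SCTP_SENDALL };
    char cbuf[CMSG_SPACE(sizeof(snd))];
    struct iovec iov = { .iov_base = data, .iov_len = datalen };
    struct msghdr mh = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
    };
    struct cmsghdr *cm;

    memset(cbuf, 0, sizeof(cbuf));
    cm = CMSG_FIRSTHDR(&mh);
    cm->cmsg_level = IPPROTO_SCTP;
    cm->cmsg_type  = SCTP_SNDINFO;
    cm->cmsg_len   = CMSG_LEN(sizeof(snd));
    memcpy(CMSG_DATA(cm), &snd, sizeof(snd));

    if (sendmsg(fd, &mh, 0) < 0)
        perror("sendmsg(SCTP_SENDALL)");
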
@@ -3624,6 +3647,33 @@ static int sctp_setsockopt_del_key(struct sock *sk,
}
/*
+ * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY)
+ *
+ * This set option will deactivate a shared secret key.
+ */
+static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_authkeyid val;
+ struct sctp_association *asoc;
+
+ if (!ep->auth_enable)
+ return -EACCES;
+
+ if (optlen != sizeof(struct sctp_authkeyid))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, optlen))
+ return -EFAULT;
+
+ asoc = sctp_id2assoc(sk, val.scact_assoc_id);
+ if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
+
+ return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+}
+
+/*
* 8.1.23 SCTP_AUTO_ASCONF
*
* This option will enable or disable the use of the automatic generation of
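
A userspace caller drives the new SCTP_AUTH_DEACTIVATE_KEY option much like the existing SCTP_AUTH_DELETE_KEY, and sctp_auth_deact_key_id() above rejects deactivating the currently active key. A minimal sketch — fd, asoc_id and the key number are placeholders, <netinet/sctp.h> assumed:

    /* Illustrative sketch only: deactivate shared key 1 for association
     * asoc_id (or 0 for the whole endpoint).
     */
    struct sctp_authkeyid authkey = {
        .scact_assoc_id  = asoc_id,
        .scact_keynumber = 1,
    };

    if (setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_DEACTIVATE_KEY,
                   &authkey, sizeof(authkey)) < 0)
        perror("setsockopt(SCTP_AUTH_DEACTIVATE_KEY)");
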
@@ -4215,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_DELETE_KEY:
retval = sctp_setsockopt_del_key(sk, optval, optlen);
break;
+ case SCTP_AUTH_DEACTIVATE_KEY:
+ retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+ break;
case SCTP_AUTO_ASCONF:
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
@@ -7189,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_KEY:
case SCTP_AUTH_CHUNK:
case SCTP_AUTH_DELETE_KEY:
+ case SCTP_AUTH_DEACTIVATE_KEY:
retval = -EOPNOTSUPP;
break;
case SCTP_HMAC_IDENT:
@@ -7811,8 +7865,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
- SCTP_ABORT | SCTP_EOF))
+ SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+ SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -7835,10 +7889,60 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
- SCTP_ABORT | SCTP_EOF))
+ SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+ SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
+ case SCTP_PRINFO:
+ /* SCTP Socket API Extension
+ * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo)))
+ return -EINVAL;
+
+ cmsgs->prinfo = CMSG_DATA(cmsg);
+ if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK)
+ return -EINVAL;
+
+ if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
+ cmsgs->prinfo->pr_value = 0;
+ break;
+ case SCTP_AUTHINFO:
+ /* SCTP Socket API Extension
+ * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo)))
+ return -EINVAL;
+
+ cmsgs->authinfo = CMSG_DATA(cmsg);
+ break;
+ case SCTP_DSTADDRV4:
+ case SCTP_DSTADDRV6:
+ /* SCTP Socket API Extension
+ * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_DSTADDRV4 struct in_addr
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_DSTADDRV6 struct in6_addr
+ */
+ cmsgs->addrs_msg = my_msg;
+ break;
default:
return -EINVAL;
}
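
The new SCTP_AUTHINFO ancillary data parsed above selects the shared key for a single message (the chunk.c hunk earlier stashes the key number in sinfo_ssn on the tx path). A minimal sketch, assuming struct sctp_authinfo carries only auth_keynumber as in this series, with fd/data/datalen as placeholders; the msghdr/cmsg plumbing mirrors the SCTP_SNDINFO sketch earlier:

    /* Illustrative sketch only: use shared key 1 for one outgoing message. */
    struct sctp_authinfo ainfo = { .auth_keynumber = 1 };
    char cbuf[CMSG_SPACE(sizeof(ainfo))];
    struct iovec iov = { .iov_base = data, .iov_len = datalen };
    struct msghdr mh = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
    };
    struct cmsghdr *cm;

    memset(cbuf, 0, sizeof(cbuf));
    cm = CMSG_FIRSTHDR(&mh);
    cm->cmsg_level = IPPROTO_SCTP;
    cm->cmsg_type  = SCTP_AUTHINFO;
    cm->cmsg_len   = CMSG_LEN(sizeof(ainfo));
    memcpy(CMSG_DATA(cm), &ainfo, sizeof(ainfo));

    if (sendmsg(fd, &mh, 0) < 0)
        perror("sendmsg(SCTP_AUTHINFO)");
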
@@ -8062,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb)
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+
+ /* refcnt == 2 and !list_empty mean after this release, it's
+ * not being used anywhere, and it's time to notify userland
+ * that this shkey can be freed if it's been deactivated.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
+
sock_wfree(skb);
sctp_wake_up_waiters(sk, asoc);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1e0d780855c3..5f8046c62d90 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,13 +7,11 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
- * - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
@@ -24,7 +22,6 @@
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
@@ -66,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
+static struct smc_hashinfo smc_v6_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -105,6 +106,18 @@ struct proto smc_proto = {
};
EXPORT_SYMBOL_GPL(smc_proto);
+struct proto smc_proto6 = {
+ .name = "SMC6",
+ .owner = THIS_MODULE,
+ .keepalive = smc_set_keepalive,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -161,19 +174,22 @@ static void smc_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
{
struct smc_sock *smc;
+ struct proto *prot;
struct sock *sk;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
- sk->sk_protocol = SMCPROTO_SMC;
+ sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
@@ -200,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
goto out;
rc = -EAFNOSUPPORT;
+ if (addr->sin_family != AF_INET &&
+ addr->sin_family != AF_INET6 &&
+ addr->sin_family != AF_UNSPEC)
+ goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
- if ((addr->sin_family != AF_INET) &&
- ((addr->sin_family != AF_UNSPEC) ||
- (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+ if (addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
@@ -273,47 +292,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
- int len;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
@@ -333,6 +312,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
@@ -347,11 +329,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -373,19 +377,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
@@ -439,8 +433,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -499,8 +493,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
@@ -557,9 +550,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
/* separate smc parameter checking to be safe */
if (alen < sizeof(addr->sa_family))
goto out_err;
- if (addr->sa_family != AF_INET)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
@@ -600,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
int rc;
release_sock(lsk);
- new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
if (!new_sk) {
rc = -ENOMEM;
lsk->sk_err = ENOMEM;
@@ -749,9 +741,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
@@ -767,13 +784,10 @@ static void smc_listen_work(struct work_struct *work)
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
- int rc = 0, len;
- __be32 subnet;
- u8 prefix_len;
+ int rc = 0;
u8 ibport;
/* check if peer is smc capable */
@@ -808,28 +822,19 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
- if (rc) {
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
- goto decline_rdma;
- }
-
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (pclc_prfx->outgoing_subnet != subnet ||
- pclc_prfx->prefix_len != prefix_len) {
+
+ rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+ if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -1071,7 +1076,7 @@ out:
}
static int smc_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct smc_sock *smc;
@@ -1081,7 +1086,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
smc = smc_sk(sock->sk);
- return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+ return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
@@ -1379,6 +1384,7 @@ static const struct proto_ops smc_sock_ops = {
static int smc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
struct sock *sk;
int rc;
@@ -1388,20 +1394,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -EPROTONOSUPPORT;
- if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+ if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
goto out;
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
- sk = smc_sock_alloc(net, sock);
+ sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
- rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
- IPPROTO_TCP, &smc->clcsock);
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
if (rc) {
sk_common_release(sk);
goto out;
@@ -1441,16 +1447,23 @@ static int __init smc_init(void)
rc = proto_register(&smc_proto, 1);
if (rc) {
- pr_err("%s: proto_register fails with %d\n", __func__, rc);
+ pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
goto out_pnet;
}
+ rc = proto_register(&smc_proto6, 1);
+ if (rc) {
+ pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+ goto out_proto;
+ }
+
rc = sock_register(&smc_sock_family_ops);
if (rc) {
pr_err("%s: sock_register fails with %d\n", __func__, rc);
- goto out_proto;
+ goto out_proto6;
}
INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+ INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
@@ -1463,6 +1476,8 @@ static int __init smc_init(void)
out_sock:
sock_unregister(PF_SMC);
+out_proto6:
+ proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
out_pnet:
@@ -1481,11 +1496,13 @@ static void __exit smc_exit(void)
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
+ cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
+ proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
}
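
For context, a minimal userspace sketch (illustrative only, not part of the patch) of how the reworked smc_create() is reached: the CLC socket family is now selected through the protocol argument using the SMCPROTO_* constants added in smc.h below. AF_SMC (43) is assumed to be visible to the application, e.g. via a local define.

#include <sys/socket.h>

#ifndef AF_SMC
#define AF_SMC 43		/* assumed value from include/linux/socket.h */
#endif
#define SMCPROTO_SMC	0	/* CLC handshake over an IPv4 TCP socket */
#define SMCPROTO_SMC6	1	/* CLC handshake over an IPv6 TCP socket */

/* smc_create() maps the protocol to PF_INET or PF_INET6 internally */
int open_smc_socket(int use_ipv6)
{
	return socket(AF_SMC, SOCK_STREAM,
		      use_ipv6 ? SMCPROTO_SMC6 : SMCPROTO_SMC);
}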
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9518986c97b1..e4829a2f46ba 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,11 +18,13 @@
#include "smc_ib.h"
-#define SMCPROTO_SMC 0 /* SMC protocol */
+#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
+#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
+extern struct proto smc_proto6;
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
@@ -172,7 +174,6 @@ struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
- struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -263,10 +264,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
struct smc_clc_msg_local;
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..64fbc3230e6c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -5,15 +5,17 @@
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
+#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -22,6 +24,9 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
@@ -70,6 +75,172 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(ipv4, ifa))
+ continue;
+ prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+ prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+ /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+ return 0;
+ } endfor_ifa(in_dev);
+ return -ENOENT;
+}
+
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+ struct inet6_ifaddr *ifa;
+ int cnt = 0;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* use a maximum of 8 IPv6 prefixes from device */
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+ &ifa->addr, ifa->prefix_len);
+ ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+ cnt++;
+ if (cnt == SMC_CLC_MAX_V6_PREFIX)
+ break;
+ }
+ prop->ipv6_prefixes_cnt = cnt;
+ if (cnt)
+ return 0;
+#endif
+ return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct sockaddr_storage addrs;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr;
+ int rc = -ENOENT;
+
+ memset(prop, 0, sizeof(*prop));
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ /* analyze IP specific data of net_device belonging to TCP socket */
+ addr6 = (struct sockaddr_in6 *)&addrs;
+ rcu_read_lock();
+ if (addrs.ss_family == PF_INET) {
+ /* IPv4 */
+ addr = (struct sockaddr_in *)&addrs;
+ rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+ } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+ /* mapped IPv4 address - peer is IPv4 only */
+ rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+ prop);
+ } else {
+ /* IPv6 */
+ rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
+ }
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+ inet_ifa_match(prop->outgoing_subnet, ifa))
+ return 0;
+ } endfor_ifa(in_dev);
+
+ return -ENOENT;
+}
+
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
+ struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct inet6_ifaddr *ifa;
+ int i, max;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+ ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+ max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ for (i = 0; i < max; i++) {
+ if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+ ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+ ifa->prefix_len))
+ return 0;
+ }
+ }
+#endif
+ return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ int rc;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ rcu_read_lock();
+ if (!prop->ipv6_prefixes_cnt)
+ rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+ else
+ rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -189,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
+ int len, i, plen, rc;
int reason_code = 0;
- struct kvec vec[3];
+ struct kvec vec[4];
struct msghdr msg;
- int len, plen, rc;
+
+ /* retrieve ip prefixes for CLC proposal msg */
+ rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
+ if (rc)
+ return SMC_CLC_DECL_CNFERR; /* configuration error */
/* send SMC Proposal CLC message */
- plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+ plen = sizeof(pclc) + sizeof(pclc_prfx) +
+ (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+ sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -209,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
- memset(&pclc_prfx, 0, sizeof(pclc_prfx));
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
- if (rc)
- return SMC_CLC_DECL_CNFERR; /* configuration error */
- pclc_prfx.ipv6_prefixes_cnt = 0;
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec[0].iov_base = &pclc;
- vec[0].iov_len = sizeof(pclc);
- vec[1].iov_base = &pclc_prfx;
- vec[1].iov_len = sizeof(pclc_prfx);
- vec[2].iov_base = &trl;
- vec[2].iov_len = sizeof(trl);
+ i = 0;
+ vec[i].iov_base = &pclc;
+ vec[i++].iov_len = sizeof(pclc);
+ vec[i].iov_base = &pclc_prfx;
+ vec[i++].iov_len = sizeof(pclc_prfx);
+ if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+ vec[i].iov_base = &ipv6_prfx[0];
+ vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
+ vec[i].iov_base = &trl;
+ vec[i++].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c145a0f36a68..63bf1dc2c1f9 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
+#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -62,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
+#define SMC_CLC_MAX_V6_PREFIX 8
+
+/* Struct would be 4 byte aligned, but it is used in an array that is sent
+ * to peers and must conform to RFC7609, hence we need to use packed here.
+ */
struct smc_clc_ipv6_prefix {
- u8 prefix[4];
+ struct in6_addr prefix;
u8 prefix_len;
-} __packed;
+} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
@@ -81,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);
#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \
+ sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ sizeof(struct smc_clc_msg_proposal_prefix) + \
SMC_CLC_PROPOSAL_MAX_PREFIX + \
sizeof(struct smc_clc_msg_trail))
@@ -124,9 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 645dd226177b..f44f6803f7ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,17 @@
static u32 smc_lgr_num; /* unique link group number */
+static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
+{
+ /* client link group creation always follows the server link group
+ * creation. For client use a somewhat higher removal delay time,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+}
+
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
@@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
if (!reduced || lgr->conns_num)
return;
- /* client link group creation always follows the server link group
- * creation. For client use a somewhat higher removal delay time,
- * otherwise there is a risk of out-of-sync link groups.
- */
- mod_delayed_work(system_wq, &lgr->free_work,
- lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
- SMC_LGR_FREE_DELAY_SERV);
+ smc_lgr_schedule_free_work(lgr);
}
static void smc_lgr_free_work(struct work_struct *work)
@@ -140,11 +145,12 @@ static void smc_lgr_free_work(struct work_struct *work)
list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
spin_unlock_bh(&smc_lgr_list.lock);
- smc_lgr_free(lgr);
+ if (!delayed_work_pending(&lgr->free_work))
+ smc_lgr_free(lgr);
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
{
@@ -161,7 +167,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->sync_err = false;
- lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +182,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
+ lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
@@ -199,6 +205,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
goto destroy_qp;
init_completion(&lnk->llc_confirm);
init_completion(&lnk->llc_confirm_resp);
+ init_completion(&lnk->llc_add);
+ init_completion(&lnk->llc_add_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
@@ -307,6 +315,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
kfree(lgr);
}
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
@@ -314,15 +331,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
struct smc_sock *smc;
struct rb_node *node;
- spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list)) {
- /* termination already triggered */
- spin_unlock_bh(&smc_lgr_list.lock);
- return;
- }
- /* do not use this link group for new connections */
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ smc_lgr_forget(lgr);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
@@ -340,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
}
write_unlock_bh(&lgr->conns_lock);
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+ smc_lgr_schedule_free_work(lgr);
}
/* Determine vlan of internal TCP socket.
@@ -401,7 +411,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
{
@@ -458,7 +468,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
create:
if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+ rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
if (rc)
goto out;
@@ -699,27 +709,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
- struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
- u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
- struct smc_link_group *lgr = conn->lgr;
- u32 rkey = ntohl(clc->rmb_rkey);
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
- conn->rtoken_idx = i;
+ /* already in list */
+ return i;
+ }
+ }
+ i = smc_rmb_reserve_rtoken_idx(lgr);
+ if (i < 0)
+ return i;
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+ u32 rkey = ntohl(nw_rkey);
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ test_bit(i, lgr->rtokens_used_mask)) {
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+ clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
}
- conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+ return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
return 0;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fe691bf9af91..07e2a393e6d9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */
SMC_SERV /* server */
};
+enum smc_link_state { /* possible states of a link */
+ SMC_LNK_INACTIVE, /* link is inactive */
+ SMC_LNK_ACTIVATING, /* link is being activated */
+ SMC_LNK_ACTIVE /* link is active */
+};
+
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+
+ enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+ int llc_confirm_rc; /* rc from confirm link msg */
+ int llc_confirm_resp_rc; /* rc from conf_resp msg */
+ struct completion llc_add; /* wait for rx of add link */
+ struct completion llc_add_resp; /* wait for rx of add link rsp*/
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
- __be32 daddr; /* destination ip address */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2a8957bd6d38..26df554f7588 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -23,6 +23,8 @@
#include "smc_wr.h"
#include "smc.h"
+#define SMC_MAX_CQE 32766 /* max. # of completion queue elements */
+
#define SMC_QP_MIN_RNR_TIMER 5
#define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */
#define SMC_QP_RETRY_CNT 7 /* 7: infinite */
@@ -438,9 +440,15 @@ out:
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
struct ib_cq_init_attr cqattr = {
- .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 };
+ .cqe = SMC_MAX_CQE, .comp_vector = 0 };
+ int cqe_size_order, smc_order;
long rc;
+ /* the calculated number of cq entries fits to mlx5 cq allocation */
+ cqe_size_order = cache_line_size() == 128 ? 7 : 6;
+ smc_order = MAX_ORDER - cqe_size_order - 1;
+ if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
+ cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
smc_wr_tx_cq_handler, NULL,
smcibdev, &cqattr);
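
A quick worked check of the new CQ size clamp (illustrative, assuming MAX_ORDER = 11 and 4 KiB pages): for 128-byte cache lines cqe_size_order is 7, smc_order is 11 - 7 - 1 = 3, and (1 << 3) * 4096 = 32768 is not exceeded by SMC_MAX_CQE + 2 = 32768, so cqattr.cqe stays at 32766; for 64-byte cache lines the bound is 65536 and again no clamping occurs. The adjustment would only take effect on configurations with a smaller MAX_ORDER or page size.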
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index b4aa4fcedb96..ea4b21981b4b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -4,9 +4,6 @@
*
* Link Layer Control (LLC)
*
- * For now, we only support the necessary "confirm link" functionality
- * which happens for the first RoCE link after successful CLC handshake.
- *
* Copyright IBM Corp. 2016
*
* Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
@@ -21,6 +18,122 @@
#include "smc_clc.h"
#include "smc_llc.h"
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 add_link_rej_rsn:4,
+ reserved:4;
+#endif
+ u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 link_uid[SMC_LGR_ID_SIZE];
+ u8 max_links;
+ u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS 2
+
+struct smc_llc_msg_add_link { /* type 0x02 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 reserved2[2];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 flags2; /* QP mtu */
+ u8 initial_psn[3];
+ u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
+
+struct smc_llc_msg_del_link { /* type 0x04 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ __be32 reason;
+ u8 reserved[35];
+} __packed; /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link { /* type 0x07 */
+ struct smc_llc_hdr hd;
+ u8 user_data[16];
+ u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+ union {
+ u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
+ /* is actually the num of rtokens, first */
+ /* rtoken is always for the current link */
+ u8 link_id; /* link id of the rtoken */
+ };
+ __be32 rmb_key;
+ __be64 rmb_vaddr;
+} __packed; /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG 3
+
+struct smc_llc_msg_confirm_rkey { /* type 0x06 */
+ struct smc_llc_hdr hd;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+ u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_NEG 0x20
+
+struct smc_llc_msg_delete_rkey { /* type 0x09 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 err_mask;
+ u8 reserved[2];
+ __be32 rkey[8];
+ u8 reserved2[4];
+};
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_del_link delete_link;
+
+ struct smc_llc_msg_confirm_rkey confirm_rkey;
+ struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+ struct smc_llc_msg_delete_rkey delete_rkey;
+
+ struct smc_llc_msg_test_link test_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } raw;
+};
+
+#define SMC_LLC_FLAG_RESP 0x80
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(confllc->sender_mac, mac, ETH_ALEN);
@@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_add_link *addllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP) {
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* always reject more links for now */
+ addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ }
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_del_link *delllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ delllc = (struct smc_llc_msg_del_link *)wr_buf;
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* DEL_LINK_ALL because only 1 link supported */
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc->link_num = link->link_id;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_test_link *testllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ testllc = (struct smc_llc_msg_test_link *)wr_buf;
+ memset(testllc, 0, sizeof(*testllc));
+ testllc->hd.common.type = SMC_LLC_TEST_LINK;
+ testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ if (reqresp == SMC_LLC_RESP)
+ testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, llclen);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr;
+ int conf_rc;
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ /* RMBE eyecatchers are not supported */
+ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+ conf_rc = 0;
+ else
+ conf_rc = ENOTSUPP;
+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
+ if (lgr->role == SMC_SERV &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
+ }
} else {
- if (lgr->role == SMC_CLNT) {
+ if (lgr->role == SMC_CLNT &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
}
+static void smc_llc_rx_add_link(struct smc_link *link,
+ struct smc_llc_msg_add_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (link->state == SMC_LNK_ACTIVATING)
+ complete(&link->llc_add_resp);
+ } else {
+ if (link->state == SMC_LNK_ACTIVATING) {
+ complete(&link->llc_add);
+ return;
+ }
+
+ if (lgr->role == SMC_SERV) {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+
+ } else {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ }
+ }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+ struct smc_llc_msg_del_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ smc_lgr_terminate(lgr);
+ } else {
+ if (lgr->role == SMC_SERV) {
+ smc_lgr_forget(lgr);
+ smc_llc_send_delete_link(link, SMC_LLC_REQ);
+ } else {
+ smc_llc_send_delete_link(link, SMC_LLC_RESP);
+ smc_lgr_terminate(lgr);
+ }
+ }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+ struct smc_llc_msg_test_link *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+ }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ rc = smc_rtoken_add(lgr,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+
+ /* ignore rtokens for other links, we have only one link */
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (rc < 0)
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ /* ignore rtokens for other links, we have only one link */
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+ struct smc_llc_msg_delete_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ u8 err_mask = 0;
+ int i, max;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(lgr, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
- if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+ switch (llc->raw.hdr.common.type) {
+ case SMC_LLC_TEST_LINK:
+ smc_llc_rx_test_link(link, &llc->test_link);
+ break;
+ case SMC_LLC_CONFIRM_LINK:
smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ break;
+ case SMC_LLC_ADD_LINK:
+ smc_llc_rx_add_link(link, &llc->add_link);
+ break;
+ case SMC_LLC_DELETE_LINK:
+ smc_llc_rx_delete_link(link, &llc->delete_link);
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ break;
+ case SMC_LLC_DELETE_RKEY:
+ smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ break;
+ }
}
/***************************** init, exit, misc ******************************/
@@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.type = SMC_LLC_CONFIRM_LINK
},
{
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY
+ },
+ {
.handler = NULL,
}
};
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 51b27ce90dbd..e4a7d5e234d5 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -18,6 +18,7 @@
#define SMC_LLC_FLAG_RESP 0x80
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+#define SMC_LLC_WAIT_TIME (2 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN 40
-
-struct smc_llc_hdr {
- struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
- u8 reserved;
- u8 flags;
-};
-
-struct smc_llc_msg_confirm_link { /* type 0x01 */
- struct smc_llc_hdr hd;
- u8 sender_mac[ETH_ALEN];
- u8 sender_gid[SMC_GID_SIZE];
- u8 sender_qp_num[3];
- u8 link_num;
- u8 link_uid[SMC_LGR_ID_SIZE];
- u8 max_links;
- u8 reserved[9];
-};
-
-union smc_llc_msg {
- struct smc_llc_msg_confirm_link confirm_link;
- struct {
- struct smc_llc_hdr hdr;
- u8 data[SMC_LLC_DATA_LEN];
- } raw;
+ SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_CONFIRM_RKEY = 0x06,
+ SMC_LLC_TEST_LINK = 0x07,
+ SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
+ SMC_LLC_DELETE_RKEY = 0x09,
};
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+ enum smc_llc_reqresp reqresp);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index ef0c3494c9cb..210bec3c3ebe 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -19,7 +19,6 @@
#include "smc.h"
#include "smc_core.h"
-#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
diff --git a/net/socket.c b/net/socket.c
index 08847c3b8c39..3d1948d27a25 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,7 +104,6 @@
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
-#include <linux/atalk.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
@@ -234,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
return __put_user(klen, ulen);
}
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
@@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
}
+EXPORT_SYMBOL_GPL(get_net_ns);
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
@@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
goto out_fd;
if (upeer_sockaddr) {
- if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
- &len, 2) < 0) {
+ len = newsock->ops->getname(newsock,
+ (struct sockaddr *)&address, 2);
+ if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
}
@@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
@@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
- if (err)
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ if (err < 0)
goto out_put;
- err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
out_put:
fput_light(sock->file, fput_needed);
@@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int len, err, fput_needed;
+ int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
@@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
return err;
}
- err =
- sock->ops->getname(sock, (struct sockaddr *)&address, &len,
- 1);
- if (!err)
- err = move_addr_to_user(&address, len, usockaddr,
+ err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ if (err >= 0)
+ /* "err" is actually length in this case */
+ err = move_addr_to_user(&address, err, usockaddr,
usockaddr_len);
fput_light(sock->file, fput_needed);
}
@@ -2288,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
@@ -3171,17 +3174,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
}
EXPORT_SYMBOL(kernel_connect);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 0);
+ return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
- int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, addrlen, 1);
+ return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
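
The svcsock.c and xprtsock.c hunks below adapt to the new contract; a minimal kernel-side sketch of a caller (illustrative only, assumes <linux/net.h> and <linux/socket.h> are already included):

/* kernel_getsockname()/kernel_getpeername() now return the address length
 * on success or a negative errno, instead of filling in an int *addrlen.
 */
static int example_local_family(struct socket *sock)
{
	struct sockaddr_storage ss;
	int len;

	len = kernel_getsockname(sock, (struct sockaddr *)&ss);
	if (len < 0)
		return len;		/* negative errno */
	/* 'len' holds what used to come back through the addrlen pointer */
	return ss.ss_family;
}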
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6e432ecd7f99..806395687bb6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
- struct sockaddr *buf, int buflen)
+ struct sockaddr *buf)
{
struct socket *sock;
int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
goto out_release;
}
- err = kernel_getsockname(sock, buf, &buflen);
+ err = kernel_getsockname(sock, buf);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
rcu_read_unlock();
rpc_set_port(sap, 0);
- err = rpc_sockname(net, sap, salen, buf, buflen);
+ err = rpc_sockname(net, sap, salen, buf);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 943f2a745cd5..08cd951aaeea 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- err = kernel_getpeername(newsock, sin, &slen);
+ err = kernel_getpeername(newsock, sin);
if (err < 0) {
net_warn_ratelimited("%s: peername failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
+ slen = err;
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
if (IS_ERR(newsvsk))
goto failed;
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
- err = kernel_getsockname(newsock, sin, &slen);
+ err = kernel_getsockname(newsock, sin);
+ slen = err;
if (unlikely(err < 0)) {
dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
slen = offsetof(struct sockaddr, sa_data);
@@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
err = PTR_ERR(svsk);
goto out;
}
- if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+ salen = kernel_getsockname(svsk->sk_sock, sin);
+ if (salen >= 0)
svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
if (error < 0)
goto bummer;
- newlen = len;
- error = kernel_getsockname(sock, newsin, &newlen);
+ error = kernel_getsockname(sock, newsin);
if (error < 0)
goto bummer;
+ newlen = error;
if (protocol == IPPROTO_TCP) {
if ((error = kernel_listen(sock, 64)) < 0)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a6b8c1f8f92a..956e29c1438d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
- int buflen;
unsigned short port = 0;
- if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+ if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
goto out;
switch (buf.ss_family) {
case AF_INET6:
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9aed6fe1bf1a..f424539829b7 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net)
static struct pernet_operations sysctl_pernet_ops = {
.init = sysctl_net_init,
.exit = sysctl_net_exit,
+ .async = true,
};
static struct ctl_table_header *net_header;
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index c25a3a149dc4..e450212121d2 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP
Saying Y here will enable support for running TIPC over IP/UDP
bool
default y
+
+config TIPC_DIAG
+ tristate "TIPC: socket monitoring interface"
+ depends on TIPC
+ default y
+ ---help---
+ Support for TIPC socket monitoring interface used by ss tool.
+ If unsure, say Y.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 37bb0bfbd936..aca168f2abb1 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,8 +9,13 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o group.o
+ topsrv.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
+
+
+obj-$(CONFIG_TIPC_DIAG) += diag.o
+
+tipc_diag-y := diag.o
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 48fd3b5a73fb..97cd857d7f43 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -64,23 +64,6 @@ int in_own_node(struct net *net, u32 addr)
}
/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-u32 addr_domain(struct net *net, u32 sc)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- if (likely(sc == TIPC_NODE_SCOPE))
- return tn->own_addr;
- if (sc == TIPC_CLUSTER_SCOPE)
- return tipc_cluster_mask(tn->own_addr);
- return tipc_zone_mask(tn->own_addr);
-}
-
-/**
* tipc_addr_domain_valid - validates a network domain address
*
* Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
@@ -124,20 +107,6 @@ int tipc_in_scope(u32 domain, u32 addr)
return 0;
}
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
-{
- if (likely(!domain))
- return TIPC_ZONE_SCOPE;
- if (tipc_node(domain))
- return TIPC_NODE_SCOPE;
- if (tipc_cluster(domain))
- return TIPC_CLUSTER_SCOPE;
- return TIPC_ZONE_SCOPE;
-}
-
char *tipc_addr_string_fill(char *string, u32 addr)
{
snprintf(string, 16, "<%u.%u.%u>",
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index bebb347803ce..2ecf5a5d40dd 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -60,6 +60,16 @@ static inline u32 tipc_cluster_mask(u32 addr)
return addr & TIPC_ZONE_CLUSTER_MASK;
}
+static inline int tipc_node2scope(u32 node)
+{
+ return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{
+ return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
u32 tipc_own_addr(struct net *net);
int in_own_cluster(struct net *net, u32 addr);
int in_own_cluster_exact(struct net *net, u32 addr);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 37892b3909af..f3711176be45 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
{
tipc_dest_list_purge(&nl->list);
nl->remote = 0;
- nl->local = 0;
+ nl->local = false;
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 3e3dce3d4c63..f3d2e83313e1 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -956,11 +956,11 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
{
- int err;
- char *name;
struct tipc_bearer *b;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
+ char *name;
+ int err;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
@@ -987,8 +987,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- if (props[TIPC_NLA_PROP_TOL])
+ if (props[TIPC_NLA_PROP_TOL]) {
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ tipc_node_apply_tolerance(net, b);
+ }
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 0b982d048fb9..04fd91bb11d7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,7 @@ static struct pernet_operations tipc_net_ops = {
.exit = tipc_exit_net,
.id = &tipc_net_id,
.size = sizeof(struct tipc_net),
+ .async = true,
};
static int __init tipc_init(void)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 20b21af2ff14..347f850dc872 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -64,7 +64,7 @@ struct tipc_bearer;
struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
-struct tipc_server;
+struct tipc_topsrv;
struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
@@ -112,7 +112,7 @@ struct tipc_net {
struct list_head dist_queue;
/* Topology subscription server */
- struct tipc_server *topsrv;
+ struct tipc_topsrv *topsrv;
atomic_t subscription_count;
};
@@ -131,7 +131,12 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
-static inline struct tipc_server *tipc_topsrv(struct net *net)
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+ return tipc_net(net)->nametbl;
+}
+
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
{
return tipc_net(net)->topsrv;
}
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
new file mode 100644
index 000000000000..46d9cd62f781
--- /dev/null
+++ b/net/tipc/diag.c
@@ -0,0 +1,114 @@
+/*
+ * net/tipc/diag.c: TIPC socket diag
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+ u32 res[2];
+
+ sock_diag_save_cookie(sk, res);
+ return *((u64 *)res);
+}
+
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk)
+{
+ struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states,
+ __tipc_diag_gen_cookie);
+ if (err)
+ return err;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+ struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct tipc_sock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = tipc_diag_dump,
+ };
+ netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .family = AF_TIPC,
+ .dump = tipc_sock_diag_handler_dump,
+};
+
+static int __init tipc_diag_init(void)
+{
+ return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+static void __exit tipc_diag_exit(void)
+{
+ sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 04e516d18054..d7a7befeddd4 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -37,7 +37,7 @@
#include "addr.h"
#include "group.h"
#include "bcast.h"
-#include "server.h"
+#include "topsrv.h"
#include "msg.h"
#include "socket.h"
#include "node.h"
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2d6b2aed30e0..3c230466804d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2126,7 +2126,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
+ if (link_is_up(l))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4e1c6f6450bb..b6c45dccba3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg = buf_msg(skb);
if (msg_reroute_cnt(msg))
return false;
- dnode = addr_domain(net, msg_lookup_scope(msg));
+ dnode = tipc_scope2node(net, msg_lookup_scope(msg));
dport = tipc_nametbl_translate(net, msg_nametype(msg),
msg_nameinst(msg), &dnode);
if (!dport)
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 23f8899e0f8c..28d095a7d8bb 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
i->type = htonl(p->type);
i->lower = htonl(p->lower);
i->upper = htonl(p->upper);
- i->ref = htonl(p->ref);
+ i->port = htonl(p->port);
i->key = htonl(p->key);
}
@@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
*/
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *buf;
+ struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
+ struct sk_buff *skb;
- list_add_tail_rcu(&publ->local_list,
- &tn->nametbl->publ_list[publ->scope]);
-
- if (publ->scope == TIPC_NODE_SCOPE)
+ if (publ->scope == TIPC_NODE_SCOPE) {
+ list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
return NULL;
+ }
+ list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
- buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Publication distribution failure\n");
return NULL;
}
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
struct sk_buff *buf;
struct distr_item *item;
- list_del(&publ->local_list);
+ list_del(&publ->binding_node);
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
- list_for_each_entry(publ, pls, local_list) {
+ list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
if (!skb) {
skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
*/
void tipc_named_node_up(struct net *net, u32 dnode)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
struct sk_buff_head head;
__skb_queue_head_init(&head);
rcu_read_lock();
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
+ named_distribute(net, &head, dnode, &nt->cluster_scope);
rcu_read_unlock();
tipc_node_xmit(net, &head, dnode, 0);
@@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
spin_lock_bh(&tn->nametbl_lock);
p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
- publ->node, publ->ref, publ->key);
+ publ->node, publ->port, publ->key);
if (p)
- tipc_node_unsubscribe(net, &p->nodesub_list, addr);
+ tipc_node_unsubscribe(net, &p->binding_node, addr);
spin_unlock_bh(&tn->nametbl_lock);
if (p != publ) {
pr_err("Unable to remove publication from failed node\n"
- " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->ref,
+ " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node, publ->port,
publ->key);
}
@@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
{
struct publication *publ, *tmp;
- list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
+ list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
}
@@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
ntohl(i->lower),
ntohl(i->upper),
TIPC_CLUSTER_SCOPE, node,
- ntohl(i->ref), ntohl(i->key));
+ ntohl(i->port), ntohl(i->key));
if (publ) {
- tipc_node_subscribe(net, &publ->nodesub_list, node);
+ tipc_node_subscribe(net, &publ->binding_node, node);
return true;
}
} else if (dtype == WITHDRAWAL) {
publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
ntohl(i->lower),
- node, ntohl(i->ref),
+ node, ntohl(i->port),
ntohl(i->key));
if (publ) {
- tipc_node_unsubscribe(net, &publ->nodesub_list, node);
+ tipc_node_unsubscribe(net, &publ->binding_node, node);
kfree_rcu(publ, rcu);
return true;
}
@@ -382,16 +379,16 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
*/
void tipc_named_reinit(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct publication *publ;
- int scope;
spin_lock_bh(&tn->nametbl_lock);
- for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
- list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope],
- local_list)
- publ->node = tn->own_addr;
+ list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
+ publ->node = tn->own_addr;
+ list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
+ publ->node = tn->own_addr;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1264ba0af937..4753e628d7c4 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -63,7 +63,7 @@ struct distr_item {
__be32 type;
__be32 lower;
__be32 upper;
- __be32 ref;
+ __be32 port;
__be32 key;
};
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ed0457cc99d6..bbbfc0702634 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.c: TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2008, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -50,24 +50,12 @@
/**
* struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- * publications of the associated name sequence belong to it.
- * (The cluster and node lists may be empty.)
+ * @local_publ: list of publications on own node of this <type,lower,upper>
+ * @all_publ: list of all publications of this <type,lower,upper>
*/
struct name_info {
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
- u32 node_list_size;
- u32 cluster_list_size;
- u32 zone_list_size;
+ struct list_head local_publ;
+ struct list_head all_publ;
};
/**
@@ -114,7 +102,7 @@ static int hash(int x)
* publ_create - create a publication structure
*/
static struct publication *publ_create(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port_ref,
+ u32 scope, u32 node, u32 port,
u32 key)
{
struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
@@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
publ->upper = upper;
publ->scope = scope;
publ->node = node;
- publ->ref = port_ref;
+ publ->port = port;
publ->key = key;
- INIT_LIST_HEAD(&publ->pport_list);
+ INIT_LIST_HEAD(&publ->binding_sock);
return publ;
}
@@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
info = sseq->info;
/* Check if an identical publication already exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->ref == port) && (publ->key == key) &&
- (!publ->node || (publ->node == node)))
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
+ if (publ->port == port && publ->key == key &&
+ (!publ->node || publ->node == node))
return NULL;
}
} else {
@@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
return NULL;
}
- INIT_LIST_HEAD(&info->node_list);
- INIT_LIST_HEAD(&info->cluster_list);
- INIT_LIST_HEAD(&info->zone_list);
+ INIT_LIST_HEAD(&info->local_publ);
+ INIT_LIST_HEAD(&info->all_publ);
/* Insert new sub-sequence */
sseq = &nseq->sseqs[inspos];
@@ -311,25 +298,17 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
if (!publ)
return NULL;
- list_add(&publ->zone_list, &info->zone_list);
- info->zone_list_size++;
+ list_add(&publ->all_publ, &info->all_publ);
- if (in_own_cluster(net, node)) {
- list_add(&publ->cluster_list, &info->cluster_list);
- info->cluster_list_size++;
- }
-
- if (in_own_node(net, node)) {
- list_add(&publ->node_list, &info->node_list);
- info->node_list_size++;
- }
+ if (in_own_node(net, node))
+ list_add(&publ->local_publ, &info->local_publ);
/* Any subscriptions waiting for notification? */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_PUBLISHED, publ->ref,
- publ->node, publ->scope,
- created_subseq);
+ tipc_sub_report_overlap(s, publ->lower, publ->upper,
+ TIPC_PUBLISHED, publ->port,
+ publ->node, publ->scope,
+ created_subseq);
}
return publ;
}
@@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
static struct publication *tipc_nameseq_remove_publ(struct net *net,
struct name_seq *nseq,
u32 inst, u32 node,
- u32 ref, u32 key)
+ u32 port, u32 key)
{
struct publication *publ;
struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
@@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
info = sseq->info;
/* Locate publication, if it exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->key == key) && (publ->ref == ref) &&
- (!publ->node || (publ->node == node)))
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
+ if (publ->key == key && publ->port == port &&
+ (!publ->node || publ->node == node))
goto found;
}
return NULL;
found:
- /* Remove publication from zone scope list */
- list_del(&publ->zone_list);
- info->zone_list_size--;
-
- /* Remove publication from cluster scope list, if present */
- if (in_own_cluster(net, node)) {
- list_del(&publ->cluster_list);
- info->cluster_list_size--;
- }
-
- /* Remove publication from node scope list, if present */
- if (in_own_node(net, node)) {
- list_del(&publ->node_list);
- info->node_list_size--;
- }
+ list_del(&publ->all_publ);
+ if (in_own_node(net, node))
+ list_del(&publ->local_publ);
/* Contract subseq list if no more publications for that subseq */
- if (list_empty(&info->zone_list)) {
+ if (list_empty(&info->all_publ)) {
kfree(info);
free = &nseq->sseqs[nseq->first_free--];
memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
@@ -397,10 +364,10 @@ found:
/* Notify any waiting subscriptions */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
- TIPC_WITHDRAWN, publ->ref,
- publ->node, publ->scope,
- removed_subseq);
+ tipc_sub_report_overlap(s, publ->lower, publ->upper,
+ TIPC_WITHDRAWN, publ->port,
+ publ->node, publ->scope,
+ removed_subseq);
}
return publ;
@@ -412,33 +379,38 @@ found:
* sequence overlapping with the requested sequence
*/
static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s,
- bool status)
+ struct tipc_subscription *sub)
{
struct sub_seq *sseq = nseq->sseqs;
struct tipc_name_seq ns;
+ struct tipc_subscr *s = &sub->evt.s;
+ bool no_status;
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(s, seq.type);
+ ns.lower = tipc_sub_read(s, seq.lower);
+ ns.upper = tipc_sub_read(s, seq.upper);
+ no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
- tipc_subscrp_get(s);
- list_add(&s->nameseq_list, &nseq->subscriptions);
+ tipc_sub_get(sub);
+ list_add(&sub->nameseq_list, &nseq->subscriptions);
- if (!status || !sseq)
+ if (no_status || !sseq)
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
+ if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
struct publication *crs;
struct name_info *info = sseq->info;
int must_report = 1;
- list_for_each_entry(crs, &info->zone_list, zone_list) {
- tipc_subscrp_report_overlap(s, sseq->lower,
- sseq->upper,
- TIPC_PUBLISHED,
- crs->ref, crs->node,
- crs->scope,
- must_report);
+ list_for_each_entry(crs, &info->all_publ, all_publ) {
+ tipc_sub_report_overlap(sub, sseq->lower,
+ sseq->upper,
+ TIPC_PUBLISHED,
+ crs->port,
+ crs->node,
+ crs->scope,
+ must_report);
must_report = 0;
}
}
@@ -470,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct name_seq *seq = nametbl_find_seq(net, type);
int index = hash(type);
- if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
- (lower > upper)) {
+ if (scope > TIPC_NODE_SCOPE || lower > upper) {
pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
type, lower, upper, scope);
return NULL;
@@ -490,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
}
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 node, u32 ref,
+ u32 lower, u32 node, u32 port,
u32 key)
{
struct publication *publ;
@@ -500,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
return NULL;
spin_lock_bh(&seq->lock);
- publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
+ publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
@@ -533,7 +504,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
struct name_info *info;
struct publication *publ;
struct name_seq *seq;
- u32 ref = 0;
+ u32 port = 0;
u32 node = 0;
if (!tipc_in_scope(*destnode, tn->own_addr))
@@ -551,54 +522,42 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
/* Closest-First Algorithm */
if (likely(!*destnode)) {
- if (!list_empty(&info->node_list)) {
- publ = list_first_entry(&info->node_list,
- struct publication,
- node_list);
- list_move_tail(&publ->node_list,
- &info->node_list);
- } else if (!list_empty(&info->cluster_list)) {
- publ = list_first_entry(&info->cluster_list,
+ if (!list_empty(&info->local_publ)) {
+ publ = list_first_entry(&info->local_publ,
struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list,
- &info->cluster_list);
+ local_publ);
+ list_move_tail(&publ->local_publ,
+ &info->local_publ);
} else {
- publ = list_first_entry(&info->zone_list,
+ publ = list_first_entry(&info->all_publ,
struct publication,
- zone_list);
- list_move_tail(&publ->zone_list,
- &info->zone_list);
+ all_publ);
+ list_move_tail(&publ->all_publ,
+ &info->all_publ);
}
}
/* Round-Robin Algorithm */
else if (*destnode == tn->own_addr) {
- if (list_empty(&info->node_list))
+ if (list_empty(&info->local_publ))
goto no_match;
- publ = list_first_entry(&info->node_list, struct publication,
- node_list);
- list_move_tail(&publ->node_list, &info->node_list);
- } else if (in_own_cluster_exact(net, *destnode)) {
- if (list_empty(&info->cluster_list))
- goto no_match;
- publ = list_first_entry(&info->cluster_list, struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list, &info->cluster_list);
+ publ = list_first_entry(&info->local_publ, struct publication,
+ local_publ);
+ list_move_tail(&publ->local_publ, &info->local_publ);
} else {
- publ = list_first_entry(&info->zone_list, struct publication,
- zone_list);
- list_move_tail(&publ->zone_list, &info->zone_list);
+ publ = list_first_entry(&info->all_publ, struct publication,
+ all_publ);
+ list_move_tail(&publ->all_publ, &info->all_publ);
}
- ref = publ->ref;
+ port = publ->port;
node = publ->node;
no_match:
spin_unlock_bh(&seq->lock);
not_found:
rcu_read_unlock();
*destnode = node;
- return ref;
+ return port;
}
bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
@@ -620,16 +579,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
sseq = nameseq_find_subseq(seq, instance);
if (likely(sseq)) {
info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
if (publ->scope != scope)
continue;
- if (publ->ref == exclude && publ->node == self)
+ if (publ->port == exclude && publ->node == self)
continue;
- tipc_dest_push(dsts, publ->node, publ->ref);
+ tipc_dest_push(dsts, publ->node, publ->port);
(*dstcnt)++;
if (all)
continue;
- list_move_tail(&publ->zone_list, &info->zone_list);
+ list_move_tail(&publ->all_publ, &info->all_publ);
break;
}
}
@@ -639,15 +598,14 @@ exit:
return !list_empty(dsts);
}
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports)
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports)
{
struct sub_seq *sseq_stop;
struct name_info *info;
struct publication *p;
struct name_seq *seq;
struct sub_seq *sseq;
- int res = 0;
rcu_read_lock();
seq = nametbl_find_seq(net, type);
@@ -661,18 +619,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
if (sseq->lower > upper)
break;
info = sseq->info;
- list_for_each_entry(p, &info->node_list, node_list) {
+ list_for_each_entry(p, &info->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
- tipc_dest_push(dports, 0, p->ref);
+ tipc_dest_push(dports, 0, p->port);
}
-
- if (info->cluster_list_size != info->node_list_size)
- res = 1;
}
spin_unlock_bh(&seq->lock);
exit:
rcu_read_unlock();
- return res;
}
/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
@@ -697,7 +651,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
stop = seq->sseqs + seq->first_free;
for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
tipc_nlist_add(nodes, publ->node);
}
}
@@ -726,10 +680,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
stop = seq->sseqs + seq->first_free;
for (; sseq != stop; sseq++) {
info = sseq->info;
- list_for_each_entry(p, &info->zone_list, zone_list) {
+ list_for_each_entry(p, &info->all_publ, all_publ) {
if (p->scope != scope)
continue;
- tipc_group_add_member(grp, p->node, p->ref, p->lower);
+ tipc_group_add_member(grp, p->node, p->port, p->lower);
}
}
spin_unlock_bh(&seq->lock);
@@ -774,7 +728,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
/**
* tipc_nametbl_withdraw - withdraw name publication from network name tables
*/
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
u32 key)
{
struct publication *publ;
@@ -783,18 +737,18 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
spin_lock_bh(&tn->nametbl_lock);
publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
- ref, key);
+ port, key);
if (likely(publ)) {
tn->nametbl->local_publ_count--;
skb = tipc_named_withdraw(net, publ);
/* Any pending external events? */
tipc_named_process_backlog(net);
- list_del_init(&publ->pport_list);
+ list_del_init(&publ->binding_sock);
kfree_rcu(publ, rcu);
} else {
pr_err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
+ "(type=%u, lower=%u, port=%u, key=%u)\n",
+ type, lower, port, key);
}
spin_unlock_bh(&tn->nametbl_lock);
@@ -808,24 +762,27 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
+void tipc_nametbl_subscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ struct tipc_net *tn = tipc_net(sub->net);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 type = tipc_sub_read(s, seq.type);
int index = hash(type);
struct name_seq *seq;
struct tipc_name_seq ns;
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
+ seq = nametbl_find_seq(sub->net, type);
if (!seq)
seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
if (seq) {
spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s, status);
+ tipc_nameseq_subscribe(seq, sub);
spin_unlock_bh(&seq->lock);
} else {
- tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ ns.type = tipc_sub_read(s, seq.type);
+ ns.lower = tipc_sub_read(s, seq.lower);
+ ns.upper = tipc_sub_read(s, seq.upper);
pr_warn("Failed to create subscription for {%u,%u,%u}\n",
ns.type, ns.lower, ns.upper);
}
@@ -835,18 +792,19 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
/**
* tipc_nametbl_unsubscribe - remove a subscription object from name table
*/
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(s->net, tipc_net_id);
+ struct tipc_subscr *s = &sub->evt.s;
+ struct tipc_net *tn = tipc_net(sub->net);
struct name_seq *seq;
- u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+ u32 type = tipc_sub_read(s, seq.type);
spin_lock_bh(&tn->nametbl_lock);
- seq = nametbl_find_seq(s->net, type);
+ seq = nametbl_find_seq(sub->net, type);
if (seq != NULL) {
spin_lock_bh(&seq->lock);
- list_del_init(&s->nameseq_list);
- tipc_subscrp_put(s);
+ list_del_init(&sub->nameseq_list);
+ tipc_sub_put(sub);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
@@ -872,9 +830,8 @@ int tipc_nametbl_init(struct net *net)
for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
+ INIT_LIST_HEAD(&tipc_nametbl->node_scope);
+ INIT_LIST_HEAD(&tipc_nametbl->cluster_scope);
tn->nametbl = tipc_nametbl;
spin_lock_init(&tn->nametbl_lock);
return 0;
@@ -894,9 +851,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
spin_lock_bh(&seq->lock);
sseq = seq->sseqs;
info = sseq->info;
- list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+ list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) {
tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
- publ->ref, publ->key);
+ publ->port, publ->key);
kfree_rcu(publ, rcu);
}
hlist_del_init_rcu(&seq->ns_list);
@@ -943,17 +900,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &sseq->info->zone_list, zone_list)
+ list_for_each_entry(p, &sseq->info->all_publ, all_publ)
if (p->key == *last_publ)
break;
if (p->key != *last_publ)
return -EPIPE;
} else {
- p = list_first_entry(&sseq->info->zone_list, struct publication,
- zone_list);
+ p = list_first_entry(&sseq->info->all_publ, struct publication,
+ all_publ);
}
- list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) {
+ list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) {
*last_publ = p->key;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
@@ -980,7 +937,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
goto publ_msg_full;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f56e7cb3d436..34a4ccb907aa 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.h: Include file for TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -54,19 +54,22 @@ struct tipc_group;
* @type: name sequence type
* @lower: name sequence lower bound
* @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @nodesub_list: subscription to "node down" event (off-node publication only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @node: network address of publishing socket's node
+ * @port: publishing port
+ * @key: publication key, unique across the cluster
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list
+ * Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ * Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
* @rcu: RCU callback head used for deferred freeing
- *
- * Note that the node list, cluster list, and zone list are circular lists.
*/
struct publication {
u32 type;
@@ -74,34 +77,37 @@ struct publication {
u32 upper;
u32 scope;
u32 node;
- u32 ref;
+ u32 port;
u32 key;
- struct list_head nodesub_list;
- struct list_head local_list;
- struct list_head pport_list;
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
+ struct list_head binding_node;
+ struct list_head binding_sock;
+ struct list_head local_publ;
+ struct list_head all_publ;
struct rcu_head rcu;
};
/**
* struct name_table - table containing all existing port name publications
* @seq_hlist: name sequence hash lists
- * @publ_list: pulication lists
+ * @node_scope: all local publications with node scope
+ * - used by name_distr during re-init of name table
+ * @cluster_scope: all local publications with cluster scope
+ * - used by name_distr to send bulk updates to new nodes
+ * - used by name_distr during re-init of name table
* @local_publ_count: number of publications issued by this node
*/
struct name_table {
struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
- struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+ struct list_head node_scope;
+ struct list_head cluster_scope;
u32 local_publ_count;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports);
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
@@ -120,7 +126,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 lower, u32 node, u32 ref,
u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 1a2fde0d6f61..5c4c4405b78e 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -118,7 +118,7 @@ int tipc_net_start(struct net *net, u32 addr)
tipc_sk_reinit(net);
tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
- TIPC_ZONE_SCOPE, 0, tn->own_addr);
+ TIPC_CLUSTER_SCOPE, 0, tn->own_addr);
pr_info("Started in network mode\n");
pr_info("Own node address %s, network identity %u\n",
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9036d8756e73..389193d7cf67 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1618,6 +1618,30 @@ discard:
kfree_skb(skb);
}
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int bearer_id = b->identity;
+ struct sk_buff_head xmitq;
+ struct tipc_link_entry *e;
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_write_lock(n);
+ e = &n->links[bearer_id];
+ if (e->link)
+ tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+ tipc_node_write_unlock(n);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+ }
+
+ rcu_read_unlock();
+}
+
int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index acd58d23a70e..4ce5e3a185c0 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -65,6 +65,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
diff --git a/net/tipc/server.c b/net/tipc/server.c
deleted file mode 100644
index df0c563c90cd..000000000000
--- a/net/tipc/server.c
+++ /dev/null
@@ -1,710 +0,0 @@
-/*
- * net/tipc/server.c: TIPC server infrastructure
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "server.h"
-#include "core.h"
-#include "socket.h"
-#include "addr.h"
-#include "msg.h"
-#include <net/sock.h>
-#include <linux/module.h>
-
-/* Number of messages to send before rescheduling */
-#define MAX_SEND_MSG_COUNT 25
-#define MAX_RECV_MSG_COUNT 25
-#define CF_CONNECTED 1
-#define CF_SERVER 2
-
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
-
-/**
- * struct tipc_conn - TIPC connection structure
- * @kref: reference counter to connection object
- * @conid: connection identifier
- * @sock: socket handler associated with connection
- * @flags: indicates connection state
- * @server: pointer to connected server
- * @rwork: receive work item
- * @usr_data: user-specified field
- * @rx_action: what to do when connection socket is active
- * @outqueue: pointer to first outbound message in queue
- * @outqueue_lock: control access to the outqueue
- * @outqueue: list of connection objects for its server
- * @swork: send work item
- */
-struct tipc_conn {
- struct kref kref;
- int conid;
- struct socket *sock;
- unsigned long flags;
- struct tipc_server *server;
- struct work_struct rwork;
- int (*rx_action) (struct tipc_conn *con);
- void *usr_data;
- struct list_head outqueue;
- spinlock_t outqueue_lock;
- struct work_struct swork;
-};
-
-/* An entry waiting to be sent */
-struct outqueue_entry {
- struct list_head list;
- struct kvec iov;
- struct sockaddr_tipc dest;
-};
-
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static void tipc_conn_kref_release(struct kref *kref)
-{
- struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
- struct sockaddr_tipc *saddr = s->saddr;
- struct socket *sock = con->sock;
- struct sock *sk;
-
- if (sock) {
- sk = sock->sk;
- if (test_bit(CF_SERVER, &con->flags)) {
- __module_get(sock->ops->owner);
- __module_get(sk->sk_prot_creator->owner);
- }
- saddr->scope = -TIPC_NODE_SCOPE;
- kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- sock_release(sock);
- con->sock = NULL;
- }
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
- tipc_clean_outqueues(con);
- kfree(con);
-}
-
-static void conn_put(struct tipc_conn *con)
-{
- kref_put(&con->kref, tipc_conn_kref_release);
-}
-
-static void conn_get(struct tipc_conn *con)
-{
- kref_get(&con->kref);
-}
-
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- spin_lock_bh(&s->idr_lock);
- con = idr_find(&s->conn_idr, conid);
- if (con) {
- if (!test_bit(CF_CONNECTED, &con->flags) ||
- !kref_get_unless_zero(&con->kref))
- con = NULL;
- }
- spin_unlock_bh(&s->idr_lock);
- return con;
-}
-
-static void sock_data_ready(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->rcv_wq, &con->rwork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void sock_write_space(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock_bh(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->send_wq, &con->swork))
- conn_put(con);
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
- struct sock *sk = sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
-
- sk->sk_data_ready = sock_data_ready;
- sk->sk_write_space = sock_write_space;
- sk->sk_user_data = con;
-
- con->sock = sock;
-
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- bool disconnect = false;
-
- write_lock_bh(&sk->sk_callback_lock);
- disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
- if (disconnect) {
- sk->sk_user_data = NULL;
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
- }
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Handle concurrent calls from sending and receiving threads */
- if (!disconnect)
- return;
-
- /* Don't flush pending works, -just let them expire */
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
- conn_put(con);
-}
-
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int ret;
-
- con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
- if (!con)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&con->kref);
- INIT_LIST_HEAD(&con->outqueue);
- spin_lock_init(&con->outqueue_lock);
- INIT_WORK(&con->swork, tipc_send_work);
- INIT_WORK(&con->rwork, tipc_recv_work);
-
- spin_lock_bh(&s->idr_lock);
- ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
- if (ret < 0) {
- kfree(con);
- spin_unlock_bh(&s->idr_lock);
- return ERR_PTR(-ENOMEM);
- }
- con->conid = ret;
- s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
-
- set_bit(CF_CONNECTED, &con->flags);
- con->server = s;
-
- return con;
-}
-
-static int tipc_receive_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct sock *sk = con->sock->sk;
- struct sockaddr_tipc addr;
- struct msghdr msg = {};
- struct kvec iov;
- void *buf;
- int ret;
-
- buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
- if (!buf) {
- ret = -ENOMEM;
- goto out_close;
- }
-
- iov.iov_base = buf;
- iov.iov_len = s->max_rcvbuf_size;
- msg.msg_name = &addr;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
- ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
- if (ret <= 0) {
- kmem_cache_free(s->rcvbuf_cache, buf);
- goto out_close;
- }
-
- read_lock_bh(&sk->sk_callback_lock);
- if (test_bit(CF_CONNECTED, &con->flags))
- ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
- &addr, con->usr_data, buf, ret);
- read_unlock_bh(&sk->sk_callback_lock);
- kmem_cache_free(s->rcvbuf_cache, buf);
- if (ret < 0)
- tipc_conn_terminate(s, con->conid);
- return ret;
-
-out_close:
- if (ret != -EWOULDBLOCK)
- tipc_close_conn(con);
- else if (ret == 0)
- /* Don't return success if we really got EOF */
- ret = -EAGAIN;
-
- return ret;
-}
-
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = con->sock;
- struct socket *newsock;
- struct tipc_conn *newcon;
- int ret;
-
- ret = kernel_accept(sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- return ret;
-
- newcon = tipc_alloc_conn(con->server);
- if (IS_ERR(newcon)) {
- ret = PTR_ERR(newcon);
- sock_release(newsock);
- return ret;
- }
-
- newcon->rx_action = tipc_receive_from_sock;
- tipc_register_callbacks(newsock, newcon);
-
- /* Notify that new connection is incoming */
- newcon->usr_data = s->tipc_conn_new(newcon->conid);
- if (!newcon->usr_data) {
- sock_release(newsock);
- conn_put(newcon);
- return -ENOMEM;
- }
-
- /* Wake up receive process in case of 'SYN+' message */
- newsock->sk->sk_data_ready(newsock->sk);
- return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = NULL;
- int ret;
-
- ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
- if (ret < 0)
- return NULL;
- ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&s->imp, sizeof(s->imp));
- if (ret < 0)
- goto create_err;
- ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
- if (ret < 0)
- goto create_err;
-
- switch (s->type) {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- con->rx_action = tipc_accept_from_sock;
-
- ret = kernel_listen(sock, 0);
- if (ret < 0)
- goto create_err;
- break;
- case SOCK_DGRAM:
- case SOCK_RDM:
- con->rx_action = tipc_receive_from_sock;
- break;
- default:
- pr_err("Unknown socket type %d\n", s->type);
- goto create_err;
- }
-
- /* As server's listening socket owner and creator is the same module,
- * we have to decrease TIPC module reference count to guarantee that
- * it remains zero after the server socket is created, otherwise,
- * executing "rmmod" command is unable to make TIPC module deleted
- * after TIPC module is inserted successfully.
- *
- * However, the reference count is ever increased twice in
- * sock_create_kern(): one is to increase the reference count of owner
- * of TIPC socket's proto_ops struct; another is to increment the
- * reference count of owner of TIPC proto struct. Therefore, we must
- * decrement the module reference count twice to ensure that it keeps
- * zero after server's listening socket is created. Of course, we
- * must bump the module reference count twice as well before the socket
- * is closed.
- */
- module_put(sock->ops->owner);
- module_put(sock->sk->sk_prot_creator->owner);
- set_bit(CF_SERVER, &con->flags);
-
- return sock;
-
-create_err:
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
- return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
- struct socket *sock;
- struct tipc_conn *con;
-
- con = tipc_alloc_conn(s);
- if (IS_ERR(con))
- return PTR_ERR(con);
-
- sock = tipc_create_listen_sock(con);
- if (!sock) {
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- kfree(con);
- return -EINVAL;
- }
-
- tipc_register_callbacks(sock, con);
- return 0;
-}
-
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
- struct outqueue_entry *entry;
- void *buf;
-
- entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
- if (!entry)
- return NULL;
-
- buf = kmemdup(data, len, GFP_ATOMIC);
- if (!buf) {
- kfree(entry);
- return NULL;
- }
-
- entry->iov.iov_base = buf;
- entry->iov.iov_len = len;
-
- return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
- kfree(e->iov.iov_base);
- kfree(e);
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
- struct outqueue_entry *e, *safe;
-
- spin_lock_bh(&con->outqueue_lock);
- list_for_each_entry_safe(e, safe, &con->outqueue, list) {
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-}
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len)
-{
- struct outqueue_entry *e;
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (!con)
- return -EINVAL;
-
- if (!test_bit(CF_CONNECTED, &con->flags)) {
- conn_put(con);
- return 0;
- }
-
- e = tipc_alloc_entry(data, len);
- if (!e) {
- conn_put(con);
- return -ENOMEM;
- }
-
- if (addr)
- memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
- spin_lock_bh(&con->outqueue_lock);
- list_add_tail(&e->list, &con->outqueue);
- spin_unlock_bh(&con->outqueue_lock);
-
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (con) {
- tipc_close_conn(con);
- conn_put(con);
- }
-}
-
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
- u32 upper, u32 filter, int *conid)
-{
- struct tipc_subscriber *scbr;
- struct tipc_subscr sub;
- struct tipc_server *s;
- struct tipc_conn *con;
-
- sub.seq.type = type;
- sub.seq.lower = lower;
- sub.seq.upper = upper;
- sub.timeout = TIPC_WAIT_FOREVER;
- sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
-
- con = tipc_alloc_conn(tipc_topsrv(net));
- if (IS_ERR(con))
- return false;
-
- *conid = con->conid;
- s = con->server;
- scbr = s->tipc_conn_new(*conid);
- if (!scbr) {
- conn_put(con);
- return false;
- }
-
- con->usr_data = scbr;
- con->sock = NULL;
- s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
- return true;
-}
-
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
-{
- struct tipc_conn *con;
- struct tipc_server *srv;
-
- con = tipc_conn_lookup(tipc_topsrv(net), conid);
- if (!con)
- return;
-
- test_and_clear_bit(CF_CONNECTED, &con->flags);
- srv = con->server;
- if (con->conid)
- srv->tipc_conn_release(con->conid, con->usr_data);
- conn_put(con);
- conn_put(con);
-}
-
-static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
-{
- u32 port = *(u32 *)&evt->s.usr_handle;
- u32 self = tipc_own_addr(net);
- struct sk_buff_head evtq;
- struct sk_buff *skb;
-
- skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
- self, self, port, port, 0);
- if (!skb)
- return;
- msg_set_dest_droppable(buf_msg(skb), true);
- memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
- skb_queue_head_init(&evtq);
- __skb_queue_tail(&evtq, skb);
- tipc_sk_rcv(net, &evtq);
-}
-
-static void tipc_send_to_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct outqueue_entry *e;
- struct tipc_event *evt;
- struct msghdr msg;
- int count = 0;
- int ret;
-
- spin_lock_bh(&con->outqueue_lock);
- while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry, list);
- if ((struct list_head *) e == &con->outqueue)
- break;
-
- spin_unlock_bh(&con->outqueue_lock);
-
- if (con->sock) {
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
- }
- } else {
- evt = e->iov.iov_base;
- tipc_send_kern_top_evt(s->net, evt);
- }
- /* Don't starve users filling buffers */
- if (++count >= MAX_SEND_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
-
- spin_lock_bh(&con->outqueue_lock);
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-out:
- return;
-
-send_err:
- tipc_close_conn(con);
-}
-
-static void tipc_recv_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
- int count = 0;
-
- while (test_bit(CF_CONNECTED, &con->flags)) {
- if (con->rx_action(con))
- break;
-
- /* Don't flood Rx machine */
- if (++count >= MAX_RECV_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- }
- conn_put(con);
-}
-
-static void tipc_send_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
-
- if (test_bit(CF_CONNECTED, &con->flags))
- tipc_send_to_sock(con);
-
- conn_put(con);
-}
-
-static void tipc_work_stop(struct tipc_server *s)
-{
- destroy_workqueue(s->rcv_wq);
- destroy_workqueue(s->send_wq);
-}
-
-static int tipc_work_start(struct tipc_server *s)
-{
- s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
- if (!s->rcv_wq) {
- pr_err("can't start tipc receive workqueue\n");
- return -ENOMEM;
- }
-
- s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
- if (!s->send_wq) {
- pr_err("can't start tipc send workqueue\n");
- destroy_workqueue(s->rcv_wq);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int tipc_server_start(struct tipc_server *s)
-{
- int ret;
-
- spin_lock_init(&s->idr_lock);
- idr_init(&s->conn_idr);
- s->idr_in_use = 0;
-
- s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!s->rcvbuf_cache)
- return -ENOMEM;
-
- ret = tipc_work_start(s);
- if (ret < 0) {
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- ret = tipc_open_listening_sock(s);
- if (ret < 0) {
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- return ret;
-}
-
-void tipc_server_stop(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int id;
-
- spin_lock_bh(&s->idr_lock);
- for (id = 0; s->idr_in_use; id++) {
- con = idr_find(&s->conn_idr, id);
- if (con) {
- spin_unlock_bh(&s->idr_lock);
- tipc_close_conn(con);
- spin_lock_bh(&s->idr_lock);
- }
- }
- spin_unlock_bh(&s->idr_lock);
-
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- idr_destroy(&s->conn_idr);
-}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7dfa9fc99ec3..732ec894f69f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -644,7 +644,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
goto exit;
}
- res = (addr->scope > 0) ?
+ res = (addr->scope >= 0) ?
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
@@ -666,7 +666,7 @@ exit:
* a completely predictable manner).
*/
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct sock *sk = sock->sk;
@@ -685,13 +685,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
addr->addr.id.node = tn->own_addr;
}
- *uaddr_len = sizeof(*addr);
addr->addrtype = TIPC_ADDR_ID;
addr->family = AF_TIPC;
addr->scope = 0;
addr->addr.name.domain = 0;
- return 0;
+ return sizeof(*addr);
}
/**
@@ -1281,8 +1280,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 type, inst, domain;
u32 dnode, dport;
+ u32 type, inst;
int mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1333,13 +1332,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (dest->addrtype == TIPC_ADDR_NAME) {
type = dest->addr.name.name.type;
inst = dest->addr.name.name.instance;
- domain = dest->addr.name.domain;
- dnode = domain;
+ dnode = dest->addr.name.domain;
msg_set_type(hdr, TIPC_NAMED_MSG);
msg_set_hdr_sz(hdr, NAMED_H_SIZE);
msg_set_nametype(hdr, type);
msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
+ msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(hdr, dnode);
msg_set_destport(hdr, dport);
@@ -2124,8 +2122,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
- else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+ atomic_inc(&sk->sk_drops);
err = TIPC_ERR_OVERLOAD;
+ }
if (unlikely(err)) {
tipc_skb_reject(net, err, skb, xmitq);
@@ -2204,6 +2204,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Overload => reject message back to sender */
onode = tipc_own_addr(sock_net(sk));
+ atomic_inc(&sk->sk_drops);
if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
__skb_queue_tail(xmitq, skb);
break;
@@ -2593,6 +2594,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct publication *publ;
u32 key;
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
if (tipc_sk_connected(sk))
return -EINVAL;
key = tsk->portid + tsk->pub_count + 1;
@@ -2604,7 +2608,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
if (unlikely(!publ))
return -EINVAL;
- list_add(&publ->pport_list, &tsk->publications);
+ list_add(&publ->binding_sock, &tsk->publications);
tsk->pub_count++;
tsk->published = 1;
return 0;
@@ -2618,7 +2622,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct publication *safe;
int rc = -EINVAL;
- list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
+ list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
if (seq) {
if (publ->scope != scope)
continue;
@@ -2629,12 +2636,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
if (publ->upper != seq->upper)
break;
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->port, publ->key);
rc = 0;
break;
}
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->port, publ->key);
rc = 0;
}
if (list_empty(&tsk->publications))
@@ -3156,16 +3163,33 @@ msg_full:
return -EMSGSIZE;
}
+static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
+ *tsk)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+ struct sock *sk = &tsk->sk;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+ return -EMSGSIZE;
+
+ if (tipc_sk_connected(sk)) {
+ if (__tipc_nl_add_sk_con(skb, tsk))
+ return -EMSGSIZE;
+ } else if (!list_empty(&tsk->publications)) {
+ if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
struct tipc_sock *tsk)
{
- int err;
- void *hdr;
struct nlattr *attrs;
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sock *sk = &tsk->sk;
+ void *hdr;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -3175,19 +3199,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
if (!attrs)
goto genlmsg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
- goto attr_msg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
goto attr_msg_cancel;
- if (tipc_sk_connected(sk)) {
- err = __tipc_nl_add_sk_con(skb, tsk);
- if (err)
- goto attr_msg_cancel;
- } else if (!list_empty(&tsk->publications)) {
- if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
- goto attr_msg_cancel;
- }
nla_nest_end(skb, attrs);
genlmsg_end(skb, hdr);
@@ -3201,16 +3216,19 @@ msg_cancel:
return -EMSGSIZE;
}
-int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk))
{
- int err;
- struct tipc_sock *tsk;
- const struct bucket_table *tbl;
- struct rhash_head *pos;
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 tbl_id = cb->args[0];
+ struct tipc_net *tn = tipc_net(net);
+ const struct bucket_table *tbl;
u32 prev_portid = cb->args[1];
+ u32 tbl_id = cb->args[0];
+ struct rhash_head *pos;
+ struct tipc_sock *tsk;
+ int err;
rcu_read_lock();
tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
@@ -3222,12 +3240,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
- err = __tipc_nl_add_sk(skb, cb, tsk);
+ err = skb_handler(skb, cb, tsk);
if (err) {
prev_portid = tsk->portid;
spin_unlock_bh(&tsk->sk.sk_lock.slock);
goto out;
}
+
prev_portid = 0;
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
@@ -3239,6 +3258,75 @@ out:
return skb->len;
}
+EXPORT_SYMBOL(tipc_nl_sk_walk);
+
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+ u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk))
+{
+ struct sock *sk = &tsk->sk;
+ struct nlattr *attrs;
+ struct nlattr *stat;
+
+ /* filter response w.r.t. sk_state */
+ if (!(sk_filter_state & (1 << sk->sk_state)))
+ return 0;
+
+ attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+ if (!attrs)
+ goto msg_cancel;
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_UID,
+ from_kuid_munged(sk_user_ns(sk), sock_i_uid(sk))) ||
+ nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
+ tipc_diag_gen_cookie(sk),
+ TIPC_NLA_SOCK_PAD))
+ goto attr_msg_cancel;
+
+ stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+ if (!stat)
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
+ skb_queue_len(&sk->sk_receive_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
+ skb_queue_len(&sk->sk_write_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
+ atomic_read(&sk->sk_drops)))
+ goto stat_msg_cancel;
+
+ if (tsk->cong_link_cnt &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
+ goto stat_msg_cancel;
+
+ if (tsk_conn_cong(tsk) &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
+ goto stat_msg_cancel;
+
+ nla_nest_end(skb, stat);
+ nla_nest_end(skb, attrs);
+
+ return 0;
+
+stat_msg_cancel:
+ nla_nest_cancel(skb, stat);
+attr_msg_cancel:
+ nla_nest_cancel(skb, attrs);
+msg_cancel:
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
+
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+}
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
@@ -3288,7 +3376,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &tsk->publications, pport_list) {
+ list_for_each_entry(p, &tsk->publications, binding_sock) {
if (p->key == *last_publ)
break;
}
@@ -3305,10 +3393,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
}
} else {
p = list_first_entry(&tsk->publications, struct publication,
- pport_list);
+ binding_sock);
}
- list_for_each_entry_from(p, &tsk->publications, pport_list) {
+ list_for_each_entry_from(p, &tsk->publications, binding_sock) {
err = __tipc_nl_add_sk_publ(skb, cb, p);
if (err) {
*last_publ = p->key;
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 06fb5944cf76..aae3fd4cd06c 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -49,6 +49,8 @@
#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
+struct tipc_sock;
+
int tipc_socket_init(void);
void tipc_socket_stop(void);
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
@@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net);
void tipc_sk_rht_destroy(struct net *net);
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+ u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk));
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk));
#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 68e26470c516..6925a989569b 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,7 +1,7 @@
/*
* net/tipc/subscr.c: TIPC network topology service
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -38,61 +38,30 @@
#include "name_table.h"
#include "subscr.h"
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @kref: reference counter to tipc_subscription object
- * @conid: connection identifier to server connecting to subscriber
- * @lock: control access to subscriber
- * @subscrp_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
- struct kref kref;
- int conid;
- spinlock_t lock;
- struct list_head subscrp_list;
-};
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
- return swap ? swab32(in) : in;
-}
-
-static void tipc_subscrp_send_event(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port_ref, u32 node)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
- struct kvec msg_sect;
+ struct tipc_event *evt = &sub->evt;
- msg_sect.iov_base = (void *)&sub->evt;
- msg_sect.iov_len = sizeof(struct tipc_event);
- sub->evt.event = htohl(event, sub->swap);
- sub->evt.found_lower = htohl(found_lower, sub->swap);
- sub->evt.found_upper = htohl(found_upper, sub->swap);
- sub->evt.port.ref = htohl(port_ref, sub->swap);
- sub->evt.port.node = htohl(node, sub->swap);
- tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
+ if (sub->inactive)
+ return;
+ tipc_evt_write(evt, event, event);
+ tipc_evt_write(evt, found_lower, found_lower);
+ tipc_evt_write(evt, found_upper, found_upper);
+ tipc_evt_write(evt, port.ref, port);
+ tipc_evt_write(evt, port.node, node);
+ tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
}
/**
- * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * tipc_sub_check_overlap - test for subscription overlap with the
* given values
*
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper)
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper)
{
if (found_lower < seq->lower)
found_lower = seq->lower;
@@ -103,298 +72,98 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
return 1;
}
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
-{
- return htohl(type, swap);
-}
-
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out)
-{
- out->type = htohl(in->type, swap);
- out->lower = htohl(in->lower, swap);
- out->upper = htohl(in->upper, swap);
-}
-
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, u32 scope, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must)
{
- u32 filter = htohl(sub->evt.s.filter, sub->swap);
+ struct tipc_subscr *s = &sub->evt.s;
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_name_seq seq;
- tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
- if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
+ seq.type = tipc_sub_read(s, seq.type);
+ seq.lower = tipc_sub_read(s, seq.lower);
+ seq.upper = tipc_sub_read(s, seq.upper);
+
+ if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
return;
+
if (!must && !(filter & TIPC_SUB_PORTS))
return;
if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
return;
if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
return;
-
- tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
- node);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, found_lower, found_upper,
+ event, port, node);
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_timeout(struct timer_list *t)
+static void tipc_sub_timeout(struct timer_list *t)
{
struct tipc_subscription *sub = from_timer(sub, t, timer);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- spin_lock_bh(&subscriber->lock);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- spin_unlock_bh(&subscriber->lock);
-
- /* Notify subscriber of timeout */
- tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
-
- tipc_subscrp_put(sub);
-}
-
-static void tipc_subscrb_kref_release(struct kref *kref)
-{
- kfree(container_of(kref,struct tipc_subscriber, kref));
-}
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
-{
- kref_put(&subscriber->kref, tipc_subscrb_kref_release);
-}
+ struct tipc_subscr *s = &sub->evt.s;
-static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
-{
- kref_get(&subscriber->kref);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
+ TIPC_SUBSCR_TIMEOUT, 0, 0);
+ sub->inactive = true;
+ spin_unlock(&sub->lock);
}
-static void tipc_subscrp_kref_release(struct kref *kref)
+static void tipc_sub_kref_release(struct kref *kref)
{
- struct tipc_subscription *sub = container_of(kref,
- struct tipc_subscription,
- kref);
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- atomic_dec(&tn->subscription_count);
- kfree(sub);
- tipc_subscrb_put(subscriber);
+ kfree(container_of(kref, struct tipc_subscription, kref));
}
-void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_sub_put(struct tipc_subscription *subscription)
{
- kref_put(&subscription->kref, tipc_subscrp_kref_release);
+ kref_put(&subscription->kref, tipc_sub_kref_release);
}
-void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_sub_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
-/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
- * subscriptions for a given subscriber.
- */
-static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
- struct tipc_subscr *s)
-{
- struct list_head *subscription_list = &subscriber->subscrp_list;
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
- if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
- continue;
-
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- tipc_subscrp_put(sub);
- }
-
- if (s)
- break;
- }
- spin_unlock_bh(&subscriber->lock);
-}
-
-static struct tipc_subscriber *tipc_subscrb_create(int conid)
-{
- struct tipc_subscriber *subscriber;
-
- subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
- if (!subscriber) {
- pr_warn("Subscriber rejected, no memory\n");
- return NULL;
- }
- INIT_LIST_HEAD(&subscriber->subscrp_list);
- kref_init(&subscriber->kref);
- subscriber->conid = conid;
- spin_lock_init(&subscriber->lock);
-
- return subscriber;
-}
-
-static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_subscrp_delete(subscriber, NULL);
- tipc_subscrb_put(subscriber);
-}
-
-static void tipc_subscrp_cancel(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
-{
- tipc_subscrb_get(subscriber);
- tipc_subscrb_subscrp_delete(subscriber, s);
- tipc_subscrb_put(subscriber);
-}
-
-static struct tipc_subscription *tipc_subscrp_create(struct net *net,
- struct tipc_subscr *s,
- int swap)
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_subscription *sub;
- u32 filter = htohl(s->filter, swap);
+ u32 timeout;
- /* Refuse subscription if global limit exceeded */
- if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
- pr_warn("Subscription rejected, limit reached (%u)\n",
- TIPC_MAX_SUBSCRIPTIONS);
+ if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+ (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+ pr_warn("Subscription rejected, illegal request\n");
return NULL;
}
-
- /* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
pr_warn("Subscription rejected, no memory\n");
return NULL;
}
-
- /* Initialize subscription object */
sub->net = net;
- if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
- (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
- pr_warn("Subscription rejected, illegal request\n");
- kfree(sub);
- return NULL;
- }
-
- sub->swap = swap;
+ sub->conid = conid;
+ sub->inactive = false;
memcpy(&sub->evt.s, s, sizeof(*s));
- atomic_inc(&tn->subscription_count);
+ spin_lock_init(&sub->lock);
kref_init(&sub->kref);
- return sub;
-}
-
-static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber, int swap,
- bool status)
-{
- struct tipc_subscription *sub = NULL;
- u32 timeout;
-
- sub = tipc_subscrp_create(net, s, swap);
- if (!sub)
- return -1;
-
- spin_lock_bh(&subscriber->lock);
- list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- sub->subscriber = subscriber;
- tipc_nametbl_subscribe(sub, status);
- tipc_subscrb_get(subscriber);
- spin_unlock_bh(&subscriber->lock);
-
- timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
- timeout = htohl(sub->evt.s.timeout, swap);
-
+ tipc_nametbl_subscribe(sub);
+ timer_setup(&sub->timer, tipc_sub_timeout, 0);
+ timeout = tipc_sub_read(&sub->evt.s, timeout);
if (timeout != TIPC_WAIT_FOREVER)
mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
- return 0;
-}
-
-/* Handle one termination request for the subscriber */
-static void tipc_subscrb_release_cb(int conid, void *usr_data)
-{
- tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static int tipc_subscrb_rcv_cb(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len)
-{
- struct tipc_subscriber *subscriber = usr_data;
- struct tipc_subscr *s = (struct tipc_subscr *)buf;
- bool status;
- int swap;
-
- /* Determine subscriber's endianness */
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
- TIPC_SUB_CANCEL));
-
- /* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- tipc_subscrp_cancel(s, subscriber);
- return 0;
- }
- status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
- return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
-}
-
-/* Handle one request to establish a new subscriber */
-static void *tipc_subscrb_connect_cb(int conid)
-{
- return (void *)tipc_subscrb_create(conid);
-}
-
-int tipc_topsrv_start(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- const char name[] = "topology_server";
- struct tipc_server *topsrv;
- struct sockaddr_tipc *saddr;
-
- saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
- if (!saddr)
- return -ENOMEM;
- saddr->family = AF_TIPC;
- saddr->addrtype = TIPC_ADDR_NAMESEQ;
- saddr->addr.nameseq.type = TIPC_TOP_SRV;
- saddr->addr.nameseq.lower = TIPC_TOP_SRV;
- saddr->addr.nameseq.upper = TIPC_TOP_SRV;
- saddr->scope = TIPC_NODE_SCOPE;
-
- topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
- if (!topsrv) {
- kfree(saddr);
- return -ENOMEM;
- }
- topsrv->net = net;
- topsrv->saddr = saddr;
- topsrv->imp = TIPC_CRITICAL_IMPORTANCE;
- topsrv->type = SOCK_SEQPACKET;
- topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr);
- topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb;
- topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
- topsrv->tipc_conn_release = tipc_subscrb_release_cb;
-
- strncpy(topsrv->name, name, strlen(name) + 1);
- tn->topsrv = topsrv;
- atomic_set(&tn->subscription_count, 0);
-
- return tipc_server_start(topsrv);
+ return sub;
}
-void tipc_topsrv_stop(struct net *net)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_server *topsrv = tn->topsrv;
-
- tipc_server_stop(topsrv);
- kfree(topsrv->saddr);
- kfree(topsrv);
+ tipc_nametbl_unsubscribe(sub);
+ if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+ del_timer_sync(&sub->timer);
+ list_del(&sub->sub_list);
+ tipc_sub_put(sub);
}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index f3edca775d9f..8b2d22b18f22 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,7 +1,7 @@
/*
* net/tipc/subscr.h: Include file for TIPC network topology service
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
* Copyright (c) 2005-2007, 2012-2013, Wind River Systems
* All rights reserved.
*
@@ -37,48 +37,72 @@
#ifndef _TIPC_SUBSCR_H
#define _TIPC_SUBSCR_H
-#include "server.h"
+#include "topsrv.h"
-#define TIPC_MAX_SUBSCRIPTIONS 65535
-#define TIPC_MAX_PUBLICATIONS 65535
+#define TIPC_MAX_SUBSCR 65535
+#define TIPC_MAX_PUBLICATIONS 65535
struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
/**
* struct tipc_subscription - TIPC network topology subscription object
* @subscriber: pointer to its subscriber
* @seq: name sequence associated with subscription
- * @net: point to network namespace
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @swap: indicates if subscriber uses opposite endianness in its messages
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
struct kref kref;
- struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
struct list_head nameseq_list;
- struct list_head subscrp_list;
- int swap;
+ struct list_head sub_list;
struct tipc_event evt;
+ int conid;
+ bool inactive;
+ spinlock_t lock; /* serialize up/down and timer events */
};
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper);
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper, u32 event,
- u32 port_ref, u32 node, u32 scope, int must);
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
- struct tipc_name_seq *out);
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
+
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+ u32 found_upper);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ u32 found_lower, u32 found_upper,
+ u32 event, u32 port, u32 node,
+ u32 scope, int must);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
-void tipc_subscrp_put(struct tipc_subscription *subscription);
-void tipc_subscrp_get(struct tipc_subscription *subscription);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
+
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
+
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = (sub__)->field_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (swap_ ? swab32(val__) : val__); \
+ })
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_) \
+ ({ \
+ struct tipc_event *evt__ = evt_; \
+ u32 val__ = val_; \
+ int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \
+ (evt__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
#endif
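The tipc_sub_read()/tipc_evt_write() macros above replace the old per-subscription swap flag and htohl() helper: whether the peer uses the opposite byte order is inferred from the filter word itself, on the assumption that a native-endian subscriber always sets at least one bit of TIPC_FILTER_MASK. A minimal user-space sketch of that inference follows; the struct, the constants and __builtin_bswap32() are local stand-ins for the kernel's struct tipc_subscr, the UAPI filter flags and swab32():

/* Stand-alone illustration (user-space C, not kernel code) of the byte-order
 * detection used by tipc_sub_read(): if none of the known low filter bits are
 * set when the filter word is read natively, the peer is assumed to use the
 * opposite endianness and every 32-bit field gets swapped on access.
 */
#include <stdint.h>
#include <stdio.h>

#define SUB_PORTS	0x01u
#define SUB_SERVICE	0x02u
#define SUB_CANCEL	0x04u
#define FILTER_MASK	(SUB_PORTS | SUB_SERVICE | SUB_CANCEL)

struct subscr {			/* minimal stand-in for struct tipc_subscr */
	uint32_t lower;
	uint32_t upper;
	uint32_t filter;
};

static uint32_t sub_read(const struct subscr *s, uint32_t field)
{
	int swap = !(s->filter & FILTER_MASK);	/* peer uses other byte order? */

	return swap ? __builtin_bswap32(field) : field;
}

int main(void)
{
	struct subscr native  = { 10, 20, SUB_SERVICE };
	struct subscr foreign = {
		__builtin_bswap32(10),
		__builtin_bswap32(20),
		__builtin_bswap32(SUB_SERVICE),
	};

	/* Both read back as 10 regardless of the sender's byte order */
	printf("native:  %u\n", sub_read(&native, native.lower));
	printf("foreign: %u\n", sub_read(&foreign, foreign.lower));
	return 0;
}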
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
new file mode 100644
index 000000000000..c8e34ef22c30
--- /dev/null
+++ b/net/tipc/topsrv.c
@@ -0,0 +1,703 @@
+/*
+ * net/tipc/topsrv.c: TIPC server infrastructure
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017-2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "subscr.h"
+#include "topsrv.h"
+#include "core.h"
+#include "socket.h"
+#include "addr.h"
+#include "msg.h"
+#include <net/sock.h>
+#include <linux/module.h>
+
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT 25
+#define MAX_RECV_MSG_COUNT 25
+#define CF_CONNECTED 1
+#define CF_SERVER 2
+
+#define TIPC_SERVER_NAME_LEN 32
+
+/**
+ * struct tipc_topsrv - TIPC server structure
+ * @conn_idr: identifier set of connection
+ * @idr_lock: protect the connection identifier set
+ * @idr_in_use: number of allocated identifier entries
+ * @net: network namespace instance
+ * @awork: accept work item
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @max_rcvbuf_size: maximum permitted receive message length
+ * @listener: topology server listener socket
+ * @name: server name
+ */
+struct tipc_topsrv {
+ struct idr conn_idr;
+ spinlock_t idr_lock; /* for idr list */
+ int idr_in_use;
+ struct net *net;
+ struct work_struct awork;
+ struct workqueue_struct *rcv_wq;
+ struct workqueue_struct *send_wq;
+ int max_rcvbuf_size;
+ struct socket *listener;
+ char name[TIPC_SERVER_NAME_LEN];
+};
+
+/**
+ * struct tipc_conn - TIPC connection structure
+ * @kref: reference counter to connection object
+ * @conid: connection identifier
+ * @sock: socket handler associated with connection
+ * @flags: indicates connection state
+ * @server: pointer to connected server
+ * @sub_list: list of all subscriptions pertaining to this connection
+ * @sub_lock: lock protecting the subscription list
+ * @rwork: receive work item
+ * @outqueue: pointer to first outbound message in queue
+ * @outqueue_lock: control access to the outqueue
+ * @swork: send work item
+ */
+struct tipc_conn {
+ struct kref kref;
+ int conid;
+ struct socket *sock;
+ unsigned long flags;
+ struct tipc_topsrv *server;
+ struct list_head sub_list;
+ spinlock_t sub_lock; /* for subscription list */
+ struct work_struct rwork;
+ struct list_head outqueue;
+ spinlock_t outqueue_lock; /* for outqueue */
+ struct work_struct swork;
+};
+
+/* An entry waiting to be sent */
+struct outqueue_entry {
+ bool inactive;
+ struct tipc_event evt;
+ struct list_head list;
+};
+
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
+
+static bool connected(struct tipc_conn *con)
+{
+ return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+static void tipc_conn_kref_release(struct kref *kref)
+{
+ struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct tipc_topsrv *s = con->server;
+ struct outqueue_entry *e, *safe;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
+ if (con->sock)
+ sock_release(con->sock);
+
+ spin_lock_bh(&con->outqueue_lock);
+ list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+ kfree(con);
+}
+
+static void conn_put(struct tipc_conn *con)
+{
+ kref_put(&con->kref, tipc_conn_kref_release);
+}
+
+static void conn_get(struct tipc_conn *con)
+{
+ kref_get(&con->kref);
+}
+
+static void tipc_conn_close(struct tipc_conn *con)
+{
+ struct sock *sk = con->sock->sk;
+ bool disconnect = false;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
+ if (disconnect) {
+ sk->sk_user_data = NULL;
+ tipc_conn_delete_sub(con, NULL);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Handle concurrent calls from sending and receiving threads */
+ if (!disconnect)
+ return;
+
+ /* Don't flush pending works, just let them expire */
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
+ conn_put(con);
+}
+
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+{
+ struct tipc_conn *con;
+ int ret;
+
+ con = kzalloc(sizeof(*con), GFP_ATOMIC);
+ if (!con)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&con->kref);
+ INIT_LIST_HEAD(&con->outqueue);
+ INIT_LIST_HEAD(&con->sub_list);
+ spin_lock_init(&con->outqueue_lock);
+ spin_lock_init(&con->sub_lock);
+ INIT_WORK(&con->swork, tipc_conn_send_work);
+ INIT_WORK(&con->rwork, tipc_conn_recv_work);
+
+ spin_lock_bh(&s->idr_lock);
+ ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
+ if (ret < 0) {
+ kfree(con);
+ spin_unlock_bh(&s->idr_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ con->conid = ret;
+ s->idr_in_use++;
+ spin_unlock_bh(&s->idr_lock);
+
+ set_bit(CF_CONNECTED, &con->flags);
+ con->server = s;
+
+ return con;
+}
+
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
+{
+ struct tipc_conn *con;
+
+ spin_lock_bh(&s->idr_lock);
+ con = idr_find(&s->conn_idr, conid);
+ if (!connected(con) || !kref_get_unless_zero(&con->kref))
+ con = NULL;
+ spin_unlock_bh(&s->idr_lock);
+ return con;
+}
+
+/* tipc_conn_delete_sub - delete a specific or all subscriptions
+ * for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(con->server->net);
+ struct list_head *sub_list = &con->sub_list;
+ struct tipc_subscription *sub, *tmp;
+
+ spin_lock_bh(&con->sub_lock);
+ list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+ if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+ tipc_sub_unsubscribe(sub);
+ atomic_dec(&tn->subscription_count);
+ } else if (s) {
+ break;
+ }
+ }
+ spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+ struct list_head *queue = &con->outqueue;
+ struct tipc_topsrv *srv = con->server;
+ struct outqueue_entry *e;
+ struct tipc_event *evt;
+ struct msghdr msg;
+ struct kvec iov;
+ int count = 0;
+ int ret;
+
+ spin_lock_bh(&con->outqueue_lock);
+
+ while (!list_empty(queue)) {
+ e = list_first_entry(queue, struct outqueue_entry, list);
+ evt = &e->evt;
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (e->inactive)
+ tipc_conn_delete_sub(con, &evt->s);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ iov.iov_base = evt;
+ iov.iov_len = sizeof(*evt);
+ msg.msg_name = NULL;
+
+ if (con->sock) {
+ ret = kernel_sendmsg(con->sock, &msg, &iov,
+ 1, sizeof(*evt));
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ return;
+ } else if (ret < 0) {
+ return tipc_conn_close(con);
+ }
+ } else {
+ tipc_topsrv_kern_evt(srv->net, evt);
+ }
+
+ /* Don't starve users filling buffers */
+ if (++count >= MAX_SEND_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ spin_lock_bh(&con->outqueue_lock);
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+ if (connected(con))
+ tipc_conn_send_to_sock(con);
+
+ conn_put(con);
+}
+
+/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct outqueue_entry *e;
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(srv, conid);
+ if (!con)
+ return;
+
+ if (!connected(con))
+ goto err;
+
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
+ if (!e)
+ goto err;
+ e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+ memcpy(&e->evt, evt, sizeof(*evt));
+ spin_lock_bh(&con->outqueue_lock);
+ list_add_tail(&e->list, &con->outqueue);
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (queue_work(srv->send_wq, &con->swork))
+ return;
+err:
+ conn_put(con);
+}
+
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->send_wq, &con->swork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+ struct tipc_conn *con,
+ struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(srv->net);
+ struct tipc_subscription *sub;
+
+ if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ tipc_conn_delete_sub(con, s);
+ return 0;
+ }
+ if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+ pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+ return -1;
+ }
+ sub = tipc_sub_subscribe(srv->net, s, con->conid);
+ if (!sub)
+ return -1;
+ atomic_inc(&tn->subscription_count);
+ spin_lock_bh(&con->sub_lock);
+ list_add(&sub->sub_list, &con->sub_list);
+ spin_unlock_bh(&con->sub_lock);
+ return 0;
+}
+
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
+{
+ struct tipc_topsrv *srv = con->server;
+ struct sock *sk = con->sock->sk;
+ struct msghdr msg = {};
+ struct tipc_subscr s;
+ struct kvec iov;
+ int ret;
+
+ iov.iov_base = &s;
+ iov.iov_len = sizeof(s);
+ msg.msg_name = NULL;
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+ if (ret == -EWOULDBLOCK)
+ return -EWOULDBLOCK;
+ if (ret > 0) {
+ read_lock_bh(&sk->sk_callback_lock);
+ ret = tipc_conn_rcv_sub(srv, con, &s);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+ if (ret < 0)
+ tipc_conn_close(con);
+
+ return ret;
+}
+
+static void tipc_conn_recv_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
+ int count = 0;
+
+ while (connected(con)) {
+ if (tipc_conn_rcv_from_sock(con))
+ break;
+
+ /* Don't flood Rx machine */
+ if (++count >= MAX_RECV_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ }
+ conn_put(con);
+}
+
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->rcv_wq, &con->rwork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void tipc_topsrv_accept(struct work_struct *work)
+{
+ struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+ struct socket *lsock = srv->listener;
+ struct socket *newsock;
+ struct tipc_conn *con;
+ struct sock *newsk;
+ int ret;
+
+ while (1) {
+ ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+ if (ret < 0)
+ return;
+ con = tipc_conn_alloc(srv);
+ if (IS_ERR(con)) {
+ ret = PTR_ERR(con);
+ sock_release(newsock);
+ return;
+ }
+ /* Register callbacks */
+ newsk = newsock->sk;
+ write_lock_bh(&newsk->sk_callback_lock);
+ newsk->sk_data_ready = tipc_conn_data_ready;
+ newsk->sk_write_space = tipc_conn_write_space;
+ newsk->sk_user_data = con;
+ con->sock = newsock;
+ write_unlock_bh(&newsk->sk_callback_lock);
+
+ /* Wake up receive process in case of 'SYN+' message */
+ newsk->sk_data_ready(newsk);
+ }
+}
+
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
+ */
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
+{
+ struct tipc_topsrv *srv;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ srv = sk->sk_user_data;
+ if (srv->listener)
+ queue_work(srv->rcv_wq, &srv->awork);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
+{
+ int imp = TIPC_CRITICAL_IMPORTANCE;
+ struct socket *lsock = NULL;
+ struct sockaddr_tipc saddr;
+ struct sock *sk;
+ int rc;
+
+ rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+ if (rc < 0)
+ return rc;
+
+ srv->listener = lsock;
+ sk = lsock->sk;
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = tipc_topsrv_listener_data_ready;
+ sk->sk_user_data = srv;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
+ (char *)&imp, sizeof(imp));
+ if (rc < 0)
+ goto err;
+
+ saddr.family = AF_TIPC;
+ saddr.addrtype = TIPC_ADDR_NAMESEQ;
+ saddr.addr.nameseq.type = TIPC_TOP_SRV;
+ saddr.addr.nameseq.lower = TIPC_TOP_SRV;
+ saddr.addr.nameseq.upper = TIPC_TOP_SRV;
+ saddr.scope = TIPC_NODE_SCOPE;
+
+ rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (rc < 0)
+ goto err;
+ rc = kernel_listen(lsock, 0);
+ if (rc < 0)
+ goto err;
+
+ /* The TIPC module is both owner and creator of the server's listening
+ * socket, so we must drop the module reference counts taken when the
+ * socket was created; otherwise they could never return to zero and
+ * "rmmod" would be unable to remove the TIPC module once it has been
+ * inserted.
+ *
+ * sock_create_kern() increases the reference count twice: once for the
+ * owner of the TIPC socket's proto_ops struct and once for the owner
+ * of the TIPC proto struct. Therefore we must call module_put() twice
+ * to keep the count at zero while the listening socket exists, and of
+ * course bump it twice again before the socket is closed.
+ */
+ module_put(lsock->ops->owner);
+ module_put(sk->sk_prot_creator->owner);
+
+ return 0;
+err:
+ sock_release(lsock);
+ return -EINVAL;
+}
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
+{
+ struct tipc_subscr sub;
+ struct tipc_conn *con;
+ int rc;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_conn_alloc(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ con->sock = NULL;
+ rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+ if (rc >= 0)
+ return true;
+ conn_put(con);
+ return false;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+
+ test_and_clear_bit(CF_CONNECTED, &con->flags);
+ tipc_conn_delete_sub(con, NULL);
+ conn_put(con);
+ conn_put(con);
+}
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
+{
+ s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
+ if (!s->rcv_wq) {
+ pr_err("can't start tipc receive workqueue\n");
+ return -ENOMEM;
+ }
+
+ s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
+ if (!s->send_wq) {
+ pr_err("can't start tipc send workqueue\n");
+ destroy_workqueue(s->rcv_wq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
+{
+ destroy_workqueue(s->rcv_wq);
+ destroy_workqueue(s->send_wq);
+}
+
+int tipc_topsrv_start(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ const char name[] = "topology_server";
+ struct tipc_topsrv *srv;
+ int ret;
+
+ srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+ if (!srv)
+ return -ENOMEM;
+
+ srv->net = net;
+ srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
+ INIT_WORK(&srv->awork, tipc_topsrv_accept);
+
+ strncpy(srv->name, name, strlen(name) + 1);
+ tn->topsrv = srv;
+ atomic_set(&tn->subscription_count, 0);
+
+ spin_lock_init(&srv->idr_lock);
+ idr_init(&srv->conn_idr);
+ srv->idr_in_use = 0;
+
+ ret = tipc_topsrv_work_start(srv);
+ if (ret < 0)
+ return ret;
+
+ ret = tipc_topsrv_create_listener(srv);
+ if (ret < 0)
+ tipc_topsrv_work_stop(srv);
+
+ return ret;
+}
+
+void tipc_topsrv_stop(struct net *net)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct socket *lsock = srv->listener;
+ struct tipc_conn *con;
+ int id;
+
+ spin_lock_bh(&srv->idr_lock);
+ for (id = 0; srv->idr_in_use; id++) {
+ con = idr_find(&srv->conn_idr, id);
+ if (con) {
+ spin_unlock_bh(&srv->idr_lock);
+ tipc_conn_close(con);
+ spin_lock_bh(&srv->idr_lock);
+ }
+ }
+ __module_get(lsock->ops->owner);
+ __module_get(lsock->sk->sk_prot_creator->owner);
+ srv->listener = NULL;
+ spin_unlock_bh(&srv->idr_lock);
+ sock_release(lsock);
+ tipc_topsrv_work_stop(srv);
+ idr_destroy(&srv->conn_idr);
+ kfree(srv);
+}
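For context, the service this new file implements is consumed from user space through the ordinary TIPC socket API: a subscriber connects to the {TIPC_TOP_SRV, TIPC_TOP_SRV} name, writes a struct tipc_subscr (read above by tipc_conn_rcv_from_sock()) and reads back struct tipc_event records (queued by tipc_topsrv_queue_evt()). The following is a minimal subscriber sketch using the standard <linux/tipc.h> UAPI; the service type 18888 is an arbitrary value chosen for illustration:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/tipc.h>

int main(void)
{
	struct sockaddr_tipc topsrv;
	struct tipc_subscr subscr;
	struct tipc_event evt;
	int sd;

	sd = socket(AF_TIPC, SOCK_SEQPACKET, 0);
	if (sd < 0)
		return 1;

	/* The topology server listens on the {TIPC_TOP_SRV, TIPC_TOP_SRV} name */
	memset(&topsrv, 0, sizeof(topsrv));
	topsrv.family = AF_TIPC;
	topsrv.addrtype = TIPC_ADDR_NAME;
	topsrv.addr.name.name.type = TIPC_TOP_SRV;
	topsrv.addr.name.name.instance = TIPC_TOP_SRV;
	if (connect(sd, (struct sockaddr *)&topsrv, sizeof(topsrv)) < 0)
		return 1;

	/* Subscribe to service type 18888 (arbitrary), whole instance range */
	memset(&subscr, 0, sizeof(subscr));
	subscr.seq.type = 18888;
	subscr.seq.lower = 0;
	subscr.seq.upper = ~0u;
	subscr.timeout = TIPC_WAIT_FOREVER;
	subscr.filter = TIPC_SUB_SERVICE;
	if (send(sd, &subscr, sizeof(subscr), 0) != sizeof(subscr))
		return 1;

	/* Each publication/withdrawal of a matching name arrives as one event */
	while (recv(sd, &evt, sizeof(evt), 0) == sizeof(evt)) {
		if (evt.event == TIPC_PUBLISHED)
			printf("published %u-%u\n", evt.found_lower, evt.found_upper);
		else if (evt.event == TIPC_WITHDRAWN)
			printf("withdrawn %u-%u\n", evt.found_lower, evt.found_upper);
	}
	close(sd);
	return 0;
}

Because the subscriber writes its filter word in its own byte order, the server replies in that same byte order (see tipc_evt_write() in subscr.h), so the sketch can read the event fields natively.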
diff --git a/net/tipc/server.h b/net/tipc/topsrv.h
index 64df7513cd70..c7ea71293748 100644
--- a/net/tipc/server.h
+++ b/net/tipc/topsrv.h
@@ -2,6 +2,7 @@
* net/tipc/server.h: Include file for TIPC server code
*
* Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,68 +37,18 @@
#ifndef _TIPC_SERVER_H
#define _TIPC_SERVER_H
-#include <linux/idr.h>
-#include <linux/tipc.h>
-#include <net/net_namespace.h>
+#include "core.h"
#define TIPC_SERVER_NAME_LEN 32
#define TIPC_SUB_CLUSTER_SCOPE 0x20
#define TIPC_SUB_NODE_SCOPE 0x40
#define TIPC_SUB_NO_STATUS 0x80
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- */
-struct tipc_server {
- struct idr conn_idr;
- spinlock_t idr_lock;
- int idr_in_use;
- struct net *net;
- struct kmem_cache *rcvbuf_cache;
- struct workqueue_struct *rcv_wq;
- struct workqueue_struct *send_wq;
- int max_rcvbuf_size;
- void *(*tipc_conn_new)(int conid);
- void (*tipc_conn_release)(int conid, void *usr_data);
- int (*tipc_conn_recvmsg)(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len);
- struct sockaddr_tipc *saddr;
- char name[TIPC_SERVER_NAME_LEN];
- int imp;
- int type;
-};
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len);
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt);
bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
u32 upper, u32 filter, int *conid);
void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
#endif
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f26376e954ae..057a558ed6d7 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -87,71 +87,16 @@ static void trim_both_sgl(struct sock *sk, int target_size)
target_size);
}
-static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
- int *sg_num_elem, unsigned int *sg_size,
- int first_coalesce)
-{
- struct page_frag *pfrag;
- unsigned int size = *sg_size;
- int num_elem = *sg_num_elem, use = 0, rc = 0;
- struct scatterlist *sge;
- unsigned int orig_offset;
-
- len -= size;
- pfrag = sk_page_frag(sk);
-
- while (len > 0) {
- if (!sk_page_frag_refill(sk, pfrag)) {
- rc = -ENOMEM;
- goto out;
- }
-
- use = min_t(int, len, pfrag->size - pfrag->offset);
-
- if (!sk_wmem_schedule(sk, use)) {
- rc = -ENOMEM;
- goto out;
- }
-
- sk_mem_charge(sk, use);
- size += use;
- orig_offset = pfrag->offset;
- pfrag->offset += use;
-
- sge = sg + num_elem - 1;
- if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
- sg->offset + sg->length == orig_offset) {
- sg->length += use;
- } else {
- sge++;
- sg_unmark_end(sge);
- sg_set_page(sge, pfrag->page, use, orig_offset);
- get_page(pfrag->page);
- ++num_elem;
- if (num_elem == MAX_SKB_FRAGS) {
- rc = -ENOSPC;
- break;
- }
- }
-
- len -= use;
- }
- goto out;
-
-out:
- *sg_size = size;
- *sg_num_elem = num_elem;
- return rc;
-}
-
static int alloc_encrypted_sg(struct sock *sk, int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
int rc = 0;
- rc = alloc_sg(sk, len, ctx->sg_encrypted_data,
- &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0);
+ rc = sk_alloc_sg(sk, len,
+ ctx->sg_encrypted_data, 0,
+ &ctx->sg_encrypted_num_elem,
+ &ctx->sg_encrypted_size, 0);
return rc;
}
@@ -162,9 +107,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
int rc = 0;
- rc = alloc_sg(sk, len, ctx->sg_plaintext_data,
- &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
- tls_ctx->pending_open_record_frags);
+ rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
+ &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
+ tls_ctx->pending_open_record_frags);
return rc;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2d465bdeccbc..bc2970a8e7f3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
-static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
@@ -1453,7 +1453,7 @@ out:
}
-static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
@@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
if (!u->addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
- *uaddr_len = sizeof(short);
+ err = sizeof(short);
} else {
struct unix_address *addr = u->addr;
- *uaddr_len = addr->len;
- memcpy(sunaddr, addr->name, *uaddr_len);
+ err = addr->len;
+ memcpy(sunaddr, addr->name, addr->len);
}
unix_state_unlock(sk);
sock_put(sk);
@@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net)
static struct pernet_operations unix_net_ops = {
.init = unix_net_init,
.exit = unix_net_exit,
+ .async = true,
};
static int __init af_unix_init(void)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e0fc84daed94..aac9b8f6552e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
static int vsock_getname(struct socket *sock,
- struct sockaddr *addr, int *addr_len, int peer)
+ struct sockaddr *addr, int peer)
{
int err;
struct sock *sk;
@@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock,
*/
BUILD_BUG_ON(sizeof(*vm_addr) > 128);
memcpy(addr, vm_addr, sizeof(*vm_addr));
- *addr_len = sizeof(*vm_addr);
+ err = sizeof(*vm_addr);
out:
release_sock(sk);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a6f3cac8c640..670aa229168a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1340,6 +1340,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
static struct pernet_operations cfg80211_pernet_ops = {
.exit = cfg80211_pernet_exit,
+ .async = true,
};
static int __init cfg80211_init(void)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9c0dcc8324b0..a910150f8169 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -421,6 +421,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
+ [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -3923,9 +3924,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
return false;
return true;
case NL80211_CMD_CONNECT:
- /* SAE not supported yet */
- if (auth_type == NL80211_AUTHTYPE_SAE)
+ if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+ auth_type == NL80211_AUTHTYPE_SAE)
return false;
+
/* FILS with SK PFS or PK not supported yet */
if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
auth_type == NL80211_AUTHTYPE_FILS_PK)
@@ -4487,6 +4489,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
PUT_SINFO_U64(BEACON_RX, rx_beacon);
PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+ PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
#undef PUT_SINFO
#undef PUT_SINFO_U64
@@ -5848,7 +5851,6 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
return genlmsg_reply(msg, info);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
out:
nlmsg_free(msg);
return -ENOBUFS;
@@ -6329,7 +6331,6 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
nla_put_failure_rcu:
rcu_read_unlock();
nla_put_failure:
- genlmsg_cancel(msg, hdr);
put_failure:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -6718,8 +6719,17 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
+ ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
return -EOPNOTSUPP;
if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
@@ -9155,6 +9165,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
+ if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+ GENL_SET_ERR_MSG(info,
+ "external auth requires connection ownership");
+ return -EINVAL;
+ }
+ connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -12463,6 +12482,41 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_external_auth_params params;
+
+ if (!rdev->ops->external_auth)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_SSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_BSSID])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_STATUS_CODE])
+ return -EINVAL;
+
+ memset(&params, 0, sizeof(params));
+
+ params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ if (params.ssid.ssid_len == 0 ||
+ params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+ return -EINVAL;
+ memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
+ params.ssid.ssid_len);
+
+ memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
+ ETH_ALEN);
+
+ params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+
+ return rdev_external_auth(rdev, dev, &params);
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -13358,6 +13412,14 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_EXTERNAL_AUTH,
+ .doit = nl80211_external_auth,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
@@ -13672,7 +13734,6 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13720,7 +13781,6 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13808,7 +13868,6 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13884,7 +13943,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13924,7 +13982,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13954,7 +14011,6 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -13991,7 +14047,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14024,7 +14079,6 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14065,7 +14119,6 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
@@ -14104,7 +14157,6 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14159,7 +14211,6 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14205,7 +14256,6 @@ static void nl80211_send_remain_on_chan_event(
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14319,7 +14369,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_conn_failed);
@@ -14356,7 +14405,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
return true;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return true;
}
@@ -14440,7 +14488,6 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -14484,7 +14531,6 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
@@ -14693,7 +14739,6 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14751,7 +14796,6 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14804,7 +14848,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
@@ -14886,12 +14929,67 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+ struct sta_opmode_info *sta_opmode,
+ gfp_t gfp)
+{
+ struct sk_buff *msg;
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ void *hdr;
+
+ if (WARN_ON(!mac))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+ goto nla_put_failure;
+
+ if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
+ nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, gfp);
+
+ return;
+
+nla_put_failure:
+ nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
+
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
- u64 cookie, bool acked, gfp_t gfp)
+ u64 cookie, bool acked, s32 ack_signal,
+ bool is_valid_ack_signal, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -14916,7 +15014,9 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
NL80211_ATTR_PAD) ||
- (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
+ (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+ (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
+ ack_signal)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -14926,7 +15026,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_probe_status);
@@ -14971,8 +15070,6 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
nla_put_failure:
spin_unlock_bh(&rdev->beacon_registrations_lock);
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_obss_beacon);
@@ -15188,7 +15285,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_tdls_oper_request);
@@ -15333,8 +15429,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
return;
nla_put_failure:
- if (hdr)
- genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
@@ -15369,6 +15463,47 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
nlmsg_free(msg);
}
+int cfg80211_external_auth_request(struct net_device *dev,
+ struct cfg80211_external_auth_params *params,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!wdev->conn_owner_nlportid)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
+ nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ params->action) ||
+ nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
+ nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
+ params->ssid.ssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+ wdev->conn_owner_nlportid);
+ return 0;
+
+ nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(cfg80211_external_auth_request);
+
/* initialisation/exit functions */
int __init nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0c06240d25af..84f23ae015fc 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1190,4 +1190,19 @@ static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
+
+static inline int
+rdev_external_auth(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_external_auth_params *params)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_external_auth(&rdev->wiphy, dev, params);
+ if (rdev->ops->external_auth)
+ ret = rdev->ops->external_auth(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index bcfedd39e7a3..5152938b358d 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2319,6 +2319,29 @@ TRACE_EVENT(rdev_del_pmk,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
);
+TRACE_EVENT(rdev_external_auth,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_external_auth_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(bssid)
+ __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
+ __field(u16, status)
+ ),
+ TP_fast_assign(WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(bssid, params->bssid);
+ memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
+ memcpy(__entry->ssid, params->ssid.ssid,
+ params->ssid.ssid_len);
+ __entry->status = params->status;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+ ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->bssid, __entry->ssid, __entry->status)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
diff --git a/net/wireless/util.c b/net/wireless/util.c
index c69160694b6c..d112e9a89364 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype)
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return -1;
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
if (skb->len < hdrlen + 8)
return -1;
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 9efbfc753347..bc7064486b15 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net)
static struct pernet_operations wext_pernet_ops = {
.init = wext_pernet_init,
.exit = wext_pernet_exit,
+ .async = true,
};
static int __init wireless_nlevent_init(void)
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 562cc11131f6..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -896,7 +896,7 @@ out:
}
static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
struct sock *sk = sock->sk;
@@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
sx25->sx25_addr = x25->source_addr;
sx25->sx25_family = AF_X25;
- *uaddr_len = sizeof(*sx25);
+ rc = sizeof(*sx25);
out:
return rc;
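
For illustration only (not part of the patch): this hunk follows the kernel-wide change to the getname() socket op, also visible in the tomoyo hunk later in this diff, where the address length is now returned (or a negative errno) instead of being stored through a uaddr_len pointer. Below is a minimal kernel-context caller sketch under that assumption; the helper name is hypothetical.

/* Sketch only: obtain a socket's local address and its length. */
static int example_local_addr_len(struct socket *sock,
				  struct sockaddr_storage *addr)
{
	int len;

	/* getname() now returns the address length on success, -errno on error */
	len = sock->ops->getname(sock, (struct sockaddr *)addr, 0);
	if (len < 0)
		return len;	/* propagate the error */

	return len;		/* number of valid bytes in *addr */
}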
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index db0b1315d577..9c214ec681ac 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
}
}
- pr_debug("invalid PLP frame %02X %02X %02X\n",
- frame[0], frame[1], frame[2]);
+ pr_debug("invalid PLP frame %3ph\n", frame);
return X25_ILLEGAL;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 625b3fca5704..cb3bb9ae4407 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2985,6 +2985,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
static struct pernet_operations __net_initdata xfrm_net_ops = {
.init = xfrm_net_init,
.exit = xfrm_net_exit,
+ .async = true,
};
void __init xfrm_init(void)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 080035f056d9..e92b8c019c88 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3253,6 +3253,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
static struct pernet_operations xfrm_user_net_ops = {
.init = xfrm_user_net_init,
.exit_batch = xfrm_user_net_exit,
+ .async = true,
};
static int __init xfrm_user_init(void)
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ec3fc8d88e87..2c2a587e0942 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
+hostprogs-y += cpustat
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
+always += cpustat_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 69806d74fa53..b1a310c3ae89 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -67,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+ bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -96,6 +97,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else if (is_sk_skb) {
prog_type = BPF_PROG_TYPE_SK_SKB;
+ } else if (is_sk_msg) {
+ prog_type = BPF_PROG_TYPE_SK_MSG;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -113,7 +116,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb) {
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
if (is_socket)
event += 6;
else
@@ -589,7 +592,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "socket", 6) == 0 ||
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
- memcmp(shname, "sk_skb", 6) == 0) {
+ memcmp(shname, "sk_skb", 6) == 0 ||
+ memcmp(shname, "sk_msg", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 000000000000..68c84da065b1
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU has three idle states for cstate:
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU has five operating points:
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumptions; other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * The my_map structure records the cstate and pstate index and
+ * timestamp (Idx, Ts); when a new event arrives we update the pair
+ * to the new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one array like the one below for recording the state
+ * index and timestamp, with cstate and pstate recorded separately:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ if (ctx->cpu_id >= MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When the state is not (u32)-1, the CPU is entering an idle
+ * state; in this case we record the elapsed interval for the
+ * current pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+ /* record pstate only after the first cpu_frequency event has been seen */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When the state is (u32)-1, the CPU is exiting a specific
+ * idle state; in this case we record the elapsed interval
+ * for the cstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update timestamp for pstate as new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ prev_state = *pstate;
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When the CPU is idle, bail out and skip pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The CPU has changed to a different OPP (in the diagram below, the
+ * frequency changes from OPP3 to OPP1); record the interval spent at
+ * the previous frequency OPP3 and update the timestamp as the start
+ * time for the new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
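
For illustration only (not part of the patch): the bookkeeping described in the comments above, stripped of the BPF plumbing. Each event carries a new pair (Idx`, Ts`); the elapsed time Ts` - Ts is credited to the state that was active, and the flattened slot cpu * MAX_CSTATE_ENTRIES + idx matches the layout of the cstate_duration map. The names and the plain user-space array below are illustrative assumptions, not part of the sample.

#include <stdint.h>

#define MAX_CPU			8
#define MAX_CSTATE_ENTRIES	3

struct state_track {
	uint64_t ts;	/* timestamp of the last event (Ts)          */
	uint32_t idx;	/* state index active since that event (Idx) */
};

/* flattened like the BPF array map: slot = cpu * MAX_CSTATE_ENTRIES + idx */
static uint64_t cstate_acc[MAX_CPU * MAX_CSTATE_ENTRIES];

static void account_cstate(struct state_track *t, uint32_t cpu,
			   uint32_t new_idx, uint64_t now)
{
	/* new_idx is assumed to be a valid index below MAX_CSTATE_ENTRIES */
	if (t->ts)	/* nothing to credit before the first event */
		cstate_acc[cpu * MAX_CSTATE_ENTRIES + t->idx] += now - t->ts;

	t->idx = new_idx;	/* (Idx, Ts) := (Idx`, Ts`) */
	t->ts = now;
}

The cpu_frequency path in the sample works the same way, with MAX_PSTATE_ENTRIES and the index returned by find_cpu_pstate_idx().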
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 000000000000..2b4cd1ae57c5
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the running task's affinity to each CPU in turn so that every
+ * CPU is woken up to handle scheduling; as a result all CPUs are woken
+ * once and produce the ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible that no frequency change happens for a long time, so no
+ * ftrace event 'trace_cpu_frequency' is generated for a long period; this
+ * introduces a large deviation in the pstate statistics.
+ *
+ * To solve this, the code below forces 'scaling_max_freq' down to 208MHz to
+ * trigger the ftrace event 'trace_cpu_frequency', and then restores the
+ * maximum frequency value of 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index efdc16d195ff..9a8db7bd6db4 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -52,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -92,7 +93,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
key.tunnel_label = 0xabcde;
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 8ee0371a100a..9f6174236856 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -61,6 +61,7 @@ cleanup_and_exit()
[ -n "$msg" ] && echo "ERROR: $msg"
+ test_cgrp2_sock -d ${CGRP_MNT}/sockopts
ip li del cgrp2_sock
umount ${CGRP_MNT}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
index fc4e64d00cb3..0f396a86e0cb 100755
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -28,6 +28,9 @@ function attach_bpf {
}
function cleanup {
+ if [ -d /tmp/cgroupv2/foo ]; then
+ test_cgrp2_sock -d /tmp/cgroupv2/foo
+ fi
ip link del veth0b
ip netns delete at_ns0
umount /tmp/cgroupv2
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 43ce049996ee..c265863ccdf9 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -23,7 +23,8 @@ function config_device {
function add_gre_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
@@ -43,7 +44,7 @@ function add_ip6gretap_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
local ::11 remote ::22
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d94d4..7068fbdde951 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6d9a52..56f7a259a7c9 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
+ };
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index d54e91eb6cbf..b701b5c21342 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -20,6 +20,7 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
ifindex_in = strtoul(argv[optind], NULL, 0);
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 73f1da4d116c..9bf2881bd11b 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -2,7 +2,7 @@
hostprogs-y := sockmap
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 52b0053274f4..9ad5ba79c85a 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -43,6 +43,42 @@ struct bpf_map_def SEC("maps") sock_map = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -105,4 +141,165 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
return 0;
}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+ int err1 = 0, err2 = 0, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
+ len1, err1, err2);
+ return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes) {
+ ret = bpf_msg_apply_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes) {
+ if (((__u64)data_end - (__u64)data) >= *bytes)
+ return SK_PASS;
+ ret = bpf_msg_cork_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+
+ return SK_DROP;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
new file mode 100755
index 000000000000..6d8cc40cca22
--- /dev/null
+++ b/samples/sockmap/sockmap_test.sh
@@ -0,0 +1,450 @@
+#Test a bunch of positive cases to verify basic functionality
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+#Test max iov
+t="sendmsg"
+r=1
+i=1024
+l=1
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+
+# Test max iov with 1k send
+
+t="sendmsg"
+r=1
+i=1024
+l=1024
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+
+# Test apply with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with 1B not really useful but test it anyways
+r=1
+i=1024
+l=1024
+prog="--txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_redir --txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# mix and match cork and apply not really useful but valid programs
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Tests for bpf_msg_pull_data()
+for i in `seq 99 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+for i in `seq 199 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
+echo $TEST
+$TEST
+sleep 2
+
+# Run through gamut again with start and end
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+# Some specific tests to cover specific code paths
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7c25c0c112bc..07aa237221d1 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -29,6 +29,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
+#include <sys/sendfile.h>
#include <linux/netlink.h>
#include <linux/socket.h>
@@ -54,6 +55,16 @@ void running_handler(int a);
/* global sockets */
int s1, s2, c1, c2, p1, p2;
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"cgroup", required_argument, NULL, 'c' },
@@ -62,6 +73,16 @@ static const struct option long_options[] = {
{"iov_count", required_argument, NULL, 'i' },
{"length", required_argument, NULL, 'l' },
{"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
{0, 0, NULL, 0 }
};
@@ -195,19 +216,71 @@ struct msg_stats {
struct timespec end;
};
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+ struct msg_stats *s,
+ struct sockmap_options *opt)
+{
+ bool drop = opt->drop_expected;
+ unsigned char k = 0;
+ FILE *file;
+ int i, fp;
+
+ file = fopen(".sendpage_tst.tmp", "w+");
+ for (i = 0; i < iov_length * cnt; i++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+ fclose(file);
+
+ fp = open(".sendpage_tst.tmp", O_RDONLY);
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendfile(fd, fp, NULL, iov_length);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ close(fp);
+ return sent;
+ } else if (drop && sent >= 0) {
+ printf("sendpage loop error expected: %i\n", sent);
+ close(fp);
+ return -EIO;
+ }
+
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ close(fp);
+ return 0;
+}
+
static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
- struct msg_stats *s, bool tx)
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
{
struct msghdr msg = {0};
int err, i, flags = MSG_NOSIGNAL;
struct iovec *iov;
+ unsigned char k;
+ bool data_test = opt->data_test;
+ bool drop = opt->drop_expected;
iov = calloc(iov_count, sizeof(struct iovec));
if (!iov)
return errno;
+ k = 0;
for (i = 0; i < iov_count; i++) {
- char *d = calloc(iov_length, sizeof(char));
+ unsigned char *d = calloc(iov_length, sizeof(char));
if (!d) {
fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
@@ -215,21 +288,34 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
iov[i].iov_base = d;
iov[i].iov_len = iov_length;
+
+ if (data_test && tx) {
+ int j;
+
+ for (j = 0; j < iov_length; j++)
+ d[j] = k++;
+ }
}
msg.msg_iov = iov;
msg.msg_iovlen = iov_count;
+ k = 0;
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
int sent = sendmsg(fd, &msg, flags);
- if (sent < 0) {
+ if (!drop && sent < 0) {
perror("send loop error:");
goto out_errno;
+ } else if (drop && sent >= 0) {
+ printf("send loop error expected: %i\n", sent);
+ errno = -EIO;
+ goto out_errno;
}
- s->bytes_sent += sent;
+ if (sent > 0)
+ s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
@@ -272,6 +358,26 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
s->bytes_recvd += recv;
+
+ if (data_test) {
+ int j;
+
+ for (i = 0; i < msg.msg_iovlen; i++) {
+ unsigned char *d = iov[i].iov_base;
+
+ for (j = 0;
+ j < iov[i].iov_len && recv; j++) {
+ if (d[j] != k++) {
+ errno = -EIO;
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k + 1);
+ goto out_errno;
+ }
+ recv--;
+ }
+ }
+ }
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
}
@@ -300,7 +406,7 @@ static inline float recvdBps(struct msg_stats s)
}
static int sendmsg_test(int iov_count, int iov_buf, int cnt,
- int verbose, bool base)
+ struct sockmap_options *opt)
{
float sent_Bps = 0, recvd_Bps = 0;
int rx_fd, txpid, rxpid, err = 0;
@@ -309,14 +415,20 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
errno = 0;
- if (base)
+ if (opt->base)
rx_fd = p1;
else
rx_fd = p2;
rxpid = fork();
if (rxpid == 0) {
- err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+ if (opt->drop_expected)
+ exit(1);
+
+ if (opt->sendpage)
+ iov_count = 1;
+ err = msg_loop(rx_fd, iov_count, iov_buf,
+ cnt, &s, false, opt);
if (err)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -339,7 +451,12 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
txpid = fork();
if (txpid == 0) {
- err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+ if (opt->sendpage)
+ err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+ else
+ err = msg_loop(c1, iov_count, iov_buf,
+ cnt, &s, true, opt);
+
if (err)
fprintf(stderr,
"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -364,7 +481,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
return err;
}
-static int forever_ping_pong(int rate, int verbose)
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
{
struct timeval timeout;
char buf[1024] = {0};
@@ -429,7 +546,7 @@ static int forever_ping_pong(int rate, int verbose)
if (rate)
sleep(rate);
- if (verbose) {
+ if (opt->verbose) {
printf(".");
fflush(stdout);
@@ -443,20 +560,34 @@ enum {
PING_PONG,
SENDMSG,
BASE,
+ BASE_SENDPAGE,
+ SENDPAGE,
};
int main(int argc, char **argv)
{
- int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+ int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
int opt, longindex, err, cg_fd = 0;
+ struct sockmap_options options = {0};
int test = PING_PONG;
char filename[256];
- while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
long_options, &longindex)) != -1) {
switch (opt) {
- /* Cgroup configuration */
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
case 'c':
cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
if (cg_fd < 0) {
@@ -470,7 +601,7 @@ int main(int argc, char **argv)
rate = atoi(optarg);
break;
case 'v':
- verbose = 1;
+ options.verbose = 1;
break;
case 'i':
iov_count = atoi(optarg);
@@ -478,6 +609,9 @@ int main(int argc, char **argv)
case 'l':
length = atoi(optarg);
break;
+ case 'd':
+ options.data_test = true;
+ break;
case 't':
if (strcmp(optarg, "ping") == 0) {
test = PING_PONG;
@@ -485,11 +619,17 @@ int main(int argc, char **argv)
test = SENDMSG;
} else if (strcmp(optarg, "base") == 0) {
test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
} else {
usage(argv);
return -1;
}
break;
+ case 0:
+ break;
case 'h':
default:
usage(argv);
@@ -515,16 +655,16 @@ int main(int argc, char **argv)
/* catch SIGINT */
signal(SIGINT, running_handler);
- /* If base test skip BPF setup */
- if (test == BASE)
- goto run;
-
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
+ /* If base test skip BPF setup */
+ if (test == BASE || test == BASE_SENDPAGE)
+ goto run;
+
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
@@ -557,15 +697,129 @@ run:
goto out;
}
- if (test == PING_PONG)
- err = forever_ping_pong(rate, verbose);
- else if (test == SENDMSG)
- err = sendmsg_test(iov_count, length, rate, verbose, false);
- else if (test == BASE)
- err = sendmsg_test(iov_count, length, rate, verbose, true);
+ /* Attach txmsg program to sockmap */
+ if (txmsg_pass)
+ tx_prog_fd = prog_fd[3];
+ else if (txmsg_noisy)
+ tx_prog_fd = prog_fd[4];
+ else if (txmsg_redir)
+ tx_prog_fd = prog_fd[5];
+ else if (txmsg_redir_noisy)
+ tx_prog_fd = prog_fd[6];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[9];
+ /* apply and cork must be last */
+ else if (txmsg_apply)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_cork)
+ tx_prog_fd = prog_fd[8];
else
+ tx_prog_fd = 0;
+
+ if (tx_prog_fd) {
+ int redir_fd, i = 0;
+
+ err = bpf_prog_attach(tx_prog_fd,
+ map_fd[1], BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_redir || txmsg_redir_noisy)
+ redir_fd = c2;
+ else
+ redir_fd = c1;
+
+ err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_apply) {
+ err = bpf_map_update_elem(map_fd[3],
+ &i, &txmsg_apply, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_cork) {
+ err = bpf_map_update_elem(map_fd[4],
+ &i, &txmsg_cork, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_start) {
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_end) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+ }
+
+ if (txmsg_drop)
+ options.drop_expected = true;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(rate, &options);
+ else if (test == SENDMSG) {
+ options.base = false;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == SENDPAGE) {
+ options.base = false;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE) {
+ options.base = true;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE_SENDPAGE) {
+ options.base = true;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else
fprintf(stderr, "unknown test\n");
out:
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
close(s1);
close(s2);
close(p1);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8644d864e3c1..b4d7b6242a40 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
static struct pernet_operations selinux_net_ops = {
.init = selinux_nf_register,
.exit = selinux_nf_unregister,
+ .async = true,
};
static int __init selinux_nf_ip_init(void)
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index e36d17835d4f..3f29c03162ca 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
static struct pernet_operations smack_net_ops = {
.init = smack_nf_register,
.exit = smack_nf_unregister,
+ .async = true,
};
static int __init smack_nf_ip_init(void)
diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c
index cd6932e5225c..9094f4b3b367 100644
--- a/security/tomoyo/network.c
+++ b/security/tomoyo/network.c
@@ -655,10 +655,11 @@ int tomoyo_socket_listen_permission(struct socket *sock)
return 0;
{
const int error = sock->ops->getname(sock, (struct sockaddr *)
- &addr, &addr_len, 0);
+ &addr, 0);
- if (error)
+ if (error < 0)
return error;
+ addr_len = error;
}
address.protocol = type;
address.operation = TOMOYO_NETWORK_LISTEN;
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c8ec0ae16bf0..1ea545965ee3 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -1,19 +1,28 @@
# SPDX-License-Identifier: GPL-2.0
-prefix = /usr
+include ../scripts/Makefile.include
+
+prefix ?= /usr/local
CC = gcc
LEX = flex
YACC = bison
MAKE = make
+INSTALL ?= install
CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
FEATURE_USER = .bpf
FEATURE_TESTS = libbfd disassembler-four-args
FEATURE_DISPLAY = libbfd disassembler-four-args
@@ -38,40 +47,59 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-%.yacc.c: %.y
- $(YACC) -o $@ -d $<
+$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
+ $(QUIET_BISON)$(YACC) -o $@ -d $<
-%.lex.c: %.l
- $(LEX) -o $@ $<
+$(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
+ $(QUIET_FLEX)$(LEX) -o $@ $<
-all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
+$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
-bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
-bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
-bpf_jit_disasm : bpf_jit_disasm.o
+$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
+$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
-bpf_dbg : LDLIBS = -lreadline
-bpf_dbg : bpf_dbg.o
+PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
-bpf_asm : LDLIBS =
-bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
-bpf_exp.lex.o : bpf_exp.yacc.c
+all: $(PROGS) bpftool
-clean: bpftool_clean
- rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
+$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
+$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lopcodes -lbfd -ldl
-install: bpftool_install
- install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
- install bpf_dbg $(prefix)/bin/bpf_dbg
- install bpf_asm $(prefix)/bin/bpf_asm
+$(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lreadline
+
+$(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
+
+$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+
+clean: bpftool_clean
+ $(call QUIET_CLEAN, bpf-progs)
+ $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+
+install: $(PROGS) bpftool_install
+ $(call QUIET_INSTALL, bpf_jit_disasm)
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+ $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+ $(call QUIET_INSTALL, bpf_dbg)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+ $(call QUIET_INSTALL, bpf_asm)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
bpftool:
- $(MAKE) -C bpftool
+ $(call descend,bpftool)
bpftool_install:
- $(MAKE) -C bpftool install
+ $(call descend,bpftool,install)
bpftool_clean:
- $(MAKE) -C bpftool clean
+ $(call descend,bpftool,clean)
-.PHONY: bpftool FORCE
+.PHONY: all install clean bpftool bpftool_install bpftool_clean
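Usage note (illustrative, not from the patch itself): with the quiet helpers above, the build follows the usual tools/ convention, so a verbose build can still be requested by passing V=1 on the make command line, and the programs now default to installing under /usr/local unless prefix is overridden.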
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index e4ceee7f2dff..67ca6c69376c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -21,7 +21,7 @@ MAP COMMANDS
=============
| **bpftool** **prog { show | list }** [*PROG*]
-| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
+| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog pin** *PROG* *FILE*
| **bpftool** **prog load** *OBJ* *FILE*
@@ -39,12 +39,18 @@ DESCRIPTION
Output will start with program ID followed by program type and
zero or more named attributes (depending on kernel version).
- **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
- Dump eBPF instructions of the program from the kernel.
- If *FILE* is specified image will be written to a file,
- otherwise it will be disassembled and printed to stdout.
+ **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+ Dump eBPF instructions of the program from the kernel. By
+ default, eBPF will be disassembled and printed to standard
+ output in human-readable format. In this case, **opcodes**
+ controls if raw opcodes should be printed as well.
- **opcodes** controls if raw opcodes will be printed.
+ If **file** is specified, the binary image will instead be
+ written to *FILE*.
+
+	  If **visual** is specified, the control flow graph (CFG) of the
+	  program will be built instead, and the eBPF instructions will be
+	  presented within that CFG, in DOT format, on standard output.
**bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }]
Dump jited image (host machine code) of the program.
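Usage sketch (illustrative, not from the patch itself): the DOT produced by the new **visual** mode is meant for Graphviz-style tooling, so a typical invocation would be something like `bpftool prog dump xlated id 3 visual | dot -Tsvg -o prog.svg`, where the program id is purely an example.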
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 26901ec87361..4e69782c4a79 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -38,7 +38,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
CC = gcc
CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
@@ -70,7 +70,7 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-include $(wildcard *.d)
+include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
@@ -89,6 +89,8 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
install: $(OUTPUT)bpftool
$(call QUIET_INSTALL, bpftool)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 08719c54a614..490811b45fa7 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -147,7 +147,7 @@ _bpftool()
# Deal with simplest keywords
case $prev in
- help|key|opcodes)
+ help|key|opcodes|visual)
return 0
;;
tag)
@@ -223,11 +223,16 @@ _bpftool()
return 0
;;
*)
- _bpftool_once_attr 'file'
+ _bpftool_once_attr 'file'
+ if _bpftool_search_list 'xlated'; then
+ COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+ "$cur" ) )
+ else
COMPREPLY+=( $( compgen -W 'opcodes' -- \
"$cur" ) )
- return 0
- ;;
+ fi
+ return 0
+ ;;
esac
;;
pin)
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
new file mode 100644
index 000000000000..f30b3a4a840b
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+struct cfg {
+ struct list_head funcs;
+ int func_num;
+};
+
+struct func_node {
+ struct list_head l;
+ struct list_head bbs;
+ struct bpf_insn *start;
+ struct bpf_insn *end;
+ int idx;
+ int bb_num;
+};
+
+struct bb_node {
+ struct list_head l;
+ struct list_head e_prevs;
+ struct list_head e_succs;
+ struct bpf_insn *head;
+ struct bpf_insn *tail;
+ int idx;
+};
+
+#define EDGE_FLAG_EMPTY 0x0
+#define EDGE_FLAG_FALLTHROUGH 0x1
+#define EDGE_FLAG_JUMP 0x2
+struct edge_node {
+ struct list_head l;
+ struct bb_node *src;
+ struct bb_node *dst;
+ int flags;
+};
+
+#define ENTRY_BLOCK_INDEX 0
+#define EXIT_BLOCK_INDEX 1
+#define NUM_FIXED_BLOCKS 2
+#define func_prev(func) list_prev_entry(func, l)
+#define func_next(func) list_next_entry(func, l)
+#define bb_prev(bb) list_prev_entry(bb, l)
+#define bb_next(bb) list_next_entry(bb, l)
+#define entry_bb(func) func_first_bb(func)
+#define exit_bb(func) func_last_bb(func)
+#define cfg_first_func(cfg) \
+ list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg) \
+ list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func) \
+ list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func) \
+ list_last_entry(&func->bbs, struct bb_node, l)
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+ struct func_node *new_func, *func;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func->start == insn)
+ return func;
+ else if (func->start > insn)
+ break;
+ }
+
+ func = func_prev(func);
+ new_func = calloc(1, sizeof(*new_func));
+ if (!new_func) {
+ p_err("OOM when allocating FUNC node");
+ return NULL;
+ }
+ new_func->start = insn;
+ new_func->idx = cfg->func_num;
+ list_add(&new_func->l, &func->l);
+ cfg->func_num++;
+
+ return new_func;
+}
+
+static struct bb_node *func_append_bb(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *new_bb, *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ else if (bb->head > insn)
+ break;
+ }
+
+ bb = bb_prev(bb);
+ new_bb = calloc(1, sizeof(*new_bb));
+ if (!new_bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+ new_bb->head = insn;
+ INIT_LIST_HEAD(&new_bb->e_prevs);
+ INIT_LIST_HEAD(&new_bb->e_succs);
+ list_add(&new_bb->l, &bb->l);
+
+ return new_bb;
+}
+
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+ struct bb_node *bb;
+
+ bb = calloc(1, sizeof(*bb));
+ if (!bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&bb->e_prevs);
+ INIT_LIST_HEAD(&bb->e_succs);
+ list_add(&bb->l, after);
+
+ return bb;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+ struct bpf_insn *end)
+{
+ struct func_node *func, *last_func;
+
+ func = cfg_append_func(cfg, cur);
+ if (!func)
+ return true;
+
+ for (; cur < end; cur++) {
+ if (cur->code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (cur->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ func = cfg_append_func(cfg, cur + cur->off + 1);
+ if (!func)
+ return true;
+ }
+
+ last_func = cfg_last_func(cfg);
+ last_func->end = end - 1;
+ func = cfg_first_func(cfg);
+ list_for_each_entry_from(func, &last_func->l, l) {
+ func->end = func_next(func)->start - 1;
+ }
+
+ return false;
+}
+
+static bool func_partition_bb_head(struct func_node *func)
+{
+ struct bpf_insn *cur, *end;
+ struct bb_node *bb;
+
+ cur = func->start;
+ end = func->end;
+ INIT_LIST_HEAD(&func->bbs);
+ bb = func_append_bb(func, cur);
+ if (!bb)
+ return true;
+
+ for (; cur <= end; cur++) {
+ if (BPF_CLASS(cur->code) == BPF_JMP) {
+ u8 opcode = BPF_OP(cur->code);
+
+ if (opcode == BPF_EXIT || opcode == BPF_CALL)
+ continue;
+
+ bb = func_append_bb(func, cur + cur->off + 1);
+ if (!bb)
+ return true;
+
+ if (opcode != BPF_JA) {
+ bb = func_append_bb(func, cur + 1);
+ if (!bb)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void func_partition_bb_tail(struct func_node *func)
+{
+ unsigned int bb_idx = NUM_FIXED_BLOCKS;
+ struct bb_node *bb, *last;
+
+ last = func_last_bb(func);
+ last->tail = func->end;
+ bb = func_first_bb(func);
+ list_for_each_entry_from(bb, &last->l, l) {
+ bb->tail = bb_next(bb)->head - 1;
+ bb->idx = bb_idx++;
+ }
+
+ last->idx = bb_idx++;
+ func->bb_num = bb_idx;
+}
+
+static bool func_add_special_bb(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ bb = func_insert_dummy_bb(&func->bbs);
+ if (!bb)
+ return true;
+ bb->idx = ENTRY_BLOCK_INDEX;
+
+ bb = func_insert_dummy_bb(&func_last_bb(func)->l);
+ if (!bb)
+ return true;
+ bb->idx = EXIT_BLOCK_INDEX;
+
+ return false;
+}
+
+static bool func_partition_bb(struct func_node *func)
+{
+ if (func_partition_bb_head(func))
+ return true;
+
+ func_partition_bb_tail(func);
+
+ return false;
+}
+
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ }
+
+ return NULL;
+}
+
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+ int flags)
+{
+ struct edge_node *e;
+
+ e = calloc(1, sizeof(*e));
+ if (!e) {
+ p_err("OOM when allocating edge node");
+ return NULL;
+ }
+
+ if (src)
+ e->src = src;
+ if (dst)
+ e->dst = dst;
+
+ e->flags |= flags;
+
+ return e;
+}
+
+static bool func_add_bb_edges(struct func_node *func)
+{
+ struct bpf_insn *insn;
+ struct edge_node *e;
+ struct bb_node *bb;
+
+ bb = entry_bb(func);
+ e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ bb = exit_bb(func);
+ e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_prevs);
+
+ bb = entry_bb(func);
+ bb = bb_next(bb);
+ list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+ e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+ if (!e)
+ return true;
+ e->src = bb;
+
+ insn = bb->tail;
+ if (BPF_CLASS(insn->code) != BPF_JMP ||
+ BPF_OP(insn->code) == BPF_EXIT) {
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ } else if (BPF_OP(insn->code) == BPF_JA) {
+ e->dst = func_search_bb_with_head(func,
+ insn + insn->off + 1);
+ e->flags |= EDGE_FLAG_JUMP;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ }
+
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+ if (!e)
+ return true;
+ e->src = bb;
+ e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+ list_add_tail(&e->l, &bb->e_succs);
+ }
+
+ return false;
+}
+
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+ int cnt = len / sizeof(*insn);
+ struct func_node *func;
+
+ INIT_LIST_HEAD(&cfg->funcs);
+
+ if (cfg_partition_funcs(cfg, insn, insn + cnt))
+ return true;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func_partition_bb(func) || func_add_special_bb(func))
+ return true;
+
+ if (func_add_bb_edges(func))
+ return true;
+ }
+
+ return false;
+}
+
+static void cfg_destroy(struct cfg *cfg)
+{
+ struct func_node *func, *func2;
+
+ list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+ struct bb_node *bb, *bb2;
+
+ list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+ struct edge_node *e, *e2;
+
+ list_for_each_entry_safe(e, e2, &bb->e_prevs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_for_each_entry_safe(e, e2, &bb->e_succs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_del(&bb->l);
+ free(bb);
+ }
+
+ list_del(&func->l);
+ free(func);
+ }
+}
+
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+ const char *shape;
+
+ if (bb->idx == ENTRY_BLOCK_INDEX || bb->idx == EXIT_BLOCK_INDEX)
+ shape = "Mdiamond";
+ else
+ shape = "record";
+
+ printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+ func->idx, bb->idx, shape);
+
+ if (bb->idx == ENTRY_BLOCK_INDEX) {
+ printf("ENTRY");
+ } else if (bb->idx == EXIT_BLOCK_INDEX) {
+ printf("EXIT");
+ } else {
+ unsigned int start_idx;
+ struct dump_data dd = {};
+
+ printf("{");
+ kernel_syms_load(&dd);
+ start_idx = bb->head - func->start;
+ dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
+ kernel_syms_destroy(&dd);
+ printf("}");
+ }
+
+ printf("\"];\n\n");
+}
+
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+ const char *style = "\"solid,bold\"";
+ const char *color = "black";
+ int func_idx = func->idx;
+ struct edge_node *e;
+ int weight = 10;
+
+ if (list_empty(&bb->e_succs))
+ return;
+
+ list_for_each_entry(e, &bb->e_succs, l) {
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+ func_idx, e->src->idx, func_idx, e->dst->idx,
+ style, color, weight);
+ printf("];\n");
+ }
+}
+
+static void func_output_bb_def(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_node(func, bb);
+ }
+}
+
+static void func_output_edges(struct func_node *func)
+{
+ int func_idx = func->idx;
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_succ_edges(func, bb);
+ }
+
+	/* Add an invisible edge from ENTRY to EXIT; this improves the
+	 * graph layout.
+ */
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+ func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
+}
+
+static void cfg_dump(struct cfg *cfg)
+{
+ struct func_node *func;
+
+ printf("digraph \"DOT graph for eBPF program\" {\n");
+ list_for_each_entry(func, &cfg->funcs, l) {
+ printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+ func->idx, func->idx);
+ func_output_bb_def(func);
+ func_output_edges(func);
+ printf("}\n");
+ }
+ printf("}\n");
+}
+
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+ struct bpf_insn *insn = buf;
+ struct cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ if (cfg_build(&cfg, insn, len))
+ return;
+
+ cfg_dump(&cfg);
+
+ cfg_destroy(&cfg);
+}
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
new file mode 100644
index 000000000000..2cc9bd990b13
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 185acfa229b5..1ec852d21d44 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -46,6 +46,9 @@
#include "main.h"
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
const char *bin_name;
static int last_argc;
static char **last_argv;
@@ -157,6 +160,54 @@ void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
}
}
+/* Split command line into argument vector. */
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+ static const char ws[] = " \t\r\n";
+ char *cp = line;
+ int n_argc = 0;
+
+ while (*cp) {
+ /* Skip leading whitespace. */
+ cp += strspn(cp, ws);
+
+ if (*cp == '\0')
+ break;
+
+ if (n_argc >= (maxargs - 1)) {
+ p_err("too many arguments to command %d", cmd_nb);
+ return -1;
+ }
+
+ /* Word begins with quote. */
+ if (*cp == '\'' || *cp == '"') {
+ char quote = *cp++;
+
+ n_argv[n_argc++] = cp;
+ /* Find ending quote. */
+ cp = strchr(cp, quote);
+ if (!cp) {
+ p_err("unterminated quoted string in command %d",
+ cmd_nb);
+ return -1;
+ }
+ } else {
+ n_argv[n_argc++] = cp;
+
+ /* Find end of word. */
+ cp += strcspn(cp, ws);
+ if (*cp == '\0')
+ break;
+ }
+
+ /* Separate words. */
+ *cp++ = 0;
+ }
+ n_argv[n_argc] = NULL;
+
+ return n_argc;
+}
+
static int do_batch(int argc, char **argv);
static const struct cmd cmds[] = {
@@ -171,11 +222,12 @@ static const struct cmd cmds[] = {
static int do_batch(int argc, char **argv)
{
+ char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+ char *n_argv[BATCH_ARG_NB_MAX];
unsigned int lines = 0;
- char *n_argv[4096];
- char buf[65536];
int n_argc;
FILE *fp;
+ char *cp;
int err;
int i;
@@ -191,7 +243,10 @@ static int do_batch(int argc, char **argv)
}
NEXT_ARG();
- fp = fopen(*argv, "r");
+ if (!strcmp(*argv, "-"))
+ fp = stdin;
+ else
+ fp = fopen(*argv, "r");
if (!fp) {
p_err("Can't open file (%s): %s", *argv, strerror(errno));
return -1;
@@ -200,27 +255,45 @@ static int do_batch(int argc, char **argv)
if (json_output)
jsonw_start_array(json_wtr);
while (fgets(buf, sizeof(buf), fp)) {
+ cp = strchr(buf, '#');
+ if (cp)
+ *cp = '\0';
+
if (strlen(buf) == sizeof(buf) - 1) {
errno = E2BIG;
break;
}
- n_argc = 0;
- n_argv[n_argc] = strtok(buf, " \t\n");
-
- while (n_argv[n_argc]) {
- n_argc++;
- if (n_argc == ARRAY_SIZE(n_argv)) {
- p_err("line %d has too many arguments, skip",
+ /* Append continuation lines if any (coming after a line ending
+ * with '\' in the batch file).
+ */
+ while ((cp = strstr(buf, "\\\n")) != NULL) {
+ if (!fgets(contline, sizeof(contline), fp) ||
+ strlen(contline) == 0) {
+ p_err("missing continuation line on command %d",
lines);
- n_argc = 0;
- break;
+ err = -1;
+ goto err_close;
+ }
+
+ cp = strchr(contline, '#');
+ if (cp)
+ *cp = '\0';
+
+ if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+ p_err("command %d is too long", lines);
+ err = -1;
+ goto err_close;
}
- n_argv[n_argc] = strtok(NULL, " \t\n");
+ buf[strlen(buf) - 2] = '\0';
+ strcat(buf, contline);
}
+ n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
if (!n_argc)
continue;
+ if (n_argc < 0)
+ goto err_close;
if (json_output) {
jsonw_start_object(json_wtr);
@@ -247,11 +320,12 @@ static int do_batch(int argc, char **argv)
p_err("reading batch file failed: %s", strerror(errno));
err = -1;
} else {
- p_info("processed %d lines", lines);
+ p_info("processed %d commands", lines);
err = 0;
}
err_close:
- fclose(fp);
+ if (fp != stdin)
+ fclose(fp);
if (json_output)
jsonw_end_array(json_wtr);
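Usage sketch (illustrative, not from the patch itself): with the parsing changes above, a batch file may now contain lines such as `prog show # list programs`, where everything after `#` is stripped; a command may be continued on the next line by ending the current one with `\`; and `bpftool batch file -` reads the commands from standard input.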
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e549e329be82..f7a810897eac 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -47,8 +47,9 @@
#include <bpf.h>
#include <libbpf.h>
+#include "cfg.h"
#include "main.h"
-#include "disasm.h"
+#include "xlated_dumper.h"
static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
@@ -407,259 +408,6 @@ static int do_show(int argc, char **argv)
return err;
}
-#define SYM_MAX_NAME 256
-
-struct kernel_sym {
- unsigned long address;
- char name[SYM_MAX_NAME];
-};
-
-struct dump_data {
- unsigned long address_call_base;
- struct kernel_sym *sym_mapping;
- __u32 sym_count;
- char scratch_buff[SYM_MAX_NAME];
-};
-
-static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
-{
- return ((struct kernel_sym *)sym_a)->address -
- ((struct kernel_sym *)sym_b)->address;
-}
-
-static void kernel_syms_load(struct dump_data *dd)
-{
- struct kernel_sym *sym;
- char buff[256];
- void *tmp, *address;
- FILE *fp;
-
- fp = fopen("/proc/kallsyms", "r");
- if (!fp)
- return;
-
- while (!feof(fp)) {
- if (!fgets(buff, sizeof(buff), fp))
- break;
- tmp = realloc(dd->sym_mapping,
- (dd->sym_count + 1) *
- sizeof(*dd->sym_mapping));
- if (!tmp) {
-out:
- free(dd->sym_mapping);
- dd->sym_mapping = NULL;
- fclose(fp);
- return;
- }
- dd->sym_mapping = tmp;
- sym = &dd->sym_mapping[dd->sym_count];
- if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
- continue;
- sym->address = (unsigned long)address;
- if (!strcmp(sym->name, "__bpf_call_base")) {
- dd->address_call_base = sym->address;
- /* sysctl kernel.kptr_restrict was set */
- if (!sym->address)
- goto out;
- }
- if (sym->address)
- dd->sym_count++;
- }
-
- fclose(fp);
-
- qsort(dd->sym_mapping, dd->sym_count,
- sizeof(*dd->sym_mapping), kernel_syms_cmp);
-}
-
-static void kernel_syms_destroy(struct dump_data *dd)
-{
- free(dd->sym_mapping);
-}
-
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
- unsigned long key)
-{
- struct kernel_sym sym = {
- .address = key,
- };
-
- return dd->sym_mapping ?
- bsearch(&sym, dd->sym_mapping, dd->sym_count,
- sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
-}
-
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vprintf(fmt, args);
- va_end(args);
-}
-
-static const char *print_call_pcrel(struct dump_data *dd,
- struct kernel_sym *sym,
- unsigned long address,
- const struct bpf_insn *insn)
-{
- if (sym)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%+d#%s", insn->off, sym->name);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%+d#0x%lx", insn->off, address);
- return dd->scratch_buff;
-}
-
-static const char *print_call_helper(struct dump_data *dd,
- struct kernel_sym *sym,
- unsigned long address)
-{
- if (sym)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "%s", sym->name);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "0x%lx", address);
- return dd->scratch_buff;
-}
-
-static const char *print_call(void *private_data,
- const struct bpf_insn *insn)
-{
- struct dump_data *dd = private_data;
- unsigned long address = dd->address_call_base + insn->imm;
- struct kernel_sym *sym;
-
- sym = kernel_syms_search(dd, address);
- if (insn->src_reg == BPF_PSEUDO_CALL)
- return print_call_pcrel(dd, sym, address, insn);
- else
- return print_call_helper(dd, sym, address);
-}
-
-static const char *print_imm(void *private_data,
- const struct bpf_insn *insn,
- __u64 full_imm)
-{
- struct dump_data *dd = private_data;
-
- if (insn->src_reg == BPF_PSEUDO_MAP_FD)
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u]", insn->imm);
- else
- snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "0x%llx", (unsigned long long)full_imm);
- return dd->scratch_buff;
-}
-
-static void dump_xlated_plain(struct dump_data *dd, void *buf,
- unsigned int len, bool opcodes)
-{
- const struct bpf_insn_cbs cbs = {
- .cb_print = print_insn,
- .cb_call = print_call,
- .cb_imm = print_imm,
- .private_data = dd,
- };
- struct bpf_insn *insn = buf;
- bool double_insn = false;
- unsigned int i;
-
- for (i = 0; i < len / sizeof(*insn); i++) {
- if (double_insn) {
- double_insn = false;
- continue;
- }
-
- double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
- printf("% 4d: ", i);
- print_bpf_insn(&cbs, NULL, insn + i, true);
-
- if (opcodes) {
- printf(" ");
- fprint_hex(stdout, insn + i, 8, " ");
- if (double_insn && i < len - 1) {
- printf(" ");
- fprint_hex(stdout, insn + i + 1, 8, " ");
- }
- printf("\n");
- }
- }
-}
-
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
-{
- unsigned int l = strlen(fmt);
- char chomped_fmt[l];
- va_list args;
-
- va_start(args, fmt);
- if (l > 0) {
- strncpy(chomped_fmt, fmt, l - 1);
- chomped_fmt[l - 1] = '\0';
- }
- jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
- va_end(args);
-}
-
-static void dump_xlated_json(struct dump_data *dd, void *buf,
- unsigned int len, bool opcodes)
-{
- const struct bpf_insn_cbs cbs = {
- .cb_print = print_insn_json,
- .cb_call = print_call,
- .cb_imm = print_imm,
- .private_data = dd,
- };
- struct bpf_insn *insn = buf;
- bool double_insn = false;
- unsigned int i;
-
- jsonw_start_array(json_wtr);
- for (i = 0; i < len / sizeof(*insn); i++) {
- if (double_insn) {
- double_insn = false;
- continue;
- }
- double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
- jsonw_start_object(json_wtr);
- jsonw_name(json_wtr, "disasm");
- print_bpf_insn(&cbs, NULL, insn + i, true);
-
- if (opcodes) {
- jsonw_name(json_wtr, "opcodes");
- jsonw_start_object(json_wtr);
-
- jsonw_name(json_wtr, "code");
- jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
-
- jsonw_name(json_wtr, "src_reg");
- jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
-
- jsonw_name(json_wtr, "dst_reg");
- jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
-
- jsonw_name(json_wtr, "off");
- print_hex_data_json((uint8_t *)(&insn[i].off), 2);
-
- jsonw_name(json_wtr, "imm");
- if (double_insn && i < len - 1)
- print_hex_data_json((uint8_t *)(&insn[i].imm),
- 12);
- else
- print_hex_data_json((uint8_t *)(&insn[i].imm),
- 4);
- jsonw_end_object(json_wtr);
- }
- jsonw_end_object(json_wtr);
- }
- jsonw_end_array(json_wtr);
-}
-
static int do_dump(int argc, char **argv)
{
struct bpf_prog_info info = {};
@@ -668,6 +416,7 @@ static int do_dump(int argc, char **argv)
unsigned int buf_size;
char *filepath = NULL;
bool opcodes = false;
+ bool visual = false;
unsigned char *buf;
__u32 *member_len;
__u64 *member_ptr;
@@ -706,6 +455,9 @@ static int do_dump(int argc, char **argv)
} else if (is_prefix(*argv, "opcodes")) {
opcodes = true;
NEXT_ARG();
+ } else if (is_prefix(*argv, "visual")) {
+ visual = true;
+ NEXT_ARG();
}
if (argc) {
@@ -777,27 +529,30 @@ static int do_dump(int argc, char **argv)
if (json_output)
jsonw_null(json_wtr);
- } else {
- if (member_len == &info.jited_prog_len) {
- const char *name = NULL;
-
- if (info.ifindex) {
- name = ifindex_to_bfd_name_ns(info.ifindex,
- info.netns_dev,
- info.netns_ino);
- if (!name)
- goto err_free;
- }
-
- disasm_print_insn(buf, *member_len, opcodes, name);
- } else {
- kernel_syms_load(&dd);
- if (json_output)
- dump_xlated_json(&dd, buf, *member_len, opcodes);
- else
- dump_xlated_plain(&dd, buf, *member_len, opcodes);
- kernel_syms_destroy(&dd);
+ } else if (member_len == &info.jited_prog_len) {
+ const char *name = NULL;
+
+ if (info.ifindex) {
+ name = ifindex_to_bfd_name_ns(info.ifindex,
+ info.netns_dev,
+ info.netns_ino);
+ if (!name)
+ goto err_free;
}
+
+ disasm_print_insn(buf, *member_len, opcodes, name);
+ } else if (visual) {
+ if (json_output)
+ jsonw_null(json_wtr);
+ else
+ dump_xlated_cfg(buf, *member_len);
+ } else {
+ kernel_syms_load(&dd);
+ if (json_output)
+ dump_xlated_json(&dd, buf, *member_len, opcodes);
+ else
+ dump_xlated_plain(&dd, buf, *member_len, opcodes);
+ kernel_syms_destroy(&dd);
}
free(buf);
@@ -851,7 +606,7 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %s %s { show | list } [PROG]\n"
- " %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+ " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
" %s %s dump jited PROG [{ file FILE | opcodes }]\n"
" %s %s pin PROG FILE\n"
" %s %s load OBJ FILE\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
new file mode 100644
index 000000000000..20da835e9e38
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+ return ((struct kernel_sym *)sym_a)->address -
+ ((struct kernel_sym *)sym_b)->address;
+}
+
+void kernel_syms_load(struct dump_data *dd)
+{
+ struct kernel_sym *sym;
+ char buff[256];
+ void *tmp, *address;
+ FILE *fp;
+
+ fp = fopen("/proc/kallsyms", "r");
+ if (!fp)
+ return;
+
+ while (!feof(fp)) {
+ if (!fgets(buff, sizeof(buff), fp))
+ break;
+ tmp = realloc(dd->sym_mapping,
+ (dd->sym_count + 1) *
+ sizeof(*dd->sym_mapping));
+ if (!tmp) {
+out:
+ free(dd->sym_mapping);
+ dd->sym_mapping = NULL;
+ fclose(fp);
+ return;
+ }
+ dd->sym_mapping = tmp;
+ sym = &dd->sym_mapping[dd->sym_count];
+ if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+ continue;
+ sym->address = (unsigned long)address;
+ if (!strcmp(sym->name, "__bpf_call_base")) {
+ dd->address_call_base = sym->address;
+ /* sysctl kernel.kptr_restrict was set */
+ if (!sym->address)
+ goto out;
+ }
+ if (sym->address)
+ dd->sym_count++;
+ }
+
+ fclose(fp);
+
+ qsort(dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+void kernel_syms_destroy(struct dump_data *dd)
+{
+ free(dd->sym_mapping);
+}
+
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+ unsigned long key)
+{
+ struct kernel_sym sym = {
+ .address = key,
+ };
+
+ return dd->sym_mapping ?
+ bsearch(&sym, dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+}
+
+static void
+print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ char buf[64], *p;
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ p = buf;
+ while (*p != '\0') {
+ if (*p == '\n') {
+ memmove(p + 3, p, strlen(buf) + 1 - (p - buf));
+ /* Align each instruction dump row left. */
+ *p++ = '\\';
+ *p++ = 'l';
+ /* Output multiline concatenation. */
+ *p++ = '\\';
+ } else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+ memmove(p + 1, p, strlen(buf) + 1 - (p - buf));
+ /* Escape special character. */
+ *p++ = '\\';
+ }
+
+ p++;
+ }
+
+ printf("%s", buf);
+}
+
+static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ unsigned int l = strlen(fmt);
+ char chomped_fmt[l];
+ va_list args;
+
+ va_start(args, fmt);
+ if (l > 0) {
+ strncpy(chomped_fmt, fmt, l - 1);
+ chomped_fmt[l - 1] = '\0';
+ }
+ jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+ va_end(args);
+}
+
+static const char *print_call_pcrel(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address,
+ const struct bpf_insn *insn)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#%s", insn->off, sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#0x%lx", insn->off, address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%s", sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%lx", address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+ const struct bpf_insn *insn)
+{
+ struct dump_data *dd = private_data;
+ unsigned long address = dd->address_call_base + insn->imm;
+ struct kernel_sym *sym;
+
+ sym = kernel_syms_search(dd, address);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return print_call_pcrel(dd, sym, address, insn);
+ else
+ return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+ const struct bpf_insn *insn,
+ __u64 full_imm)
+{
+ struct dump_data *dd = private_data;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[id:%u]", insn->imm);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%llx", (unsigned long long)full_imm);
+ return dd->scratch_buff;
+}
+
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_json,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "disasm");
+ print_bpf_insn(&cbs, NULL, insn + i, true);
+
+ if (opcodes) {
+ jsonw_name(json_wtr, "opcodes");
+ jsonw_start_object(json_wtr);
+
+ jsonw_name(json_wtr, "code");
+ jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+ jsonw_name(json_wtr, "src_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+ jsonw_name(json_wtr, "dst_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+ jsonw_name(json_wtr, "off");
+ print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+ jsonw_name(json_wtr, "imm");
+ if (double_insn && i < len - 1)
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 12);
+ else
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 4);
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ printf("% 4d: ", i);
+ print_bpf_insn(&cbs, NULL, insn + i, true);
+
+ if (opcodes) {
+ printf(" ");
+ fprint_hex(stdout, insn + i, 8, " ");
+ if (double_insn && i < len - 1) {
+ printf(" ");
+ fprint_hex(stdout, insn + i + 1, 8, " ");
+ }
+ printf("\n");
+ }
+ }
+}
+
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+ unsigned int start_idx)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_for_graph,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn_start = buf_start;
+ struct bpf_insn *insn_end = buf_end;
+ struct bpf_insn *cur = insn_start;
+
+ for (; cur <= insn_end; cur++) {
+ printf("% 4d: ", (int)(cur - insn_start + start_idx));
+ print_bpf_insn(&cbs, NULL, cur, true);
+ if (cur != insn_end)
+ printf(" | ");
+ }
+}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
new file mode 100644
index 000000000000..b34affa7ef2d
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME 256
+
+struct kernel_sym {
+ unsigned long address;
+ char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+ unsigned long address_call_base;
+ struct kernel_sym *sym_mapping;
+ __u32 sym_count;
+ char scratch_buff[SYM_MAX_NAME + 8];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+ unsigned int start_index);
+
+#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db6bdc375126..d245c41213ac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
};
enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -231,6 +233,28 @@ enum bpf_attach_type {
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -696,6 +720,15 @@ union bpf_attr {
* int bpf_override_return(pt_regs, rc)
* @pt_regs: pointer to struct pt_regs
* @rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map, using key as the lookup key
+ *     for the sock in the map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_PASS
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -757,7 +790,11 @@ union bpf_attr {
FN(perf_prog_read_value), \
FN(getsockopt), \
FN(override_return), \
- FN(sock_ops_cb_flags_set),
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -919,6 +956,14 @@ enum sk_action {
SK_PASS,
};
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ void *data;
+ void *data_end;
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5bbbf285af74..64a8fc384186 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1857,6 +1857,7 @@ static const struct {
BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
+ BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
};
#undef BPF_PROG_SEC
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5c43c187f27c..f35fb02bdf56 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,6 +13,14 @@ endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
LDLIBS += -lcap -lelf -lrt -lpthread
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): urandom_read
+
+urandom_read: urandom_read.c
+ $(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
@@ -21,7 +29,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
- sample_map_ret0.o test_tcpbpf_kern.o
+ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+ sockmap_tcp_msg_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -35,12 +44,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
include ../lib.mk
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
.PHONY: force
# force a rebuild of BPFOBJ when its dependencies are updated
@@ -72,3 +83,5 @@ $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index dde2c11d7771..7cae376d8d0c 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -86,6 +86,14 @@ static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
(void *) BPF_FUNC_perf_prog_read_value;
static int (*bpf_override_return)(void *ctx, unsigned long rc) =
(void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+ (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+ (void *) BPF_FUNC_msg_pull_data;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -123,6 +131,8 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_under_cgroup;
static int (*bpf_skb_change_head)(void *, int len, int flags) =
(void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+ (void *) BPF_FUNC_skb_pull_data;
/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
#if defined(__TARGET_ARCH_x86)
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644
index 000000000000..9dac9b30f8ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rlimit.h
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+ struct rlimit rlim_old, rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+	/* For the sake of running the test cases, we temporarily
+	 * set the rlimit to infinity so that the kernel reports errors
+	 * from the actual test cases rather than noise from hitting
+	 * memlock limits. The limit is per-process and not global,
+	 * hence no destructor is really needed here.
+ */
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+ perror("Unable to lift memlock rlimit");
+		/* Try a lower limit, but expect potential test
+ * case failures from this!
+ */
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
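A minimal sketch (illustrative, not from the patch) of how a selftest consumes the new header: including it is enough, since the constructor above runs before main() and lifts RLIMIT_MEMLOCK. The program below is hypothetical and assumes it is built from tools/testing/selftests/bpf/ so that "bpf_rlimit.h" is on the include path.

#include <stdio.h>
#include <sys/resource.h>

#include "bpf_rlimit.h"

int main(void)
{
	struct rlimit r;

	/* The constructor in bpf_rlimit.h has already run and, under
	 * normal conditions, raised RLIMIT_MEMLOCK to RLIM_INFINITY.
	 */
	getrlimit(RLIMIT_MEMLOCK, &r);
	printf("RLIMIT_MEMLOCK cur=%llu max=%llu\n",
	       (unsigned long long)r.rlim_cur,
	       (unsigned long long)r.rlim_max);
	return 0;
}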
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
index a1dec2b6d9c5..0f92858f6226 100644
--- a/tools/testing/selftests/bpf/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -20,14 +20,25 @@ int bpf_prog1(struct __sk_buff *skb)
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
__u8 *d = data;
+ __u32 len = (__u32) data_end - (__u32) data;
+ int err;
- if (data + 10 > data_end)
- return skb->len;
+ if (data + 10 > data_end) {
+ err = bpf_skb_pull_data(skb, 10);
+ if (err)
+ return SK_DROP;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+ if (data + 10 > data_end)
+ return SK_DROP;
+ }
/* This write/read is a bit pointless but tests the verifier and
* strparser handler for read/write pkt data and access into sk
* fields.
*/
+ d = data;
d[7] = 1;
return skb->len;
}
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644
index 000000000000..12a7b5c82ed6
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+
+ char *d;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+ d = (char *)data;
+ bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
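A further sketch (illustrative, not from the patch): the other helpers declared in bpf_helpers.h above could be exercised from a similar SK_MSG program. The section name and the 128-byte figure below are arbitrary, and the sketch assumes bpf_msg_apply_bytes() takes the byte count as its second argument, matching the declaration added above.

#include <linux/bpf.h>
#include "bpf_helpers.h"

int _version SEC("version") = 1;

SEC("sk_msg_apply")
int bpf_prog_apply(struct sk_msg_md *msg)
{
	/* Ask the sockmap infrastructure to apply the verdict returned
	 * below to (up to) the first 128 bytes of the message, then let
	 * the data through.
	 */
	bpf_msg_apply_bytes(msg, 128);

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";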
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index d7bea972cb21..2ce7634a4012 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -26,6 +26,13 @@ struct bpf_map_def SEC("maps") sock_map_tx = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_msg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
struct bpf_map_def SEC("maps") sock_map_break = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index ff8bd7e3e50c..6b1b302310fe 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -9,8 +9,6 @@
#include <stddef.h>
#include <stdbool.h>
-#include <sys/resource.h>
-
#include <linux/unistd.h>
#include <linux/filter.h>
#include <linux/bpf_perf_event.h>
@@ -19,6 +17,7 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
@@ -702,9 +701,6 @@ static int do_test(unsigned int from, unsigned int to)
int main(int argc, char **argv)
{
unsigned int from = 0, to = ARRAY_SIZE(tests);
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
if (argc == 3) {
unsigned int l = atoi(argv[argc - 2]);
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index 3489cc283433..9c8b50bac7e0 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -11,13 +11,13 @@
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#define DEV_CGROUP_PROG "./dev_cgroup.o"
@@ -25,15 +25,11 @@
int main(int argc, char **argv)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
struct bpf_object *obj;
int error = EXIT_FAILURE;
int prog_fd, cgroup_fd;
__u32 prog_cnt;
- if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
- perror("Unable to lift memlock rlimit");
-
if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
&obj, &prog_fd)) {
printf("Failed to load DEV_CGROUP program\n");
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 2be87e9ee28d..147e34cfceb7 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -22,10 +22,11 @@
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
struct tlpm_node {
struct tlpm_node *next;
@@ -736,17 +737,11 @@ static void test_lpm_multi_thread(void)
int main(void)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
- int i, ret;
+ int i;
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
- /* allow unlimited locked memory */
- ret = setrlimit(RLIMIT_MEMLOCK, &limit);
- if (ret < 0)
- perror("Unable to lift memlock rlimit");
-
test_lpm_basic();
test_lpm_order();
@@ -755,11 +750,8 @@ int main(void)
test_lpm_map(i);
test_lpm_ipaddr();
-
test_lpm_delete();
-
test_lpm_get_next_key();
-
test_lpm_multi_thread();
printf("test_lpm: OK\n");
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 8c10c9180c1a..781c7de343be 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -16,10 +16,11 @@
#include <time.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
#define LOCAL_FREE_TARGET (128)
#define PERCPU_FREE_TARGET (4)
@@ -613,7 +614,6 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH};
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
@@ -621,8 +621,6 @@ int main(int argc, char **argv)
setbuf(stdout, NULL);
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
nr_cpus = bpf_num_possible_cpus();
assert(nr_cpus != -1);
printf("nr_cpus:%d\n\n", nr_cpus);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9e03a4c356a4..6c253343a6f9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -17,13 +17,14 @@
#include <stdlib.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+
#include "bpf_util.h"
+#include "bpf_rlimit.h"
static int map_flags;
@@ -463,15 +464,17 @@ static void test_devmap(int task, void *data)
#include <linux/err.h>
#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
static void test_sockmap(int tasks, void *data)
{
- int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
- struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
+ struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+ int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
- int parse_prog, verdict_prog;
+ int parse_prog, verdict_prog, msg_prog;
struct sockaddr_in addr;
+ int one = 1, s, sc, rc;
struct bpf_object *obj;
struct timeval to;
__u32 key, value;
@@ -583,6 +586,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid msg verdict prog attach\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
if (!err) {
printf("Failed unknown prog attach\n");
@@ -601,6 +610,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+ if (err) {
+ printf("Failed empty msg verdict prog detach\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
if (!err) {
printf("Detach invalid prog successful\n");
@@ -615,6 +630,13 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+ BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+ if (err) {
+ printf("Failed to load SK_MSG msg prog\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
if (err) {
@@ -630,7 +652,7 @@ static void test_sockmap(int tasks, void *data)
map_fd_rx = bpf_map__fd(bpf_map_rx);
if (map_fd_rx < 0) {
- printf("Failed to get map fd\n");
+ printf("Failed to get map rx fd\n");
goto out_sockmap;
}
@@ -646,6 +668,18 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+ if (IS_ERR(bpf_map_msg)) {
+ printf("Failed to load map msg from msg_verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_msg = bpf_map__fd(bpf_map_msg);
+ if (map_fd_msg < 0) {
+ printf("Failed to get map msg fd\n");
+ goto out_sockmap;
+ }
+
bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
if (IS_ERR(bpf_map_break)) {
printf("Failed to load map tx from verdict prog\n");
@@ -679,6 +713,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ printf("Failed msg verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_attach(verdict_prog, map_fd_rx,
__MAX_BPF_ATTACH_TYPE, 0);
if (!err) {
@@ -718,6 +758,14 @@ static void test_sockmap(int tasks, void *data)
}
}
+ /* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+ i = 0;
+ err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_msg update sockmap %i\n", err);
+ goto out_sockmap;
+ }
+
/* Test map send/recv */
for (i = 0; i < 2; i++) {
buf[0] = i;
@@ -1126,10 +1174,6 @@ static void run_all_tests(void)
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b549308abd19..e9df48b306df 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -26,7 +26,6 @@ typedef __u16 __sum16;
#include <sys/ioctl.h>
#include <sys/wait.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <fcntl.h>
@@ -34,9 +33,11 @@ typedef __u16 __sum16;
#include <linux/err.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+
#include "test_iptunnel_common.h"
#include "bpf_util.h"
#include "bpf_endian.h"
+#include "bpf_rlimit.h"
static int error_cnt, pass_cnt;
@@ -840,7 +841,8 @@ static void test_tp_attach_query(void)
static int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
- char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+ char val_buf[PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id)];
int err;
err = bpf_map_get_next_key(map1_fd, NULL, &key);
@@ -963,12 +965,168 @@ out:
return;
}
-int main(void)
+static int extract_build_id(char *build_id, size_t size)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+
+ fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+ if (fp == NULL)
+ return -1;
+
+ if (getline(&line, &len, fp) == -1)
+ goto err;
+ pclose(fp);
+
+ if (len >= size)
+ len = size - 1;
+ memcpy(build_id, line, len);
+ build_id[len] = '\0';
+ return 0;
+err:
+ pclose(fp);
+ return -1;
+}
+
+static void test_stacktrace_build_id(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int bytes, efd, err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {};
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
- setrlimit(RLIMIT_MEMLOCK, &rinf);
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* Get the ID for the random/urandom_read tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ /* Open the perf event and attach bpf program */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vice versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map");
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
+int main(void)
+{
test_pkt_access();
test_xdp();
test_l4lb_all();
@@ -979,6 +1137,7 @@ int main(void)
test_obj_name();
test_tp_attach_query();
test_stacktrace_map();
+ test_stacktrace_build_id();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000000..b755bd783ce5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+ .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+ unsigned long long pad;
+ int got_bits;
+ int pool_left;
+ int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+ __u32 key = 0, val = 0, *value_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+ if ((int)key >= 0)
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 8b201895c569..6272c784ca2a 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -12,7 +12,6 @@
#include <assert.h>
#include <sys/socket.h>
-#include <sys/resource.h>
#include <linux/filter.h>
#include <linux/bpf.h>
@@ -21,6 +20,7 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
static struct bpf_insn prog[BPF_MAXINSNS];
@@ -184,11 +184,9 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
int main(void)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
uint32_t tests = 0;
int i, fd_map;
- setrlimit(RLIMIT_MEMLOCK, &rinf);
fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
sizeof(int), 1, BPF_F_NO_PREALLOC);
assert(fd_map > 0);
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 95a370f3d378..84ab5163c828 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -11,12 +11,14 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
+#include "bpf_rlimit.h"
#include <linux/perf_event.h>
#include "test_tcpbpf.h"
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 437c0b1c9d21..3e7718b1a9ae 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -24,7 +24,6 @@
#include <limits.h>
#include <sys/capability.h>
-#include <sys/resource.h>
#include <linux/unistd.h>
#include <linux/filter.h>
@@ -41,7 +40,7 @@
# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
# endif
#endif
-
+#include "bpf_rlimit.h"
#include "../../../include/linux/filter.h"
#ifndef ARRAY_SIZE
@@ -57,6 +56,9 @@
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -1595,6 +1597,60 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SK_SKB,
},
{
+ "direct packet read for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "direct packet write for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "overlapping checks for direct packet access SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
"check skb->mark is not writeable by sockets",
.insns = {
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
@@ -2587,17 +2643,74 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
},
{
+ "runtime/jit: tail_call within bounds, prog once",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "runtime/jit: tail_call within bounds, prog loop",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 41,
+ },
+ {
+ "runtime/jit: tail_call within bounds, no prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "runtime/jit: tail_call out of bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 256),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
"runtime/jit: pass negative index to tail_call",
.insns = {
BPF_MOV64_IMM(BPF_REG_3, -1),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
.fixup_prog = { 1 },
.result = ACCEPT,
+ .retval = 2,
},
{
"runtime/jit: pass > 32bit index to tail_call",
@@ -2606,11 +2719,12 @@ static struct bpf_test tests[] = {
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
.fixup_prog = { 2 },
.result = ACCEPT,
+ .retval = 42,
},
{
"stack pointer arithmetic",
@@ -11164,6 +11278,94 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
+ "jit: lsh, rsh, arsh by 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+ BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: various mul tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
"xadd/w check unaligned stack",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -11245,16 +11447,61 @@ static int create_map(uint32_t size_value, uint32_t max_elem)
return fd;
}
+static int create_prog_dummy1(void)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(int mfd, int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_LD_MAP_FD(BPF_REG_2, mfd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
static int create_prog_array(void)
{
- int fd;
+ int p1key = 0, p2key = 1;
+ int mfd, p1fd, p2fd;
- fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), 4, 0);
- if (fd < 0)
+ mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+ sizeof(int), 4, 0);
+ if (mfd < 0) {
printf("Failed to create prog array '%s'!\n", strerror(errno));
+ return -1;
+ }
- return fd;
+ p1fd = create_prog_dummy1();
+ p2fd = create_prog_dummy2(mfd, p2key);
+ if (p1fd < 0 || p2fd < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+ goto out;
+ close(p2fd);
+ close(p1fd);
+
+ return mfd;
+out:
+ close(p2fd);
+ close(p1fd);
+ close(mfd);
+ return -1;
}
static int create_map_in_map(void)
@@ -11375,7 +11622,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto fail_log;
}
if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
- printf("FAIL\nUnexpected error message!\n");
+ printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+ expected_err, bpf_vlog);
goto fail_log;
}
}
@@ -11459,9 +11707,20 @@ out:
return ret;
}
+static void get_unpriv_disabled(void)
+{
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (fd && fgets(buf, 2, fd) == buf && atoi(buf))
+ unpriv_disabled = true;
+ if (fd) fclose(fd);
+}
+
static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
- int i, passes = 0, errors = 0;
+ int i, passes = 0, errors = 0, skips = 0;
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
@@ -11469,7 +11728,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
/* Program types that are not supported by non-root we
* skip right away.
*/
- if (!test->prog_type) {
+ if (!test->prog_type && unpriv_disabled) {
+ printf("#%d/u %s SKIP\n", i, test->descr);
+ skips++;
+ } else if (!test->prog_type) {
if (!unpriv)
set_admin(false);
printf("#%d/u %s ", i, test->descr);
@@ -11478,20 +11740,22 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
set_admin(true);
}
- if (!unpriv) {
+ if (unpriv) {
+ printf("#%d/p %s SKIP\n", i, test->descr);
+ skips++;
+ } else {
printf("#%d/p %s ", i, test->descr);
do_test_single(test, false, &passes, &errors);
}
}
- printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+ printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+ skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
}
int main(int argc, char **argv)
{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
- struct rlimit rlim = { 1 << 20, 1 << 20 };
unsigned int from = 0, to = ARRAY_SIZE(tests);
bool unpriv = !is_admin();
@@ -11512,6 +11776,12 @@ int main(int argc, char **argv)
}
}
- setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
+ get_unpriv_disabled();
+ if (unpriv && unpriv_disabled) {
+ printf("Cannot run as unprivileged user with sysctl %s.\n",
+ UNPRIV_SYSCTL);
+ return EXIT_FAILURE;
+ }
+
return do_test(unpriv, from, to);
}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index e9626cf5607a..8d6918c3b4a2 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -4,7 +4,6 @@
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -12,6 +11,8 @@
#include <bpf/bpf.h>
+#include "bpf_rlimit.h"
+
#define LOG_SIZE (1 << 20)
#define err(str...) printf("ERROR: " str)
@@ -133,16 +134,11 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
int main(int argc, char **argv)
{
- struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
char full_log[LOG_SIZE];
char log[LOG_SIZE];
size_t want_len;
int i;
- /* allow unlimited locked memory to have more consistent error code */
- if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
- perror("Unable to lift memlock rlimit");
-
memset(log, 1, LOG_SIZE);
/* Test incorrect attr */
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000000..4acfdebf36fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+int main(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ int i;
+ char buf[BUF_SIZE];
+
+ if (fd < 0)
+ return 1;
+ for (i = 0; i < 4; ++i)
+ read(fd, buf, BUF_SIZE);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index d7c30d366935..785fc18a16b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 7177bea1fdfa..6a75a3ea44ad 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -2,3 +2,8 @@ CONFIG_USER_NS=y
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000000..3991ad1a368d
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,467 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+log_subsection()
+{
+ echo
+ echo "#########################################"
+ echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+ echo
+ echo "COMMAND: $*"
+ eval $*
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local pfx
+ local addr
+
+ addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+ echo
+ echo "########################################"
+ echo "Configuring interfaces"
+
+ set -e
+
+ # create namespace
+ ip netns add ${PEER_NS}
+ ip -netns ${PEER_NS} li set lo up
+
+ # add vrf table
+ ip li add ${VRF} type vrf table ${VRF_TABLE}
+ ip li set ${VRF} up
+ ip ro add table ${VRF_TABLE} unreachable default
+ ip -6 ro add table ${VRF_TABLE} unreachable default
+
+ # create test interfaces
+ ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+ ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+ ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+ ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+ # enslave vrf interfaces
+ for n in 5 7; do
+ ip li set ${NETIFS[p${n}]} vrf ${VRF}
+ done
+
+ # add addresses
+ for n in 1 3 5 7; do
+ ip li set ${NETIFS[p${n}]} up
+ ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ done
+
+ # move peer interfaces to namespace and add addresses
+ for n in 2 4 6 8; do
+ ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+ ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ done
+
+ set +e
+
+ # let DAD complete - assume default of 1 probe
+ sleep 1
+}
+
+cleanup()
+{
+ # make sure we start from a clean slate
+ ip netns del ${PEER_NS} 2>/dev/null
+ for n in 1 3 5 7; do
+ ip link del ${NETIFS[p${n}]} 2>/dev/null
+ done
+ ip link del ${VRF} 2>/dev/null
+ ip ro flush table ${VRF_TABLE}
+ ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local nh1="$3"
+ local nh2="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+ # - unicast connected, unicast recursive
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+ run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+ "${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+ "${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast recursive - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+ "${CONGW[1]} dev ${NETIFS[p1]}" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv4()
+{
+ run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+
+ run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+ run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+ "Gateway resolves to wrong nexthop device"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip6_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local opts="$3"
+ local nh1="$4"
+ local nh2="$5"
+ local exp_rc="$6"
+ local desc="$7"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+ # - unicast connected, unicast recursive, v4-mapped
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+ run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+ "${RECGW6[1]} dev ${NETIFS[p1]}" \
+ "${RECGW6[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast recursive - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+ "::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+ "::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+ 0 "v4-mapped - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink both nexthops"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv6()
+{
+ local lladdr
+
+ lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+ run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+ "Invalid gw - local linklocal address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
+
+ lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+ "Invalid gw - local linklocal address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
+
+ run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+ "No nexthop device given"
+
+ # default VRF validation is done against LOCAL table
+ # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+ # "Gateway resolves to wrong nexthop device"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+ log_section "IPv4 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv4
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv4
+
+ log_section "IPv6 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv6
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a9154eefb2e2..9164e60d4b66 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,154 +6,179 @@
ret=0
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
+VERBOSE=${VERBOSE:=0}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
-check_fail()
+log_test()
{
- if [ $1 -eq 0 ]; then
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ else
ret=1
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
fi
}
-netns_create()
+setup()
{
- local testns=$1
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add 198.51.100.1/24 dev dummy0
+ $IP -6 address add 2001:db8:1::1/64 dev dummy0
+ set +e
- ip netns add $testns
- ip netns exec $testns ip link set dev lo up
}
-fib_unreg_unicast_test()
+cleanup()
{
- ret=0
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
- netns_create "testns"
+get_linklocal()
+{
+ local dev=$1
+ local addr
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ addr=$($IP -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ [ -z "$addr" ] && return 1
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo $addr
- ip netns exec testns ip link del dev dummy0
- check_err $?
+ return 0
+}
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_fail $?
+fib_unreg_unicast_test()
+{
+ echo
+ echo "Single path route test"
- ip netns del testns
+ setup
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route test"
- return 1
- fi
- echo "PASS: unicast route test"
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link del dev dummy0
+ set +e
+
+ echo " Nexthop device deleted"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch - no route"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch - no route"
+
+ cleanup
}
fib_unreg_multipath_test()
{
- ret=0
-
- netns_create "testns"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Multipath route test"
- ip netns exec testns ip link add dummy1 type dummy
- ip netns exec testns ip link set dev dummy1 up
+ setup
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
- ip netns exec testns ip route add 203.0.113.0/24 \
+ $IP route add 203.0.113.0/24 \
nexthop via 198.51.100.2 dev dummy0 \
nexthop via 192.0.2.2 dev dummy1
- ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+ $IP -6 route add 2001:db8:3::/64 \
nexthop via 2001:db8:1::2 dev dummy0 \
nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_err $?
+ set -e
+ $IP link del dev dummy0
+ set +e
- ip netns exec testns ip link del dev dummy0
- check_err $?
+ echo " One nexthop device deleted"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 - multipath route removed on delete"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
# In IPv6 we do not flush the entire multipath route.
- check_err $?
+ log_test $? 0 "IPv6 - multipath down to single path"
- ip netns exec testns ip link del dev dummy1
+ set -e
+ $IP link del dev dummy1
+ set +e
- ip netns del testns
+ echo " Second nexthop device deleted"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 - no route"
- if [ $ret -ne 0 ]; then
- echo "FAIL: multipath route test"
- return 1
- fi
- echo "PASS: multipath route test"
+ cleanup
}
fib_unreg_test()
{
- echo "Running netdev unregister tests"
-
fib_unreg_unicast_test
fib_unreg_multipath_test
}
fib_down_unicast_test()
{
- ret=0
-
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Single path, admin down"
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ setup
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ set -e
+ $IP link set dev dummy0 down
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_fail $?
+ echo " Route deleted on down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route test"
- return 1
- fi
- echo "PASS: unicast route test"
+ cleanup
}
fib_down_multipath_test_do()
@@ -161,251 +186,395 @@ fib_down_multipath_test_do()
local down_dev=$1
local up_dev=$2
- ip netns exec testns ip route get fibmatch 203.0.113.1 \
+ $IP route get fibmatch 203.0.113.1 \
oif $down_dev &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+ log_test $? 2 "IPv4 fibmatch on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
oif $down_dev &> /dev/null
- check_fail $?
+ log_test $? 2 "IPv6 fibmatch on down device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 \
+ $IP route get fibmatch 203.0.113.1 \
oif $up_dev &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+ log_test $? 0 "IPv4 fibmatch on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
oif $up_dev &> /dev/null
- check_err $?
+ log_test $? 0 "IPv6 fibmatch on up device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+ $IP route get fibmatch 203.0.113.1 | \
grep $down_dev | grep -q "dead linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+ log_test $? 0 "IPv4 flags on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
grep $down_dev | grep -q "dead linkdown"
- check_err $?
+ log_test $? 0 "IPv6 flags on down device"
- ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+ $IP route get fibmatch 203.0.113.1 | \
grep $up_dev | grep -q "dead linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+ log_test $? 1 "IPv4 flags on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
grep $up_dev | grep -q "dead linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 flags on up device"
}
fib_down_multipath_test()
{
- ret=0
-
- netns_create "testns"
+ echo
+ echo "Admin down multipath"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ setup
- ip netns exec testns ip link add dummy1 type dummy
- ip netns exec testns ip link set dev dummy1 up
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
-
- ip netns exec testns ip route add 203.0.113.0/24 \
+ $IP route add 203.0.113.0/24 \
nexthop via 198.51.100.2 dev dummy0 \
nexthop via 192.0.2.2 dev dummy1
- ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+ $IP -6 route add 2001:db8:3::/64 \
nexthop via 2001:db8:1::2 dev dummy0 \
nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Verify start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_err $?
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+ echo " One device down, one up"
fib_down_multipath_test_do "dummy0" "dummy1"
- ip netns exec testns ip link set dev dummy0 up
- check_err $?
- ip netns exec testns ip link set dev dummy1 down
- check_err $?
+ set -e
+ $IP link set dev dummy0 up
+ $IP link set dev dummy1 down
+ set +e
+ echo " Other device down and up"
fib_down_multipath_test_do "dummy1" "dummy0"
- ip netns exec testns ip link set dev dummy0 down
- check_err $?
-
- ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
- check_fail $?
+ set -e
+ $IP link set dev dummy0 down
+ set +e
- ip netns exec testns ip link del dev dummy1
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
+ echo " Both devices down"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
- if [ $ret -ne 0 ]; then
- echo "FAIL: multipath route test"
- return 1
- fi
- echo "PASS: multipath route test"
+ $IP link del dev dummy1
+ cleanup
}
fib_down_test()
{
- echo "Running netdev down tests"
-
fib_down_unicast_test
fib_down_multipath_test
}
+# Local routes should not be affected when carrier changes.
fib_carrier_local_test()
{
- ret=0
-
- # Local routes should not be affected when carrier changes.
- netns_create "testns"
-
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ echo
+ echo "Local carrier tests - single path"
- ip netns exec testns ip link set dev dummy0 carrier on
+ setup
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
- check_err $?
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+ $IP route get fibmatch 198.51.100.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+ log_test $? 1 "IPv4 - no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 - no linkdown flag"
- ip netns exec testns ip link set dev dummy0 carrier off
+ set -e
+ $IP link set dev dummy0 carrier off
+ sleep 1
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
- check_err $?
+ echo " Carrier off on nexthop"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+ $IP route get fibmatch 198.51.100.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+ log_test $? 1 "IPv4 - linkdown flag not set"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 - linkdown flag not set"
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
- ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null
- check_err $?
+ echo " Route to local address with carrier down"
+ $IP route get fibmatch 192.0.2.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 192.0.2.1 | \
+ $IP route get fibmatch 192.0.2.1 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \
+ log_test $? 1 "IPv4 linkdown flag not set"
+ $IP -6 route get fibmatch 2001:db8:2::1 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 linkdown flag not set"
- ip netns exec testns ip link del dev dummy0
-
- ip netns del testns
-
- if [ $ret -ne 0 ]; then
- echo "FAIL: local route carrier test"
- return 1
- fi
- echo "PASS: local route carrier test"
+ cleanup
}
fib_carrier_unicast_test()
{
ret=0
- netns_create "testns"
+ echo
+ echo "Single path route carrier test"
- ip netns exec testns ip link add dummy0 type dummy
- ip netns exec testns ip link set dev dummy0 up
+ setup
- ip netns exec testns ip link set dev dummy0 carrier on
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
- ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
-
- ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+ $IP route get fibmatch 198.51.100.2 | \
grep -q "linkdown"
- check_fail $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+ log_test $? 1 "IPv4 no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
grep -q "linkdown"
- check_fail $?
+ log_test $? 1 "IPv6 no linkdown flag"
- ip netns exec testns ip link set dev dummy0 carrier off
+ set -e
+ $IP link set dev dummy0 carrier off
+ set +e
- ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
- check_err $?
+ echo " Carrier down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+ $IP route get fibmatch 198.51.100.2 | \
grep -q "linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
grep -q "linkdown"
- check_err $?
+ log_test $? 0 "IPv6 linkdown flag set"
- ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
- ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
- ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null
- check_err $?
+ echo " Second address added with carrier down"
+ $IP route get fibmatch 192.0.2.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
- ip netns exec testns ip route get fibmatch 192.0.2.2 | \
+ $IP route get fibmatch 192.0.2.2 | \
grep -q "linkdown"
- check_err $?
- ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:2::2 | \
grep -q "linkdown"
- check_err $?
+ log_test $? 0 "IPv6 linkdown flag set"
- ip netns exec testns ip link del dev dummy0
+ cleanup
+}
- ip netns del testns
+fib_carrier_test()
+{
+ fib_carrier_local_test
+ fib_carrier_unicast_test
+}
- if [ $ret -ne 0 ]; then
- echo "FAIL: unicast route carrier test"
- return 1
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+ local desc="$1"
+ local erc=$2
+ local vrf=$3
+ local pfx=$4
+ local gw=$5
+ local dev=$6
+ local cmd out rc
+
+ [ "$vrf" = "-" ] && vrf="default"
+ [ -n "$gw" ] && gw="via $gw"
+ [ -n "$dev" ] && dev="dev $dev"
+
+ cmd="$IP route add vrf $vrf $pfx $gw $dev"
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n COMMAND: $cmd\n"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
fi
- echo "PASS: unicast route carrier test"
+ log_test $rc $erc "$desc"
}
-fib_carrier_test()
+fib4_nexthop()
{
- echo "Running netdev carrier change tests"
+ echo
+ echo "IPv4 nexthop tests"
- fib_carrier_local_test
- fib_carrier_unicast_test
+ echo "<<< write me >>>"
}
+fib6_nexthop()
+{
+ local lldummy=$(get_linklocal dummy0)
+ local llv1=$(get_linklocal veth1)
+
+ if [ -z "$lldummy" ]; then
+ echo "Failed to get linklocal address for dummy0"
+ return 1
+ fi
+ if [ -z "$llv1" ]; then
+ echo "Failed to get linklocal address for veth1"
+ return 1
+ fi
+
+ echo
+ echo "IPv6 nexthop tests"
+
+ add_rt "Directly connected nexthop, unicast address" 0 \
+ - 2001:db8:101::/64 2001:db8:1::2
+ add_rt "Directly connected nexthop, unicast address with device" 0 \
+ - 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+ add_rt "Gateway is linklocal address" 0 \
+ - 2001:db8:103::1/64 $llv1 "veth0"
+
+ # fails because LL address requires a device
+ add_rt "Gateway is linklocal address, no device" 2 \
+ - 2001:db8:104::1/64 $llv1
+
+ # local address can not be a gateway
+ add_rt "Gateway can not be local unicast address" 2 \
+ - 2001:db8:105::/64 2001:db8:1::1
+ add_rt "Gateway can not be local unicast address, with device" 2 \
+ - 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+ add_rt "Gateway can not be a local linklocal address" 2 \
+ - 2001:db8:107::1/64 $lldummy "dummy0"
+
+ # VRF tests
+ add_rt "Gateway can be local address in a VRF" 0 \
+ - 2001:db8:108::/64 2001:db8:51::2
+ add_rt "Gateway can be local address in a VRF, with device" 0 \
+ - 2001:db8:109::/64 2001:db8:51::2 "veth0"
+ add_rt "Gateway can be local linklocal address in a VRF" 0 \
+ - 2001:db8:110::1/64 $llv1 "veth0"
+
+ add_rt "Redirect to VRF lookup" 0 \
+ - 2001:db8:111::/64 "" "red"
+
+ add_rt "VRF route, gateway can be local address in default VRF" 0 \
+ red 2001:db8:112::/64 2001:db8:51::1
+
+ # local address in same VRF fails
+ add_rt "VRF route, gateway can not be a local address" 2 \
+ red 2001:db8:113::1/64 2001:db8:2::1
+ add_rt "VRF route, gateway can not be a local addr with device" 2 \
+ red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+# dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+# veth0 - 192.0.2.1/24 2001:db8:51::1/64
+#
+# VRF red:
+# dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+# veth1 - 192.0.2.2/24 2001:db8:51::2/64
+#
+# [ dummy0 veth0 ]--[ veth1 dummy1 ]
+
+fib_nexthop_test()
+{
+ setup
+
+ set -e
+
+ $IP -4 rule add pref 32765 table local
+ $IP -4 rule del pref 0
+ $IP -6 rule add pref 32765 table local
+ $IP -6 rule del pref 0
+
+ $IP link add red type vrf table 1
+ $IP link set red up
+ $IP -4 route add vrf red unreachable default metric 4278198272
+ $IP -6 route add vrf red unreachable default metric 4278198272
+
+ $IP link add veth0 type veth peer name veth1
+ $IP link set dev veth0 up
+ $IP address add 192.0.2.1/24 dev veth0
+ $IP -6 address add 2001:db8:51::1/64 dev veth0
+
+ $IP link set dev veth1 vrf red up
+ $IP address add 192.0.2.2/24 dev veth1
+ $IP -6 address add 2001:db8:51::2/64 dev veth1
+
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 vrf red up
+ $IP address add 192.168.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+ set +e
+
+ sleep 1
+ fib4_nexthop
+ fib6_nexthop
+
+ (
+ $IP link del dev dummy1
+ $IP link del veth0
+ $IP link del red
+ ) 2>/dev/null
+ cleanup
+}
+
+################################################################################
+#
+
fib_test()
{
- fib_unreg_test
- fib_down_test
- fib_carrier_test
+ if [ -n "$TEST" ]; then
+ eval $TEST
+ else
+ fib_unreg_test
+ fib_down_test
+ fib_carrier_test
+ fib_nexthop_test
+ fi
}
if [ "$(id -u)" -ne 0 ];then
@@ -424,6 +593,9 @@ if [ $? -ne 0 ]; then
exit 0
fi
+# start clean
+cleanup &> /dev/null
+
fib_test
exit $ret
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000000..a793eef5b876
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000000..4a0964c42860
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
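+
+For example, a "host" attached to switch port swp1 can be modelled roughly
+as follows (lib.sh wraps this up in vrf_create() and simple_if_init(); the
+table number here is only illustrative):
+
+  ip link add dev vrf-h1 type vrf table 10
+  ip link set dev swp1 master vrf-h1
+  ip link set dev vrf-h1 up
+  ip link set dev swp1 up
+  ip address add 192.0.2.1/24 dev swp1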
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is exercised, without noise from
+other systems.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-port LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loop back more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission (an
+  example invocation is shown below).
+
+1. https://www.shellcheck.net/
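+
+For example, a single test together with the library it sources can be
+checked with (the -x flag makes ShellCheck follow sourced files):
+
+  shellcheck -x bridge_vlan_aware.sh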
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000000..75d922438bc9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 seconds ageing time (ageing_time is expressed in centiseconds).
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755
index 000000000000..1cddf06f691d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 seconds ageing time (ageing_time is expressed in centiseconds).
+ ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000000..5cd2aed97958
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000000..e819d049d9ce
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
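+#
+# Copy this file to "forwarding.config" (which lib.sh sources when present)
+# and adjust it to the local setup.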
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000000..1ac6c62271f3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,577 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
+
+if [[ -f forwarding.config ]]; then
+ source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+ echo "SKIP: jq not installed"
+ exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+ echo "SKIP: $MZ not installed"
+ exit 1
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 1
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+create_netif_veth()
+{
+ local i
+
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ local j=$((i+1))
+
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ ip link add ${NETIFS[p$i]} type veth \
+ peer name ${NETIFS[p$j]}
+ if [[ $? -ne 0 ]]; then
+ echo "Failed to create netif"
+ exit 1
+ fi
+ fi
+ i=$j
+ done
+}
+
+create_netif()
+{
+ case "$NETIF_TYPE" in
+ veth) create_netif_veth
+ ;;
+	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
+ exit 1
+ ;;
+ esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+ create_netif
+fi
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 1
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ while true; do
+ ip link show dev ${NETIFS[p$i]} up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
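+	# The local table is normally consulted at rule preference 0, before
+	# the l3mdev (VRF) rules.  Re-add it at a lower preference so that
+	# addresses on VRF-enslaved interfaces are resolved in the VRF's own
+	# table first.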
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __addr_add_del $if_name del "${array[@]}"
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
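+	# For example, the ageing_time of 1000 set by the bridge tests reads
+	# back as 10 seconds.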
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+ ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+ ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+ sysctl -q -w net.ipv4.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+ sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+ sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping"
+}
+
+ping6_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+	# Check tc's exit status before piping to jq: due to a jq bug, jq
+	# returns 0 even when its input is empty.
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000000..cc6a14abfa87
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000000..3bc351008db6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,376 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+				| bc -l)
+	else
+		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+				| bc -l)
+	fi
+
+	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+		check_err 1 "Packet difference is 0"
+		log_test "Multipath"
+		log_info "Expected ratio $weights_ratio"
+		return
+	fi
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+				| bc -l)
+	else
+		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+				| bc -l)
+	fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
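+	# Illustrative numbers (not measured): with weights 2:1 and packet
+	# counts 660:340, weights_ratio=2.00 and packets_ratio=1.94, so
+	# diff=0.06 and 0.06/2.00=0.03, which is within the 10% tolerance
+	# checked below.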
+ test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000000..3a6385ebd5d0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_test()
+{
+ local action=$1
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress $action \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming $action packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress $action ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see passed packet"
+
+ tc_check_packets "dev $swp1 ingress" 102 2
+ check_fail $? "Packet was dropped and it should not reach here"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_test "redirect"
+mirred_egress_test "mirror"
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ gact_drop_and_ok_test
+ mirred_egress_test "redirect"
+ mirred_egress_test "mirror"
+ gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000000..2fd15226974b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ unreachable_chain_test
+ gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000000..9d3b64a2a264
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+ # workaround the jq bug which causes jq to return 0 in case input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000000..032b882adfc0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ match_dst_mac_test
+ match_src_mac_test
+ match_dst_ip_test
+ match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000000..077b98048ef4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ shared_block_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755
index 000000000000..88795b510b32
--- /dev/null
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
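+#
+# Usage example (any command works; this one simply lists the namespace's
+# interfaces):
+#   ./in_netns.sh ip address show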
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+ip netns exec "${NETNS}" "$@"
+exit "$?"
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index e11fe84de0fd..406cc70c571d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -14,6 +14,9 @@
* - SOCK_DGRAM
* - SOCK_RAW
*
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
* Start this program on two connected hosts, one in send mode and
* the other with option '-r' to put it in receiver mode.
*
@@ -53,6 +56,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <linux/rds.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
@@ -164,17 +168,39 @@ static int do_accept(int fd)
return fd;
}
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+ struct cmsghdr *cm;
+
+ if (!msg->msg_control)
+ error(1, errno, "NULL cookie");
+ cm = (void *)msg->msg_control;
+ cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+ cm->cmsg_level = SOL_RDS;
+ cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
{
int ret, len, i, flags;
+ static uint32_t cookie;
+ char ckbuf[CMSG_SPACE(sizeof(cookie))];
len = 0;
for (i = 0; i < msg->msg_iovlen; i++)
len += msg->msg_iov[i].iov_len;
flags = MSG_DONTWAIT;
- if (do_zerocopy)
+ if (do_zerocopy) {
flags |= MSG_ZEROCOPY;
+ if (domain == PF_RDS) {
+ memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+ msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+ msg->msg_control = (struct cmsghdr *)ckbuf;
+ add_zcopy_cookie(msg, ++cookie);
+ }
+ }
ret = sendmsg(fd, msg, flags);
if (ret == -1 && errno == EAGAIN)
@@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
if (do_zerocopy && ret)
expected_completions++;
}
+ if (do_zerocopy && domain == PF_RDS) {
+ msg->msg_control = NULL;
+ msg->msg_controllen = 0;
+ }
return true;
}
@@ -216,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg)
msg->msg_iov[0].iov_len = payload_len + extra_len;
extra_len = 0;
- do_sendmsg(fd, msg, do_zerocopy);
+ do_sendmsg(fd, msg, do_zerocopy,
+ (cfg_dst_addr.ss_family == AF_INET ?
+ PF_INET : PF_INET6));
}
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
@@ -300,14 +332,65 @@ static int do_setup_tx(int domain, int type, int protocol)
if (cfg_zerocopy)
do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
- if (domain != PF_PACKET)
+ if (domain != PF_PACKET && domain != PF_RDS)
if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
error(1, errno, "connect");
+ if (domain == PF_RDS) {
+ if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+ error(1, errno, "bind");
+ }
+
return fd;
}
-static bool do_recv_completion(int fd)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+ int i;
+
+ if (ck->num > RDS_MAX_ZCOOKIES)
+ error(1, 0, "Returned %d cookies, max expected %d\n",
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
{
struct sock_extended_err *serr;
struct msghdr msg = {};
@@ -316,6 +399,9 @@ static bool do_recv_completion(int fd)
int ret, zerocopy;
char control[100];
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
+
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
@@ -337,6 +423,7 @@ static bool do_recv_completion(int fd)
cm->cmsg_level, cm->cmsg_type);
serr = (void *) CMSG_DATA(cm);
+
if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
if (serr->ee_errno != 0)
@@ -371,20 +458,20 @@ static bool do_recv_completion(int fd)
}
/* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
{
- while (do_recv_completion(fd)) {}
+ while (do_recv_completion(fd, domain)) {}
}
/* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
{
int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
while (completions < expected_completions &&
gettimeofday_ms() < tstop) {
- if (do_poll(fd, POLLERR))
- do_recv_completions(fd);
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
}
if (completions < expected_completions)
@@ -444,6 +531,13 @@ static void do_tx(int domain, int type, int protocol)
msg.msg_iovlen++;
}
+ if (domain == PF_RDS) {
+ msg.msg_name = &cfg_dst_addr;
+ msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ }
+
iov[2].iov_base = payload;
iov[2].iov_len = cfg_payload_len;
msg.msg_iovlen++;
@@ -454,17 +548,17 @@ static void do_tx(int domain, int type, int protocol)
if (cfg_cork)
do_sendmsg_corked(fd, &msg);
else
- do_sendmsg(fd, &msg, cfg_zerocopy);
+ do_sendmsg(fd, &msg, cfg_zerocopy, domain);
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
- do_recv_completions(fd);
+ do_recv_completions(fd, domain);
}
} while (gettimeofday_ms() < tstop);
if (cfg_zerocopy)
- do_recv_remaining_completions(fd);
+ do_recv_remaining_completions(fd, domain);
if (close(fd))
error(1, errno, "close");
@@ -610,6 +704,7 @@ static void parse_opts(int argc, char **argv)
40 /* max tcp options */;
int c;
char *daddr = NULL, *saddr = NULL;
+ char *cfg_test;
cfg_payload_len = max_payload_len;
@@ -667,6 +762,14 @@ static void parse_opts(int argc, char **argv)
break;
}
}
+
+ cfg_test = argv[argc - 1];
+ if (strcmp(cfg_test, "rds") == 0) {
+ if (!daddr)
+ error(1, 0, "-D <server addr> required for PF_RDS\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required for PF_RDS\n");
+ }
setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
@@ -699,6 +802,8 @@ int main(int argc, char **argv)
do_test(cfg_family, SOCK_STREAM, 0);
else if (!strcmp(cfg_test, "udp"))
do_test(cfg_family, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "rds"))
+ do_test(PF_RDS, SOCK_SEQPACKET, 0);
else
error(1, 0, "unknown cfg_test %s", cfg_test);
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755
index 000000000000..1e428781a625
--- /dev/null
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -0,0 +1,471 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
+#
+# Tests currently implemented:
+#
+# - pmtu_vti4_exception
+# Set up vti tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is not
+# created if link layer MTU is not exceeded, then exceed it and check that
+# exception is created with the expected PMTU. The approach described
+# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+# changes alone won't affect PMTU
+#
+# - pmtu_vti6_exception
+# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is
+# created by exceeding link layer MTU with ping to other endpoint. Then
+# decrease and increase MTU of tunnel, checking that route exception PMTU
+# changes accordingly
+#
+# - pmtu_vti4_default_mtu
+# Set up vti4 tunnel on top of veth, in two namespaces with matching
+# endpoints. Check that MTU assigned to vti interface is the MTU of the
+# lower layer (veth) minus additional lower layer headers (zero, for veth)
+# minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+# Set up vti4 interface passing MTU value at link creation, check MTU is
+# configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+# Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+# and check that configured MTU is used on link creation and changes, and
+# that MTU is properly calculated instead when MTU is not configured from
+# userspace
+
+tests="
+ pmtu_vti6_exception vti6: PMTU exceptions
+ pmtu_vti4_exception vti4: PMTU exceptions
+ pmtu_vti4_default_mtu vti4: default MTU assignment
+ pmtu_vti6_default_mtu vti6: default MTU assignment
+ pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
+ pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
+ pmtu_vti6_link_change_mtu vti6: MTU changes on link changes"
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
+cleanup_done=1
+err_buf=
+
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+ echo -n "${err_buf}"
+ err_buf=
+}
+
+setup_namespaces() {
+ ip netns add ${NS_A} || return 1
+ ip netns add ${NS_B}
+}
+
+setup_veth() {
+ ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+ ${ns_a} ip link set veth_b netns ${NS_B}
+
+ ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+ ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
+ ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+ ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+ ${ns_a} ip link set veth_a up
+ ${ns_b} ip link set veth_b up
+}
+
+setup_vti() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+ vti_a_addr="${4}"
+ vti_b_addr="${5}"
+ vti_mask=${6}
+
+ [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
+
+ ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+ ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+ ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+ ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+ ${ns_a} ip link set vti${proto}_a up
+ ${ns_b} ip link set vti${proto}_b up
+
+ sleep 1
+}
+
+setup_vti4() {
+ setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+ setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
+setup_xfrm() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+
+ ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+ ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+ ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
+}
+
+setup() {
+ [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1
+
+ cleanup_done=0
+ for arg do
+ eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
+ done
+}
+
+cleanup() {
+ [ ${cleanup_done} -eq 1 ] && return
+	ip netns del ${NS_A} 2> /dev/null
+	ip netns del ${NS_B} 2> /dev/null
+ cleanup_done=1
+}
+
+mtu() {
+ ns_cmd="${1}"
+ dev="${2}"
+ mtu="${3}"
+
+ ${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
+mtu_parse() {
+ input="${1}"
+
+ next=0
+ for i in ${input}; do
+ [ ${next} -eq 1 ] && echo "${i}" && return
+ [ "${i}" = "mtu" ] && next=1
+ done
+}
+
+link_get() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ ${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
+route_get_dst_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ ${ns_cmd} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+}
+
+test_pmtu_vti4_exception() {
+ setup namespaces veth vti4 xfrm4 || return 2
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" != "" ]; then
+ err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+ return 1
+ fi
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" = "" ]; then
+ err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+ return 1
+ fi
+
+ # ...with the right PMTU value
+ if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+ err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_exception() {
+ setup namespaces veth vti6 xfrm6 || return 2
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ ${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+ # Check that exception was created
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
+ err " tunnel exceeding link layer MTU didn't create route exception"
+ return 1
+ fi
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
+ err " decreasing tunnel MTU didn't decrease route exception PMTU"
+ fail=1
+ fi
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
+ err " increasing tunnel MTU didn't increase route exception PMTU"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+test_pmtu_vti4_default_mtu() {
+ setup namespaces veth vti4 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv4 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+ err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_default_mtu() {
+ setup namespaces veth vti6 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv6 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+ err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti4_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ [ $? -ne 0 ] && err " vti not supported" && return 2
+ ${ns_a} ip link del vti4_a
+
+ fail=0
+
+ min=68
+ max=$((65528 - 20))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti tunnel created with invalid MTU ${mtu}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti4_a
+ done
+
+ # Now check valid values
+ for v in ${min} 1300 ${max}; do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ ${ns_a} ip link del vti4_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ [ $? -ne 0 ] && err " vti6 not supported" && return 2
+ ${ns_a} ip link del vti6_a
+
+ fail=0
+
+ min=1280
+ max=$((65535 - 40))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+			err " vti6 tunnel created with invalid MTU ${mtu}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti6_a
+ done
+
+ # Now check valid values
+ for v in 1280 1300 $((65535 - 40)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ ${ns_a} ip link del vti6_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_change_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add dummy0 mtu 1500 type dummy
+ [ $? -ne 0 ] && err " dummy not supported" && return 2
+ ${ns_a} ip link add dummy1 mtu 3000 type dummy
+ ${ns_a} ip link set dummy0 up
+ ${ns_a} ip link set dummy1 up
+
+ ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+ ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+ fail=0
+
+ # Create vti6 interface bound to device, passing MTU, check it
+ ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1300 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1300"
+ fail=1
+ fi
+
+ # Move to another device with different MTU, without passing MTU, check
+ # MTU is adjusted
+ ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne $((3000 - 40)) ]; then
+ err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+ fail=1
+ fi
+
+ # Move it back, passing MTU, check MTU is not overridden
+ ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1280 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1280"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+trap cleanup EXIT
+
+exitcode=0
+desc=0
+IFS="
+"
+for t in ${tests}; do
+ [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+
+ (
+ unset IFS
+ eval test_${name}
+ ret=$?
+ cleanup
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${t}"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${t}"
+ err_flush
+ exit 1
+ elif [ $ret -eq 2 ]; then
+ printf "TEST: %-60s [SKIP]\n" "${t}"
+ err_flush
+ fi
+ )
+ [ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 989f917068d1..bd9b9632c72b 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -50,6 +50,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
+#include <net/if.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
@@ -73,14 +74,29 @@
* @return -1 if mode is bad, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
+ struct sockaddr_ll addr = {0};
int fd, val;
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
if (fd < 0) {
perror("socket packet");
exit(1);
}
+ pair_udp_setfilter(fd);
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_protocol = htons(ETH_P_IP);
+ addr.sll_ifindex = if_nametoindex("lo");
+ if (addr.sll_ifindex == 0) {
+ perror("if_nametoindex");
+ exit(1);
+ }
+ if (bind(fd, (void *) &addr, sizeof(addr))) {
+ perror("bind packet");
+ exit(1);
+ }
+
val = (((int) typeflags) << 16) | group_id;
if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
if (close(fd)) {
@@ -90,7 +106,6 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
return -1;
}
- pair_udp_setfilter(fd);
return fd;
}
@@ -128,6 +143,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
static void sock_fanout_set_ebpf(int fd)
{
+ static char log_buf[65536];
+
const int len_off = __builtin_offsetof(struct __sk_buff, len);
struct bpf_insn prog[] = {
{ BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
@@ -140,7 +157,6 @@ static void sock_fanout_set_ebpf(int fd)
{ BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
};
- char log_buf[512];
union bpf_attr attr;
int pfd;
@@ -228,7 +244,7 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[])
if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
(!(ret[0] == expect[1] && ret[1] == expect[0]))) {
- fprintf(stderr, "ERROR: incorrect queue lengths\n");
+ fprintf(stderr, "warning: incorrect queue lengths\n");
return 1;
}
@@ -347,7 +363,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
uint8_t type = typeflags & 0xFF;
int fds[2], fds_udp[2][2], ret;
- fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+ fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+ typeflags, PORT_BASE, PORT_BASE + port_off);
fds[0] = sock_fanout_open(typeflags, 0);
fds[1] = sock_fanout_open(typeflags, 0);
@@ -418,7 +435,7 @@ int main(int argc, char **argv)
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
- int port_off = 2, tries = 5, ret;
+ int port_off = 2, tries = 20, ret;
test_control_single();
test_control_group();
@@ -427,10 +444,14 @@ int main(int argc, char **argv)
/* find a set of ports that do not collide onto the same socket */
ret = test_datapath(PACKET_FANOUT_HASH, port_off,
expect_hash[0], expect_hash[1]);
- while (ret && tries--) {
+ while (ret) {
fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
expect_hash[0], expect_hash[1]);
+ if (!--tries) {
+ fprintf(stderr, "too many collisions\n");
+ return 1;
+ }
}
ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index a622eeecc3a6..e6f485235435 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -517,6 +517,7 @@ kci_test_gretap()
ip link help gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: gretap: iproute2 too old"
+ ip netns del "$testns"
return 1
fi
@@ -543,6 +544,7 @@ kci_test_gretap()
if [ $ret -ne 0 ]; then
echo "FAIL: gretap"
+ ip netns del "$testns"
return 1
fi
echo "PASS: gretap"
@@ -565,6 +567,7 @@ kci_test_ip6gretap()
ip link help ip6gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: ip6gretap: iproute2 too old"
+ ip netns del "$testns"
return 1
fi
@@ -591,6 +594,7 @@ kci_test_ip6gretap()
if [ $ret -ne 0 ]; then
echo "FAIL: ip6gretap"
+ ip netns del "$testns"
return 1
fi
echo "PASS: ip6gretap"
@@ -655,6 +659,7 @@ kci_test_erspan()
if [ $ret -ne 0 ]; then
echo "FAIL: erspan"
+ ip netns del "$testns"
return 1
fi
echo "PASS: erspan"
@@ -720,6 +725,7 @@ kci_test_ip6erspan()
if [ $ret -ne 0 ]; then
echo "FAIL: ip6erspan"
+ ip netns del "$testns"
return 1
fi
echo "PASS: ip6erspan"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 21fe149e3de1..bea079edc278 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -9,7 +9,7 @@ fi
echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
-./psock_fanout
+./in_netns.sh ./psock_fanout
if [ $? -ne 0 ]; then
echo "[FAIL]"
else
@@ -19,7 +19,7 @@ fi
echo "--------------------"
echo "running psock_tpacket test"
echo "--------------------"
-./psock_tpacket
+./in_netns.sh ./psock_tpacket
if [ $? -ne 0 ]; then
echo "[FAIL]"
else
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
index 5df07047ca86..81a98a240456 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -68,9 +68,11 @@ static int cfg_num_pkts = 4;
static int do_ipv4 = 1;
static int do_ipv6 = 1;
static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static bool cfg_loop_nodata;
+static bool cfg_no_delay;
static uint16_t dest_port = 9000;
static struct sockaddr_in daddr;
@@ -171,7 +173,7 @@ static void __poll(int fd)
memset(&pollfd, 0, sizeof(pollfd));
pollfd.fd = fd;
- ret = poll(&pollfd, 1, 100);
+ ret = poll(&pollfd, 1, cfg_poll_timeout);
if (ret != 1)
error(1, errno, "poll");
}
@@ -371,7 +373,8 @@ static void do_test(int family, unsigned int opt)
error(1, errno, "send");
/* wait for all errors to be queued, else ACKs arrive OOO */
- usleep(50 * 1000);
+ if (!cfg_no_delay)
+ usleep(50 * 1000);
__poll(fd);
@@ -392,6 +395,9 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -4: only IPv4\n"
" -6: only IPv6\n"
" -h: show this message\n"
+ " -c N: number of packets for each test\n"
+ " -D: no delay between packets\n"
+ " -F: poll() waits forever for an event\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -n: set no-payload option\n"
@@ -409,7 +415,7 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
char c;
- while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+ while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -417,6 +423,15 @@ static void parse_opt(int argc, char **argv)
case '6':
do_ipv4 = 0;
break;
+ case 'c':
+ cfg_num_pkts = strtoul(optarg, NULL, 10);
+ break;
+ case 'D':
+ cfg_no_delay = true;
+ break;
+ case 'F':
+ cfg_poll_timeout = -1;
+ break;
case 'I':
cfg_do_pktinfo = true;
break;
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 970ff294fec8..3a0336782d2d 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -14,11 +14,11 @@ REQUIREMENTS
* The kernel must have network namespace support
-* The kernel must have veth support available, as a veth pair is created
+* The kernel must have veth support available, as a veth pair is created
prior to running the tests.
-* All tc-related features must be built in or available as modules.
- To check what is required in current setup run:
+* All tc-related features being tested must be built in or available as
+ modules. To check what is required in current setup run:
./tdc.py -c
Note:
@@ -44,10 +44,13 @@ using the -p option when running tdc:
RUNNING TDC
-----------
-To use tdc, root privileges are required. tdc will not run otherwise.
+To use tdc, root privileges are required. This is because the
+commands being tested must be run as root. The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
-All tests are executed inside a network namespace to prevent conflicts
-within the host.
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
Running tdc without any arguments will run all tests. Refer to the section
on command line arguments for more information, or run:
@@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following
the failed test in the TAP output.
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future). A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+ - setup
+ - execute
+ - verify
+ - teardown
+
+The setup and teardown stages can run zero or more commands. The setup
+stage does some setup if the test needs it. The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next. These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command. The execute stage
+tests the return code against one or more acceptable values. The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
+
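+As a purely illustrative sketch (this is not code from tdc.py itself;
+the helper names and the simplified handling of the JSON fields are
+invented for this example), the stages and the plugin hooks around the
+execute stage can be pictured roughly like this:
+
+    import re, subprocess
+
+    def run_stage(stage, command, plugins):
+        # give each plugin a chance to rewrite the command for this stage
+        for p in plugins:
+            command = p.adjust_command(stage, command)
+        proc = subprocess.run(command, shell=True, capture_output=True,
+                              text=True)
+        return proc.returncode, proc.stdout
+
+    def run_testcase(tc, plugins):
+        for cmd in tc.get('setup', []):            # zero or more commands
+            run_stage('setup', cmd, plugins)        # each must succeed
+
+        for p in plugins:
+            p.pre_execute()
+        rc, out = run_stage('execute', tc['cmdUnderTest'], plugins)
+        for p in plugins:
+            p.post_execute()
+
+        # verify: exit code must be acceptable, output must match the pattern
+        rc2, out2 = run_stage('verify', tc['verifyCmd'], plugins)
+        ok = (str(rc) == tc['expExitCode'] and
+              len(re.findall(tc['matchPattern'], out2)) == int(tc['matchCount']))
+
+        for cmd in tc.get('teardown', []):
+            run_stage('teardown', cmd, plugins)
+        return ok
+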
USER-DEFINED CONSTANTS
----------------------
@@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require.
Example:
$TC qdisc add dev $DEV1 ingress
+The NAMES values are used to substitute into the commands in the test cases.
+
COMMAND LINE ARGUMENTS
----------------------
Run tdc.py -h to see the full list of available arguments.
--p PATH Specify the tc executable located at PATH to be used on this
- test run
--c Show the available test case categories in this test file
--c CATEGORY Run only tests that belong to CATEGORY
--f FILE Read test cases from the JSON file named FILE
--l [CATEGORY] List all test cases in the JSON file. If CATEGORY is
- specified, list test cases matching that category.
--s ID Show the test case matching ID
--e ID Execute the test case identified by ID
--i Generate unique ID numbers for test cases with no existing
- ID number
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+ [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
+ [-d DEVICE] [-n NS] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PATH, --path PATH The full path to the tc executable to use
+ -v, --verbose Show the commands that are being run
+ -d DEVICE, --device DEVICE
+ Execute the test case in flower category
+
+selection:
+ select which test cases: files plus directories; filtered by categories
+ plus testids
+
+ -D DIR [DIR ...], --directory DIR [DIR ...]
+ Collect tests from the specified directory(ies)
+ (default [tc-tests])
+ -f FILE [FILE ...], --file FILE [FILE ...]
+ Run tests from the specified file(s)
+ -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+ Run tests only from the specified category/ies, or if
+ no category/ies is/are specified, list known
+ categories.
+ -e ID [ID ...], --execute ID [ID ...]
+ Execute the specified test cases with specified IDs
+
+action:
+ select action to perform on selected test cases
+
+ -l, --list List all test cases, or those only within the
+ specified category
+ -s, --show Display the selected test cases
+ -i, --id Generate ID numbers for new test cases
+
+netns:
+ options for nsPlugin(run commands in net namespace)
+
+ -n NS, --namespace NS
+ Run commands in namespace NS
+
+valgrind:
+ options for valgrindPlugin (run command under test under Valgrind)
+
+ -V, --valgrind Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib. They are executed from
+the directory plugins. Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py
+
+The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and
+must be called "SubPlugin" so tdc can find them. They are
+distinguished from each other in the python program by their module
+name.
+
+This base class supplies "hooks" to run extra functions. These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during the setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed. This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+ - pre (pre-suite)
+ - setup
+ - command
+ - verify
+ - teardown
+ - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py. To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins. It will be detected at run time and invoked at the
+appropriate times. There are a few examples in the plugin-lib
+directory:
+
+ - rootPlugin.py:
+ implements the enforcement of running as root
+ - nsPlugin.py:
+ sets up a network namespace and runs all commands in that namespace
+ - valgrindPlugin.py
+ runs each command in the execute stage under valgrind,
+ and checks for leaks.
+ This plugin will output an extra test for each test in the test file,
+ one is the existing output as to whether the test passed or failed,
+ and the other is a test whether the command leaked memory or not.
+ (This one is a preliminary version, it may not work quite right yet,
+ but the overall template is there and it should only need tweaks.)
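+
+As a concrete illustration, a minimal plugin might look like the sketch
+below. It is not one of the shipped plugins; the 'time/SubPlugin' name,
+the timePlugin.py file name and the timing behaviour are invented for
+the example, but the SubPlugin class name and the hooks follow the
+interface described above.
+
+    import time
+    from TdcPlugin import TdcPlugin
+
+    class SubPlugin(TdcPlugin):
+        def __init__(self):
+            self.sub_class = 'time/SubPlugin'
+            super().__init__()
+
+        def pre_execute(self):
+            # remember when the command under test started
+            self._t0 = time.time()
+
+        def post_execute(self):
+            # report how long the execute stage took
+            print('  execute stage took {:.3f}s'.format(time.time() - self._t0))
+
+Saved as plugin-lib/timePlugin.py and symlinked from plugins/, it would
+be picked up automatically on the next run.
+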
ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
index 6a266d811a78..c40698557e2f 100644
--- a/tools/testing/selftests/tc-testing/TODO.txt
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -5,6 +5,27 @@ tc Testing Suite To-Do list:
- Add support for multiple versions of tc to run successively
-- Improve error messages when tdc aborts its run
+- Improve error messages when tdc aborts its run. Partially done - still
+ need to better handle problems in pre- and post-suite.
-- Allow tdc to write its results to file
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+ Maybe use python logger module for this too.
+
+- A better implementation of the "hooks". Currently, every plugin
+ will attempt to run a function at every hook point. Could be
+ changed so that plugin __init__ methods will register functions to
+ be run in the various predefined times. Then if a plugin does not
+ require action at a specific point, no penalty will be paid for
+ trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+ and a way to configure a test suite,
+ to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
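+
+  One possible shape for the hook-registration idea above (all names
+  invented, just a sketch of the direction, not existing tdc code):
+
+    class HookRegistry:
+        def __init__(self):
+            self._hooks = {}              # hook name -> list of callables
+
+        def register(self, hook, func):
+            self._hooks.setdefault(hook, []).append(func)
+
+        def run(self, hook, *args, **kwargs):
+            # only plugins that registered for a hook pay any cost for it
+            for func in self._hooks.get(hook, []):
+                func(*args, **kwargs)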
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000000..3ee9a6dacb52
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+ def __init__(self):
+ super().__init__()
+ print(' -- {}.__init__'.format(self.sub_class))
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ self.testcount = testcount
+ self.testidlist = testidlist
+ if self.args.verbose > 1:
+ print(' -- {}.pre_suite'.format(self.sub_class))
+
+ def post_suite(self, index):
+ '''run commands after test_runner completes the test loop
+ index is the last ordinal number of test that was attempted'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_suite'.format(self.sub_class))
+
+ def pre_case(self, test_ordinal, testid):
+ '''run commands before test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_case'.format(self.sub_class))
+ self.args.testid = testid
+ self.args.test_ordinal = test_ordinal
+
+ def post_case(self):
+ '''run commands after test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_case'.format(self.sub_class))
+
+ def pre_execute(self):
+ '''run command before test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_execute'.format(self.sub_class))
+
+ def post_execute(self):
+ '''run command after test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_execute'.format(self.sub_class))
+
+ def adjust_command(self, stage, command):
+ '''adjust the command'''
+ if self.args.verbose > 1:
+ print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+ # if stage == 'pre':
+ # pass
+ # elif stage == 'setup':
+ # pass
+ # elif stage == 'execute':
+ # pass
+ # elif stage == 'verify':
+ # pass
+ # elif stage == 'teardown':
+ # pass
+ # elif stage == 'post':
+ # pass
+ # else:
+ # pass
+
+ return command
+
+ def add_args(self, parser):
+ '''Get the plugin args from the command line'''
+ self.argparser = parser
+ return self.argparser
+
+ def check_args(self, args, remaining):
+ '''Check that the args are set correctly'''
+ self.args = args
+ if self.args.verbose > 1:
+ print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000000..c18f88d09360
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+ pre (the pre-suite stage)
+ prepare
+ execute
+ verify
+ teardown
+ post (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+ def __init__(self)
+ def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below
+ def post_suite(self, ordinal) # see "SKIPPING" below
+ def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below
+ def post_case(self)
+ def pre_execute(self)
+ def post_execute(self)
+ def adjust_command(self, stage, command) # see "ADJUST" below
+ def add_args(self, parser) # see "ADD_ARGS" below
+ def check_args(self, args, remaining) # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run). This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped. The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output. The tdc.py script will do that for the test suite as
+defined in the test case files, but if the plugin is being used to run
+extra tests on each test (eg, checking for memory leaks in associated
+co-processes), then that extra TAP output can be generated in the
+post_suite method using the info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted. It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id. Useful for outputting the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed. The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+ 'pre'
+ 'setup'
+ 'command'
+ 'verify'
+ 'teardown'
+ 'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it. Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method lets the plugin do validation on the args, if
+needed. If there is a problem, an Exception should be raised, with a
+string that explains the problem.
+
+eg: raise Exception('plugin xxx, arg -y is wrong, fix it')
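+
+To make the add_args/check_args/adjust_command interplay concrete, here
+is a hedged sketch of a plugin. It is not shipped with tdc; the plugin
+name and its --dry-run option are invented for this example:
+
+    from TdcPlugin import TdcPlugin
+
+    class SubPlugin(TdcPlugin):
+        def __init__(self):
+            self.sub_class = 'example/SubPlugin'
+            super().__init__()
+
+        def add_args(self, parser):
+            super().add_args(parser)
+            self.argparser_group = self.argparser.add_argument_group(
+                'example', 'options for the example plugin')
+            self.argparser_group.add_argument(
+                '--dry-run', action='store_true',
+                help='echo commands instead of running them')
+            return self.argparser
+
+        def check_args(self, args, remaining):
+            super().check_args(args, remaining)
+            if args.dry_run and getattr(args, 'valgrind', False):
+                raise Exception('example plugin: --dry-run conflicts with -V')
+
+        def adjust_command(self, stage, command):
+            super().adjust_command(stage, command)
+            if not self.args.dry_run:
+                return command
+            # in a dry run, only echo what would have been executed
+            if isinstance(command, list):
+                return ['echo'] + command
+            return 'echo ' + command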
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
index 00438331ba47..17b267dedbd9 100644
--- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -12,14 +12,18 @@ template.json for the required JSON format for test cases.
Include the 'id' field, but do not assign a value. Running tdc with the -i
option will generate a unique ID for that test case.
-tdc will recursively search the 'tc' subdirectory for .json files. Any
-test case files you create in these directories will automatically be included.
-If you wish to store your custom test cases elsewhere, be sure to run tdc
-with the -f argument and the path to your file.
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files. Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
-Be aware of required escape characters in the JSON data - particularly when
-defining the match pattern. Refer to the tctests.json file for examples when
-in doubt.
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt. The match pattern is stored in JSON but is
+used by Python as a regular expression, so it must be a valid Python
+regular expression written with JSON escaping.
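+
+For instance (a small illustration only, using the csum test pattern
+and a made-up line of tc output), the doubled backslashes in the JSON
+end up as single backslashes in the Python regular expression:
+
+    import json, re
+
+    text = '{ "matchPattern": "csum \\\\(iph\\\\) action pass.*index 800 ref" }'
+    pattern = json.loads(text)['matchPattern']  # csum \(iph\) action pass.*index 800 ref
+
+    output = 'action order 1: csum (iph) action pass index 800 ref 1 bind 0'
+    print(len(re.findall(pattern, output)))     # 1, as a matchCount of "1" expects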
TEST CASE STRUCTURE
@@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS
If an error is detected during the setup/teardown process, execution of the
tests will immediately stop with an error message and the namespace in which
the tests are run will be destroyed. This is to prevent inaccurate results
-in the test cases.
+in the test cases. tdc will output a series of TAP results for the skipped
+tests.
Repeated failures of the setup/teardown may indicate a problem with the test
case, or possibly even a bug in one of the commands that are not being tested.
@@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the
individual command into a list, with the command being first, followed by all
acceptable exit codes for the command.
+Example:
+
+A pair of setup commands. The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000000..aa8a2669702b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory plugins off the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+ __init__.py
+ 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+ 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+ 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+ Check if the uid is root. If not, bail out.
+
+valgrindPlugin
+ Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+ This plugin will write files to the cwd, called vgnd-xxx.log. These will contain
+ the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be
+ deleted at the end of the run.
+
+nsPlugin
+ Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000000..a194b1af2b30
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'ns/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.namespace:
+ self._ns_create()
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ if self.args.verbose:
+ print('{}.post_suite'.format(self.sub_class))
+
+ if self.args.namespace:
+ self._ns_destroy()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'netns',
+ 'options for nsPlugin(run commands in net namespace)')
+ self.argparser_group.add_argument(
+ '-n', '--namespace', action='store_true',
+ help='Run commands in namespace')
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.namespace:
+ return command
+
+ if self.args.verbose:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+ if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+ if self.args.verbose:
+ print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+ cmdlist.insert(0, self.args.NAMES['NS'])
+ cmdlist.insert(0, 'exec')
+ cmdlist.insert(0, 'netns')
+ cmdlist.insert(0, 'ip')
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def _ns_create(self):
+ '''
+ Create the network namespace in which the tests will be run and set up
+ the required network devices for it.
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV0 up'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ if self.args.device:
+ cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+
+ def _ns_destroy(self):
+ '''
+ Destroy the network namespace for testing (and any associated network
+ devices as well)
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('post', cmd)
+
+ def _exec_cmd(self, stage, command):
+ '''
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ '''
+ if '$' in command:
+ command = self._replace_keywords(command)
+
+ self.adjust_command(stage, command)
+ if self.args.verbose:
+ print('_exec_cmd: command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+ def _replace_keywords(self, cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(self.args.NAMES)
+ return subcmd
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000000..e36775bd4d12
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'root/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ # run commands before test_runner goes into a test loop
+ super().pre_suite(testcount, testidlist)
+
+ if os.geteuid():
+ print('This script must be run with root privileges', file=sys.stderr)
+ exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000000..477a7bd7d7fb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+ return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'valgrind/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+ if self.args.verbose > 1:
+ print('{}.pre_suite'.format(self.sub_class))
+ if self.args.valgrind:
+ self._add_to_tap('1..{}\n'.format(self.testcount))
+
+ def post_suite(self, index):
+ '''run commands after test_runner goes into a test loop'''
+ super().post_suite(index)
+ self._add_to_tap('\n|---\n')
+ if self.args.verbose > 1:
+ print('{}.post_suite'.format(self.sub_class))
+ print('{}'.format(self.tap))
+ if self.args.verbose < 4:
+ subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'valgrind',
+ 'options for valgrindPlugin (run command under test under Valgrind)')
+
+ self.argparser_group.add_argument(
+ '-V', '--valgrind', action='store_true',
+ help='Run commands under valgrind')
+
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.valgrind:
+ return command
+
+ if self.args.verbose > 1:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+
+ if stage == 'execute':
+ if self.args.verbose > 1:
+ print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+ format(stage, command, cmdlist))
+ cmdlist.insert(0, '--track-origins=yes')
+ cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+ cmdlist.insert(0, '--leak-check=full')
+ cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+ cmdlist.insert(0, '-v') # ask for summary of non-leak errors
+ cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose > 1:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def post_execute(self):
+ if not self.args.valgrind:
+ return
+
+ self.definitely_lost_re = re.compile(
+ r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+ self.indirectly_lost_re = re.compile(
+ r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.possibly_lost_re = re.compile(
+            r'possibly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.non_leak_error_re = re.compile(
+ r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+ def_num = 0
+ ind_num = 0
+ pos_num = 0
+ nle_num = 0
+
+ # what about concurrent test runs? Maybe force them to be in different directories?
+ with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+ content = vfd.read()
+ def_mo = self.definitely_lost_re.search(content)
+ ind_mo = self.indirectly_lost_re.search(content)
+ pos_mo = self.possibly_lost_re.search(content)
+ nle_mo = self.non_leak_error_re.search(content)
+
+ if def_mo:
+ def_num = int(def_mo.group(2))
+ if ind_mo:
+ ind_num = int(ind_mo.group(2))
+ if pos_mo:
+ pos_num = int(pos_mo.group(2))
+ if nle_mo:
+ nle_num = int(nle_mo.group(1))
+
+ mem_results = ''
+ if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+ mem_results += 'not '
+
+ mem_results += 'ok {} - {}-mem # {}\n'.format(
+ self.args.test_ordinal, self.args.testid, 'memory leak check')
+ self._add_to_tap(mem_results)
+ if mem_results.startswith('not '):
+ print('{}'.format(content))
+ self._add_to_tap(content)
+
+ def _add_to_tap(self, more_tap_output):
+ self.tap += more_tap_output
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugins/__init__.py
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644
index 000000000000..93cf8fea8ae7
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -0,0 +1,410 @@
+[
+ {
+ "id": "6d84",
+ "name": "Add csum iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum iph index 800",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 800",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "1862",
+ "name": "Add csum ip4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "15c6",
+ "name": "Add csum ipv4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1122",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bf47",
+ "name": "Add csum icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "cc1d",
+ "name": "Add csum igmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum igmp index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 999",
+ "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bccc",
+ "name": "Add csum foobar action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum foobar index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "3bb4",
+ "name": "Add csum tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 9999",
+ "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "759c",
+ "name": "Add csum udp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp index 334455",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 334455",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bdb6",
+ "name": "Add csum udp xor iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "c220",
+ "name": "Add csum udplite action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 3",
+ "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "8993",
+ "name": "Add csum sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum sctp index 777",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 777",
+ "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b138",
+ "name": "Add csum ip & icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 123",
+ "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "eeda",
+ "name": "Add csum ip & sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 2",
+ "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "0017",
+ "name": "Add csum udp or tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 27",
+ "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "ce92",
+ "name": "Add csum udp action with cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "912f",
+ "name": "Add csum icmp action with large cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 17",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "879b",
+ "name": "Add batch of 32 csum tcp actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ }
+]
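
Note on the JSON fields above: each "setup"/"teardown" entry that is itself a list, such as ["$TC actions flush action csum", 0, 1, 255], carries the command first and then the exit codes that are still treated as success (flushing when nothing is installed returns non-zero). This mirrors how prepare_env() in the tdc.py changes further below consumes these entries; run_stage_command() is a hypothetical helper name used only for this sketch, not the actual tdc.py code:

    import subprocess

    def run_stage_command(cmdinfo):
        # A list entry means [command, acceptable_exit_code, ...];
        # a bare string means only exit code 0 is acceptable.
        if isinstance(cmdinfo, list):
            cmd, exit_codes = cmdinfo[0], cmdinfo[1:]
        else:
            cmd, exit_codes = cmdinfo, [0]
        if not cmd:
            return
        proc = subprocess.Popen(cmd, shell=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
        if proc.returncode not in exit_codes:
            raise RuntimeError('"{}" failed with exit code {}'.format(
                cmd, proc.returncode))
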
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index e2187b6e0b7a..ae96d0350d7e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -465,5 +465,76 @@
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "1021",
+ "name": "Add batch of 32 gact pass actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action pass index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "da7a",
+ "name": "Add batch of 32 gact continue actions with cookie",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action continue index $i cookie aabbccddeeff112233445566778800a1 \"; args=\"$args$cmd\"; done && $TC actions add $args",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8aa3",
+ "name": "Delete batch of 32 gact continue actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "for i in `seq 1 32`; do cmd=\"action continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args"
+ ],
+ "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action gact index $i \"; args=\"$args$cmd\"; done && $TC actions del $args",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
}
-]
+]
\ No newline at end of file
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644
index 000000000000..4510ddfa6e54
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -0,0 +1,410 @@
+[
+ {
+ "id": "6f5a",
+ "name": "Add vlan pop action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "ee6f",
+ "name": "Add vlan pop action with large index",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "b6b9",
+ "name": "Add vlan pop action with jump opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "87c3",
+ "name": "Add vlan pop action with trap opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "2b91",
+ "name": "Add vlan invalid action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan bad_mode",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "57fc",
+ "name": "Add vlan action with invalid protocol type",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "3989",
+ "name": "Add vlan push action with default protocol and priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 18",
+ "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "79dc",
+ "name": "Add vlan push action with protocol 802.1Q and priority 3",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 734",
+ "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "4d73",
+ "name": "Add vlan push action with protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 10000",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "1f7b",
+ "name": "Add vlan push action with invalid vlan ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5d02",
+ "name": "Add vlan push action with invalid IEEE 802.1p priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "6812",
+ "name": "Add vlan modify action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5a31",
+ "name": "Add vlan modify action for protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 12",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "83a4",
+ "name": "Delete vlan pop action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan pop index 44"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 44",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ed1e",
+ "name": "Delete vlan push action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a2a3",
+ "name": "Flush vlan actions",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+ ],
+ "cmdUnderTest": "$TC actions flush action vlan",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "1d78",
+ "name": "Add vlan action with cookie",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ }
+]
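
A minimal sketch of how the verify stage evaluates "matchPattern"/"matchCount" for cases like those above, mirroring run_one_test() in the tdc.py changes below; the sample output string is hypothetical and only approximates tc's actual formatting:

    import re

    def verify_matches(match_pattern, match_count, output):
        # matchPattern is compiled with DOTALL and MULTILINE, applied to the
        # verifyCmd output, and the number of hits must equal matchCount.
        pattern = re.compile(str(match_pattern), re.DOTALL | re.MULTILINE)
        return len(re.findall(pattern, output or '')) == int(match_count)

    sample_output = ("\taction order 1: vlan  pop pipe\n"
                     "\t index 8 ref 1 bind 0\n")
    print(verify_matches("action order [0-9]+: vlan.*pop.*index 8 ref",
                         "1", sample_output))   # prints True
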
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index fc373fdf2bdc..44de4a272a11 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -11,16 +11,96 @@ import re
import os
import sys
import argparse
+import importlib
import json
import subprocess
+import time
+import traceback
from collections import OrderedDict
from string import Template
from tdc_config import *
from tdc_helper import *
-
-USE_NS = True
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+ def __init__(self, stage, output, message):
+ self.stage = stage
+ self.output = output
+ self.message = message
+
+class PluginMgr:
+ def __init__(self, argparser):
+ super().__init__()
+ self.plugins = {}
+ self.plugin_instances = []
+ self.args = []
+ self.argparser = argparser
+
+ # TODO, put plugins in order
+ plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
+ for dirpath, dirnames, filenames in os.walk(plugindir):
+ for fn in filenames:
+ if (fn.endswith('.py') and
+ not fn == '__init__.py' and
+ not fn.startswith('#') and
+ not fn.startswith('.#')):
+ mn = fn[0:-3]
+ foo = importlib.import_module('plugins.' + mn)
+ self.plugins[mn] = foo
+ self.plugin_instances.append(foo.SubPlugin())
+
+ def call_pre_suite(self, testcount, testidlist):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_suite(testcount, testidlist)
+
+ def call_post_suite(self, index):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_suite(index)
+
+ def call_pre_case(self, test_ordinal, testid):
+ for pgn_inst in self.plugin_instances:
+ try:
+ pgn_inst.pre_case(test_ordinal, testid)
+ except Exception as ee:
+ print('exception {} in call to pre_case for {} plugin'.
+ format(ee, pgn_inst.__class__))
+ print('test_ordinal is {}'.format(test_ordinal))
+ print('testid is {}'.format(testid))
+ raise
+
+ def call_post_case(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_case()
+
+ def call_pre_execute(self):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_execute()
+
+ def call_post_execute(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_execute()
+
+ def call_add_args(self, parser):
+ for pgn_inst in self.plugin_instances:
+ parser = pgn_inst.add_args(parser)
+ return parser
+
+ def call_check_args(self, args, remaining):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.check_args(args, remaining)
+
+ def call_adjust_command(self, stage, command):
+ for pgn_inst in self.plugin_instances:
+ command = pgn_inst.adjust_command(stage, command)
+ return command
+
+ @staticmethod
+ def _make_argparser(args):
+ self.argparser = argparse.ArgumentParser(
+ description='Linux TC unit tests')
def replace_keywords(cmd):
@@ -33,21 +113,24 @@ def replace_keywords(cmd):
return subcmd
-def exec_cmd(command, nsonly=True):
+def exec_cmd(args, pm, stage, command):
"""
Perform any required modifications on an executable command, then run
it in a subprocess and return the results.
"""
- if (USE_NS and nsonly):
- command = 'ip netns exec $NS ' + command
-
+ if len(command.strip()) == 0:
+ return None, None
if '$' in command:
command = replace_keywords(command)
+ command = pm.call_adjust_command(stage, command)
+ if args.verbose > 0:
+ print('command "{}"'.format(command))
proc = subprocess.Popen(command,
shell=True,
stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE,
+ env=ENVIR)
(rawout, serr) = proc.communicate()
if proc.returncode != 0 and len(serr) > 0:
@@ -60,36 +143,99 @@ def exec_cmd(command, nsonly=True):
return proc, foutput
-def prepare_env(cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
"""
- Execute the setup/teardown commands for a test case. Optionally
- terminate test execution if the command fails.
+ Execute the setup/teardown commands for a test case.
+ Optionally terminate test execution if the command fails.
"""
+ if args.verbose > 0:
+ print('{}'.format(prefix))
for cmdinfo in cmdlist:
- if (type(cmdinfo) == list):
+ if isinstance(cmdinfo, list):
exit_codes = cmdinfo[1:]
cmd = cmdinfo[0]
else:
exit_codes = [0]
cmd = cmdinfo
- if (len(cmd) == 0):
+ if not cmd:
continue
- (proc, foutput) = exec_cmd(cmd)
+ (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+ if proc and (proc.returncode not in exit_codes):
+ print('', file=sys.stderr)
+ print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+ file=sys.stderr)
+ print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+ file=sys.stderr)
+ print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+ print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+ print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+ raise PluginMgrTestFail(
+ stage, output,
+ '"{}" did not complete successfully'.format(prefix))
+
+def run_one_test(pm, args, index, tidx):
+ global NAMES
+ result = True
+ tresult = ""
+ tap = ""
+ if args.verbose > 0:
+ print("\t====================\n=====> ", end="")
+ print("Test " + tidx["id"] + ": " + tidx["name"])
+
+ # populate NAMES with TESTID for this test
+ NAMES['TESTID'] = tidx['id']
+
+ pm.call_pre_case(index, tidx['id'])
+ prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+ if (args.verbose > 0):
+ print('-----> execute stage')
+ pm.call_pre_execute()
+ (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+ exit_code = p.returncode
+ pm.call_post_execute()
+
+ if (exit_code != int(tidx["expExitCode"])):
+ result = False
+ print("exit:", exit_code, int(tidx["expExitCode"]))
+ print(procout)
+ else:
+ if args.verbose > 0:
+ print('-----> verify stage')
+ match_pattern = re.compile(
+ str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+ (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+ if procout:
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ result = False
+ elif int(tidx["matchCount"]) != 0:
+ result = False
+
+ if not result:
+ tresult += 'not '
+ tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+ tap += tresult
- if proc.returncode not in exit_codes:
- print
- print("Could not execute:")
- print(cmd)
- print("\nError message:")
- print(foutput)
- print("\nAborting test run.")
- ns_destroy()
- exit(1)
+ if result == False:
+ if procout:
+ tap += procout
+ else:
+ tap += 'No output!\n'
+
+ prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+ pm.call_post_case()
+ index += 1
+
+ # remove TESTID from NAMES
+ del(NAMES['TESTID'])
+ return tap
-def test_runner(filtered_tests, args):
+def test_runner(pm, args, filtered_tests):
"""
Driver function for the unit tests.
@@ -101,75 +247,92 @@ def test_runner(filtered_tests, args):
testlist = filtered_tests
tcount = len(testlist)
index = 1
- tap = str(index) + ".." + str(tcount) + "\n"
-
+ tap = ''
+ badtest = None
+ stage = None
+ emergency_exit = False
+ emergency_exit_message = ''
+
+ if args.notap:
+ if args.verbose:
+ tap = 'notap requested: omitting test plan\n'
+ else:
+ tap = str(index) + ".." + str(tcount) + "\n"
+ try:
+ pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ # when the extra print statements are uncommented,
+ # the traceback does not appear between them
+ # (it appears way earlier in the tdc.py output)
+ # so don't bother ...
+ # print('--------------------(')
+ # print('traceback')
+ traceback.print_tb(ex_tb)
+ # print('--------------------)')
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+ stage = 'pre-SUITE'
+
+ if emergency_exit:
+ pm.call_post_suite(index)
+ return emergency_exit_message
+ if args.verbose > 1:
+ print('give test rig 2 seconds to stabilize')
+ time.sleep(2)
for tidx in testlist:
- result = True
- tresult = ""
if "flower" in tidx["category"] and args.device == None:
+ if args.verbose > 1:
+ print('Not executing test {} {} because DEV2 not defined'.
+ format(tidx['id'], tidx['name']))
continue
- print("Test " + tidx["id"] + ": " + tidx["name"])
- prepare_env(tidx["setup"])
- (p, procout) = exec_cmd(tidx["cmdUnderTest"])
- exit_code = p.returncode
-
- if (exit_code != int(tidx["expExitCode"])):
- result = False
- print("exit:", exit_code, int(tidx["expExitCode"]))
- print(procout)
- else:
- match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL)
- (p, procout) = exec_cmd(tidx["verifyCmd"])
- match_index = re.findall(match_pattern, procout)
- if len(match_index) != int(tidx["matchCount"]):
- result = False
-
- if result == True:
- tresult += "ok "
- else:
- tresult += "not ok "
- tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n"
-
- if result == False:
- tap += procout
-
- prepare_env(tidx["teardown"])
+ try:
+ badtest = tidx # in case it goes bad
+ tap += run_one_test(pm, args, index, tidx)
+ except PluginMgrTestFail as pmtf:
+ ex_type, ex, ex_tb = sys.exc_info()
+ stage = pmtf.stage
+ message = pmtf.message
+ output = pmtf.output
+ print(message)
+ print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+ format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+ print('---------------')
+ print('traceback')
+ traceback.print_tb(ex_tb)
+ print('---------------')
+ if stage == 'teardown':
+ print('accumulated output for this test:')
+ if pmtf.output:
+ print(pmtf.output)
+ print('---------------')
+ break
index += 1
- return tap
-
+ # if we failed in setup or teardown,
+ # fill in the remaining tests with ok-skipped
+ count = index
+ if not args.notap:
+ tap += 'about to flush the tap output if tests need to be skipped\n'
+ if tcount + 1 != index:
+ for tidx in testlist[index - 1:]:
+ msg = 'skipped - previous {} failed'.format(stage)
+ tap += 'ok {} - {} # {} {} {}\n'.format(
+ count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+ count += 1
-def ns_create():
- """
- Create the network namespace in which the tests will be run and set up
- the required network devices for it.
- """
- if (USE_NS):
- cmd = 'ip netns add $NS'
- exec_cmd(cmd, False)
- cmd = 'ip link add $DEV0 type veth peer name $DEV1'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV1 netns $NS'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV0 up'
- exec_cmd(cmd, False)
- cmd = 'ip -n $NS link set $DEV1 up'
- exec_cmd(cmd, False)
- cmd = 'ip link set $DEV2 netns $NS'
- exec_cmd(cmd, False)
- cmd = 'ip -n $NS link set $DEV2 up'
- exec_cmd(cmd, False)
+ tap += 'done flushing skipped test tap output\n'
+ if args.pause:
+ print('Want to pause\nPress enter to continue ...')
+ if input(sys.stdin):
+ print('got something on stdin')
-def ns_destroy():
- """
- Destroy the network namespace for testing (and any associated network
- devices as well)
- """
- if (USE_NS):
- cmd = 'ip netns delete $NS'
- exec_cmd(cmd, False)
+ pm.call_post_suite(index)
+ return tap
def has_blank_ids(idlist):
"""
@@ -209,41 +372,70 @@ def set_args(parser):
"""
Set the command line arguments for tdc.
"""
- parser.add_argument('-p', '--path', type=str,
- help='The full path to the tc executable to use')
- parser.add_argument('-c', '--category', type=str, nargs='?', const='+c',
- help='Run tests only from the specified category, or if no category is specified, list known categories.')
- parser.add_argument('-f', '--file', type=str,
- help='Run tests from the specified file')
- parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY',
- help='List all test cases, or those only within the specified category')
- parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID',
- help='Display the test case with specified id')
- parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID',
- help='Execute the single test case with specified ID')
- parser.add_argument('-i', '--id', action='store_true', dest='gen_id',
- help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-p', '--path', type=str,
+ help='The full path to the tc executable to use')
+ sg = parser.add_argument_group(
+ 'selection', 'select which test cases: ' +
+ 'files plus directories; filtered by categories plus testids')
+ ag = parser.add_argument_group(
+ 'action', 'select action to perform on selected test cases')
+
+ sg.add_argument(
+ '-D', '--directory', nargs='+', metavar='DIR',
+ help='Collect tests from the specified directory(ies) ' +
+ '(default [tc-tests])')
+ sg.add_argument(
+ '-f', '--file', nargs='+', metavar='FILE',
+ help='Run tests from the specified file(s)')
+ sg.add_argument(
+ '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+ help='Run tests only from the specified category/ies, ' +
+ 'or if no category/ies is/are specified, list known categories.')
+ sg.add_argument(
+ '-e', '--execute', nargs='+', metavar='ID',
+ help='Execute the specified test cases with specified IDs')
+ ag.add_argument(
+ '-l', '--list', action='store_true',
+ help='List all test cases, or those only within the specified category')
+ ag.add_argument(
+ '-s', '--show', action='store_true', dest='showID',
+ help='Display the selected test cases')
+ ag.add_argument(
+ '-i', '--id', action='store_true', dest='gen_id',
+ help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-v', '--verbose', action='count', default=0,
+ help='Show the commands that are being run')
+ parser.add_argument(
+ '-N', '--notap', action='store_true',
+ help='Suppress tap results for command under test')
parser.add_argument('-d', '--device',
help='Execute the test case in flower category')
+ parser.add_argument(
+ '-P', '--pause', action='store_true',
+ help='Pause execution just before post-suite stage')
return parser
-def check_default_settings(args):
+def check_default_settings(args, remaining, pm):
"""
- Process any arguments overriding the default settings, and ensure the
- settings are correct.
+ Process any arguments overriding the default settings,
+ and ensure the settings are correct.
"""
# Allow for overriding specific settings
global NAMES
if args.path != None:
- NAMES['TC'] = args.path
+ NAMES['TC'] = args.path
if args.device != None:
- NAMES['DEV2'] = args.device
+ NAMES['DEV2'] = args.device
if not os.path.isfile(NAMES['TC']):
print("The specified tc path " + NAMES['TC'] + " does not exist.")
exit(1)
+ pm.call_check_args(args, remaining)
+
def get_id_list(alltests):
"""
@@ -277,7 +469,7 @@ def generate_case_ids(alltests):
for c in alltests:
if (c["id"] == ""):
while True:
- newid = str('%04x' % random.randrange(16**4))
+ newid = str('{:04x}'.format(random.randrange(16**4)))
if (does_id_exist(alltests, newid)):
continue
else:
@@ -300,40 +492,107 @@ def generate_case_ids(alltests):
json.dump(testlist, outfile, indent=4)
outfile.close()
+def filter_tests_by_id(args, testlist):
+ '''
+ Remove tests from testlist that are not in the named id list.
+ If id list is empty, return empty list.
+ '''
+ newlist = list()
+ if testlist and args.execute:
+ target_ids = args.execute
+
+ if isinstance(target_ids, list) and (len(target_ids) > 0):
+ newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+ return newlist
+
+def filter_tests_by_category(args, testlist):
+ '''
+ Remove tests from testlist that are not in a named category.
+ '''
+ answer = list()
+ if args.category and testlist:
+ test_ids = list()
+ for catg in set(args.category):
+ if catg == '+c':
+ continue
+ print('considering category {}'.format(catg))
+ for tc in testlist:
+ if catg in tc['category'] and tc['id'] not in test_ids:
+ answer.append(tc)
+ test_ids.append(tc['id'])
+
+ return answer
def get_test_cases(args):
"""
If a test case file is specified, retrieve tests from that file.
Otherwise, glob for all json files in subdirectories and load from
each one.
+ Also, if requested, filter by category, and add tests matching
+ certain ids.
"""
import fnmatch
- if args.file != None:
- if not os.path.isfile(args.file):
- print("The specified test case file " + args.file + " does not exist.")
- exit(1)
- flist = [args.file]
- else:
- flist = []
- for root, dirnames, filenames in os.walk('tc-tests'):
+
+ flist = []
+ testdirs = ['tc-tests']
+
+ if args.file:
+ # at least one file was specified - remove the default directory
+ testdirs = []
+
+ for ff in args.file:
+ if not os.path.isfile(ff):
+ print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
+ else:
+ flist.append(os.path.abspath(ff))
+
+ if args.directory:
+ testdirs = args.directory
+
+ for testdir in testdirs:
+ for root, dirnames, filenames in os.walk(testdir):
for filename in fnmatch.filter(filenames, '*.json'):
- flist.append(os.path.join(root, filename))
- alltests = list()
+ candidate = os.path.abspath(os.path.join(root, filename))
+ if candidate not in testdirs:
+ flist.append(candidate)
+
+ alltestcases = list()
for casefile in flist:
- alltests = alltests + (load_from_file(casefile))
- return alltests
+ alltestcases = alltestcases + (load_from_file(casefile))
+
+ allcatlist = get_test_categories(alltestcases)
+ allidlist = get_id_list(alltestcases)
+ testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+ idtestcases = filter_tests_by_id(args, alltestcases)
+ cattestcases = filter_tests_by_category(args, alltestcases)
-def set_operation_mode(args):
+ cat_ids = [x['id'] for x in cattestcases]
+ if args.execute:
+ if args.category:
+ alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+ else:
+ alltestcases = idtestcases
+ else:
+ if cat_ids:
+ alltestcases = cattestcases
+ else:
+ # just accept the existing value of alltestcases,
+ # which has been filtered by file/directory
+ pass
+
+ return allcatlist, allidlist, testcases_by_cats, alltestcases
+
+
+def set_operation_mode(pm, args):
"""
Load the test case data and process remaining arguments to determine
what the script should do for this run, and call the appropriate
function.
"""
- alltests = get_test_cases(args)
+ ucat, idlist, testcases, alltests = get_test_cases(args)
if args.gen_id:
- idlist = get_id_list(alltests)
if (has_blank_ids(idlist)):
alltests = generate_case_ids(alltests)
else:
@@ -347,70 +606,29 @@ def set_operation_mode(args):
print("Please correct them before continuing.")
exit(1)
- ucat = get_test_categories(alltests)
-
if args.showID:
- show_test_case_by_id(alltests, args.showID[0])
+ for atest in alltests:
+ print_test_case(atest)
exit(0)
- if args.execute:
- target_id = args.execute[0]
- else:
- target_id = ""
-
- if args.category:
- if (args.category == '+c'):
- print("Available categories:")
- print_sll(ucat)
- exit(0)
- else:
- target_category = args.category
- else:
- target_category = ""
-
-
- testcases = get_categorized_testlist(alltests, ucat)
+ if isinstance(args.category, list) and (len(args.category) == 0):
+ print("Available categories:")
+ print_sll(ucat)
+ exit(0)
if args.list:
- if (args.list == "++"):
+ if args.list:
list_test_cases(alltests)
exit(0)
- elif(len(args.list) > 0):
- if (args.list not in ucat):
- print("Unknown category " + args.list)
- print("Available categories:")
- print_sll(ucat)
- exit(1)
- list_test_cases(testcases[args.list])
- exit(0)
-
- if (os.geteuid() != 0):
- print("This script must be run with root privileges.\n")
- exit(1)
-
- ns_create()
-
- if (len(target_category) == 0):
- if (len(target_id) > 0):
- alltests = list(filter(lambda x: target_id in x['id'], alltests))
- if (len(alltests) == 0):
- print("Cannot find a test case with ID matching " + target_id)
- exit(1)
- catresults = test_runner(alltests, args)
- print("All test results: " + "\n\n" + catresults)
- elif (len(target_category) > 0):
- if (target_category == "flower") and args.device == None:
- print("Please specify a NIC device (-d) to run category flower")
- exit(1)
- if (target_category not in ucat):
- print("Specified category is not present in this file.")
- exit(1)
- else:
- catresults = test_runner(testcases[target_category], args)
- print("Category " + target_category + "\n\n" + catresults)
-
- ns_destroy()
+ if len(alltests):
+ catresults = test_runner(pm, args, alltests)
+ else:
+ catresults = 'No tests found\n'
+ if args.notap:
+ print('Tap output suppression requested\n')
+ else:
+ print('All test results: \n\n{}'.format(catresults))
def main():
"""
@@ -419,10 +637,15 @@ def main():
"""
parser = args_parse()
parser = set_args(parser)
+ pm = PluginMgr(parser)
+ parser = pm.call_add_args(parser)
(args, remaining) = parser.parse_known_args()
- check_default_settings(args)
+ args.NAMES = NAMES
+ check_default_settings(args, remaining, pm)
+ if args.verbose > 2:
+ print('args is {}'.format(args))
- set_operation_mode(args)
+ set_operation_mode(pm, args)
exit(0)
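
The PluginMgr class added above discovers any *.py file under ./plugins (or $TDC_PLUGIN_DIR), imports it, instantiates its SubPlugin class, and fans calls out to the per-stage hooks. The following hypothetical plugins/demoTiming.py skeleton implements just those hooks; the in-tree plugins derive from the TdcPlugin base class imported by tdc.py, which presumably provides default implementations, so this standalone form is only an illustration of the interface PluginMgr expects:

    import time

    class SubPlugin:
        # PluginMgr imports the module and instantiates SubPlugin().
        def __init__(self):
            self._start = time.time()

        def pre_suite(self, testcount, testidlist):
            self._start = time.time()

        def post_suite(self, index):
            print('suite finished after {:.2f}s'.format(time.time() - self._start))

        def pre_case(self, test_ordinal, testid):
            pass

        def post_case(self):
            pass

        def pre_execute(self):
            pass

        def post_execute(self):
            pass

        def adjust_command(self, stage, command):
            return command          # must return the (possibly rewritten) command

        def add_args(self, parser):
            return parser           # must return the argument parser

        def check_args(self, args, remaining):
            pass
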
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 707c6bfef689..52fa539dc662 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -49,13 +49,13 @@ index = 0
for i in range(0x100):
for j in range(0x100):
for k in range(0x100):
- mac = ("%02x:%02x:%02x" % (i, j, k))
+ mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
src_mac = "e4:11:00:" + mac
dst_mac = "e4:12:00:" + mac
- cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
- "src_mac %s dst_mac %s action drop %s" %
+ cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+ "src_mac {} dst_mac {} action drop {}".format
(device, prio, skip, src_mac, dst_mac, share_action))
- file.write("%s\n" % cmd)
+ file.write("{}\n".format(cmd))
index += 1
if index >= number:
file.close()
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index db381120a566..9f35c96c88a0 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -57,20 +57,11 @@ def print_sll(items):
def print_test_case(tcase):
""" Pretty-printing of a given test case. """
+ print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
for k in tcase.keys():
if (isinstance(tcase[k], list)):
print(k + ":")
print_list(tcase[k])
else:
- print(k + ": " + tcase[k])
-
-
-def show_test_case_by_id(testlist, caseID):
- """ Find the specified test case to pretty-print. """
- if not any(d.get('id', None) == caseID for d in testlist):
- print("That ID does not exist.")
- exit(1)
- else:
- print_test_case(next((d for d in testlist if d['id'] == caseID)))
-
-
+ if not ((k == 'id') or (k == 'name')):
+ print(k + ": " + str(tcase[k]))