1245 files changed, 30221 insertions, 12156 deletions
diff --git a/.mailmap b/.mailmap
index 08b80428f583..52489f564069 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antoine Tenart <antoine.tenart@free-electrons.com>
 Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
@@ -30,6 +31,9 @@ Axel Lin <axel.lin@gmail.com>
 Ben Gardner <bgardner@wabtec.com>
 Ben M Cahill <ben.m.cahill@intel.com>
 Björn Steinbrink <B.Steinbrink@gmx.de>
+Boris Brezillon <boris.brezillon@free-electrons.com>
+Boris Brezillon <boris.brezillon@free-electrons.com> <b.brezillon.dev@gmail.com>
+Boris Brezillon <boris.brezillon@free-electrons.com> <b.brezillon@overkiz.com>
 Brian Avery <b.avery@hp.com>
 Brian King <brking@us.ibm.com>
 Christoph Hellwig <hch@lst.de>
@@ -89,6 +93,7 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
 Mark Brown <broonie@sirena.org.uk>
 Matthieu CASTET <castet.matthieu@free.fr>
+Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com> <mchehab@infradead.org> <mchehab@redhat.com> <m.chehab@samsung.com> <mchehab@osg.samsung.com> <mchehab@s-opensource.com>
 Mayuresh Janorkar <mayur@ti.com>
 Michael Buesch <m@bues.ch>
 Michel Dänzer <michel@tungstengraphics.com>
@@ -122,6 +127,7 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
+Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com> <shuah.khan@hp.com> <shuahkh@osg.samsung.com> <shuah.kh@samsung.com>
 Simon Kelley <simon@thekelleys.org.uk>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
 Stephen Hemminger <shemminger@osdl.org>
diff --git a/CREDITS b/CREDITS
index 0f0bf22afe0c..2a3fbcd229e6 100644
--- a/CREDITS
+++ b/CREDITS
@@ -649,6 +649,7 @@ D: Configure, Menuconfig, xconfig
 
 N: Mauro Carvalho Chehab
 E: m.chehab@samsung.org
+E: mchehab@osg.samsung.com
 E: mchehab@infradead.org
 D: Media subsystem (V4L/DVB) drivers and core
 D: EDAC drivers and EDAC 3.0 core rework
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index 2f4a0051b32d..1ba0d0fda9c0 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -1,6 +1,6 @@
 What:		/config/usb-gadget/gadget/functions/uvc.name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	UVC function directory
 
 		streaming_maxburst	- 0..15 (ss only)
@@ -9,37 +9,37 @@ Description:	UVC function directory
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Control descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/class
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/class/ss
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Super speed control class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/class/fs
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Full speed control class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/terminal
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Terminal descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/terminal/output
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Output terminal descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/terminal/output/default
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Default output terminal descriptors
 
 		All attributes read only:
@@ -53,12 +53,12 @@ Description:	Default output terminal descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Camera terminal descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera/default
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Default camera terminal descriptors
 
 		All attributes read only:
@@ -75,12 +75,12 @@ Description:	Default camera terminal descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/processing
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Processing unit descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/processing/default
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Default processing unit descriptors
 
 		All attributes read only:
@@ -94,49 +94,49 @@ Description:	Default processing unit descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/header
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Control header descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/header/name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Specific control header descriptors
 
 dwClockFrequency
 bcdUVC
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Streaming descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/class
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Streaming class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/class/ss
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Super speed streaming class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/class/hs
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	High speed streaming class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/class/fs
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Full speed streaming class descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Color matching descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching/default
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Default color matching descriptors
 
 		All attributes read only:
@@ -150,12 +150,12 @@ Description:	Default color matching descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	MJPEG format descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Specific MJPEG format descriptors
 
 		All attributes read only,
@@ -174,7 +174,7 @@ Description:	Specific MJPEG format descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name/name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Specific MJPEG frame descriptors
 
 		dwFrameInterval		- indicates how frame interval can be
@@ -196,12 +196,12 @@ Description:	Specific MJPEG frame descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Uncompressed format descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Specific uncompressed format descriptors
 
 		bmaControls		- this format's data for bmaControls in
@@ -221,7 +221,7 @@ Description:	Specific uncompressed format descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name/name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Specific uncompressed frame descriptors
 
 		dwFrameInterval		- indicates how frame interval can be
@@ -243,12 +243,12 @@ Description:	Specific uncompressed frame descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/header
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Streaming header descriptors
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/header/name
 Date:		Dec 2014
-KernelVersion:	3.20
+KernelVersion:	4.0
 Description:	Specific streaming header descriptors
 
 		All attributes read only:
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
index 6708c5e264aa..33e96f740639 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
+++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
@@ -1,4 +1,4 @@
-What		/sys/bus/iio/devices/iio:deviceX/in_proximity_raw
+What		/sys/bus/iio/devices/iio:deviceX/in_proximity_input
 Date:		March 2014
 KernelVersion:	3.15
 Contact:	Matt Ranostay <mranostay@gmail.com>
diff --git a/Documentation/ABI/testing/sysfs-class-net-batman-adv b/Documentation/ABI/testing/sysfs-class-net-batman-adv
index 518f6a1dbc0c..898106849e27 100644
--- a/Documentation/ABI/testing/sysfs-class-net-batman-adv
+++ b/Documentation/ABI/testing/sysfs-class-net-batman-adv
@@ -1,19 +1,10 @@
 
-What:		/sys/class/net/<iface>/batman-adv/throughput_override
-Date:		Feb 2014
-Contact:	Antonio Quartulli <antonio@meshcoding.com>
-description:
-		Defines the throughput value to be used by B.A.T.M.A.N. V
-		when estimating the link throughput using this interface.
-		If the value is set to 0 then batman-adv will try to
-		estimate the throughput by itself.
-
 What:           /sys/class/net/<iface>/batman-adv/elp_interval
 Date:           Feb 2014
 Contact:        Linus Lüssing <linus.luessing@web.de>
 Description:
                 Defines the interval in milliseconds in which batman
-                sends its probing packets for link quality measurements.
+                emits probing packets for neighbor sensing (ELP).
 
 What:           /sys/class/net/<iface>/batman-adv/iface_status
 Date:           May 2010
@@ -28,3 +19,12 @@ Description:
                 The /sys/class/net/<iface>/batman-adv/mesh_iface file
                 displays the batman mesh interface this <iface>
                 currently is associated with.
+
+What:           /sys/class/net/<iface>/batman-adv/throughput_override
+Date:           Feb 2014
+Contact:        Antonio Quartulli <a@unstable.cc>
+description:
+                Defines the throughput value to be used by B.A.T.M.A.N. V
+                when estimating the link throughput using this interface.
+                If the value is set to 0 then batman-adv will try to
+                estimate the throughput by itself.
diff --git a/Documentation/devicetree/bindings/hwmon/ina2xx.txt b/Documentation/devicetree/bindings/hwmon/ina2xx.txt
index 9bcd5e87830d..02af0d94e921 100644
--- a/Documentation/devicetree/bindings/hwmon/ina2xx.txt
+++ b/Documentation/devicetree/bindings/hwmon/ina2xx.txt
@@ -7,6 +7,7 @@ Required properties:
 	- "ti,ina220" for ina220
 	- "ti,ina226" for ina226
 	- "ti,ina230" for ina230
+	- "ti,ina231" for ina231
 - reg: I2C address
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt
index bfeabb843941..71191ff0e781 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt
@@ -44,8 +44,8 @@ Required properties:
 - our-claim-gpio: The GPIO that we use to claim the bus.
 - their-claim-gpios: The GPIOs that the other sides use to claim the bus.
   Note that some implementations may only support a single other master.
-- Standard I2C mux properties. See mux.txt in this directory.
-- Single I2C child bus node at reg 0. See mux.txt in this directory.
+- Standard I2C mux properties. See i2c-mux.txt in this directory.
+- Single I2C child bus node at reg 0. See i2c-mux.txt in this directory.
 
 Optional properties:
 - slew-delay-us: microseconds to wait for a GPIO to go high. Default is 10 us.
diff --git a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt
index 6078aefe7ed4..7ce23ac61308 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt
@@ -27,7 +27,8 @@ Required properties:
 - i2c-bus-name: The name of this bus. Also needed as pinctrl-name for the I2C
 		parents.
 
-Furthermore, I2C mux properties and child nodes. See mux.txt in this directory.
+Furthermore, I2C mux properties and child nodes. See i2c-mux.txt in this
+directory.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
index 66709a825541..21da3ecbb370 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
@@ -22,8 +22,8 @@ Required properties:
 - i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
   port is connected to.
 - mux-gpios: list of gpios used to control the muxer
-* Standard I2C mux properties. See mux.txt in this directory.
-* I2C child bus nodes. See mux.txt in this directory.
+* Standard I2C mux properties. See i2c-mux.txt in this directory.
+* I2C child bus nodes. See i2c-mux.txt in this directory.
 
 Optional properties:
 - idle-state: value to set the muxer to when idle. When no value is
@@ -33,7 +33,7 @@ For each i2c child node, an I2C child bus will be created. They will
 be numbered based on their order in the device tree.
 
 Whenever an access is made to a device on a child bus, the value set
-in the revelant node's reg property will be output using the list of
+in the relevant node's reg property will be output using the list of
 GPIOs, the first in the list holding the least-significant value.
 
 If an idle state is defined, using the idle-state (optional) property,
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
index ae8af1694e95..33119a98e144 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
@@ -28,9 +28,9 @@ Also required are:
 * Standard pinctrl properties that specify the pin mux state for each child
   bus. See ../pinctrl/pinctrl-bindings.txt.
 
-* Standard I2C mux properties. See mux.txt in this directory.
+* Standard I2C mux properties. See i2c-mux.txt in this directory.
 
-* I2C child bus nodes. See mux.txt in this directory.
+* I2C child bus nodes. See i2c-mux.txt in this directory.
 
 For each named state defined in the pinctrl-names property, an I2C child bus
 will be created. I2C child bus numbers are assigned based on the index into
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt
index 688783fbe696..de00d7fc450b 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-reg.txt
@@ -7,8 +7,8 @@ Required properties:
 - compatible: i2c-mux-reg
 - i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
   port is connected to.
-* Standard I2C mux properties. See mux.txt in this directory.
-* I2C child bus nodes. See mux.txt in this directory.
+* Standard I2C mux properties. See i2c-mux.txt in this directory.
+* I2C child bus nodes. See i2c-mux.txt in this directory.
 
 Optional properties:
 - reg: this pair of <offset size> specifies the register to control the mux.
@@ -24,7 +24,7 @@ Optional properties:
   given, it defaults to the last value used.
 
 Whenever an access is made to a device on a child bus, the value set
-in the revelant node's reg property will be output to the register.
+in the relevant node's reg property will be output to the register.
 
 If an idle state is defined, using the idle-state (optional) property,
 whenever an access is not being made to a device on a child bus, the
diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index ca752db14dff..d6c6e41648d4 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -20,6 +20,15 @@ Required properties:
       "brcm,bcm53018-srab"
       "brcm,bcm53019-srab" and the mandatory "brcm,bcm5301x-srab" string
 
+  For the BCM585xx/586XX/88312 SoCs with an integrated switch, must be one of:
+      "brcm,bcm58522-srab"
+      "brcm,bcm58523-srab"
+      "brcm,bcm58525-srab"
+      "brcm,bcm58622-srab"
+      "brcm,bcm58623-srab"
+      "brcm,bcm58625-srab"
+      "brcm,bcm88312-srab" and the mandatory "brcm,nsp-srab string
+
   For the BCM63xx/33xx SoCs with an integrated switch, must be one of:
       "brcm,bcm3384-switch"
       "brcm,bcm6328-switch"
diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt
index b30ab6b5cbfa..04ba1dc34fd6 100644
--- a/Documentation/devicetree/bindings/net/keystone-netcp.txt
+++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
@@ -2,7 +2,7 @@ This document describes the device tree bindings associated with the
 keystone network coprocessor(NetCP) driver support.
 
 The network coprocessor (NetCP) is a hardware accelerator that processes
-Ethernet packets. NetCP has a gigabit Ethernet (GbE) subsytem with a ethernet
+Ethernet packets. NetCP has a gigabit Ethernet (GbE) subsystem with a ethernet
 switch sub-module to send and receive packets. NetCP also includes a packet
 accelerator (PA) module to perform packet classification operations such as
 header matching, and packet modification operations such as checksum
diff --git a/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt b/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt
index 14aa6cf58201..6a9a63cb0543 100644
--- a/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt
+++ b/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt
@@ -13,10 +13,10 @@ Optional properties:
 		      initialization. This is an array of 28 values(u8).
 
   - marvell,wakeup-pin: It represents wakeup pin number of the bluetooth chip.
-		        firmware will use the pin to wakeup host system.
+		        firmware will use the pin to wakeup host system (u16).
   - marvell,wakeup-gap-ms: wakeup gap represents wakeup latency of the host
 		      platform. The value will be configured to firmware. This
-		      is needed to work chip's sleep feature as expected.
+		      is needed to work chip's sleep feature as expected (u16).
   - interrupt-parent: phandle of the parent interrupt controller
   - interrupts : interrupt pin number to the cpu. Driver will request an irq based
 		 on this interrupt number. During system suspend, the irq will be
@@ -50,7 +50,7 @@ calibration data is also available in below example.
 			0x37 0x01 0x1c 0x00 0xff 0xff 0xff 0xff 0x01 0x7f 0x04 0x02
 			0x00 0x00 0xba 0xce 0xc0 0xc6 0x2d 0x00 0x00 0x00 0x00 0x00
 			0x00 0x00 0xf0 0x00>;
-		marvell,wakeup-pin = <0x0d>;
-		marvell,wakeup-gap-ms = <0x64>;
+		marvell,wakeup-pin = /bits/ 16 <0x0d>;
+		marvell,wakeup-gap-ms = /bits/ 16 <0x64>;
 	};
 };
diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
index 72d82d684342..2e68a3cd8513 100644
--- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
@@ -17,9 +17,26 @@ Required properties:
 Optional properties:
 altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if
 		DWMAC controller is connected emac splitter.
+phy-mode: The phy mode the ethernet operates in
+altr,sgmii-to-sgmii-converter: phandle to the TSE SGMII converter
+
+This device node has additional phandle dependency, the sgmii converter:
+
+Required properties:
+ - compatible	: Should be altr,gmii-to-sgmii-2.0
+ - reg-names	: Should be "eth_tse_control_port"
 
 Example:
 
+gmii_to_sgmii_converter: phy@0x100000240 {
+	compatible = "altr,gmii-to-sgmii-2.0";
+	reg = <0x00000001 0x00000240 0x00000008>,
+		<0x00000001 0x00000200 0x00000040>;
+	reg-names = "eth_tse_control_port";
+	clocks = <&sgmii_1_clk_0 &emac1 1 &sgmii_clk_125 &sgmii_clk_125>;
+	clock-names = "tse_pcs_ref_clk_clock_connection", "tse_rx_cdr_refclk";
+};
+
 gmac0: ethernet@ff700000 {
 	compatible = "altr,socfpga-stmmac", "snps,dwmac-3.70a", "snps,dwmac";
 	altr,sysmgr-syscon = <&sysmgr 0x60 0>;
@@ -30,4 +47,6 @@ gmac0: ethernet@ff700000 {
 	mac-address = [00 00 00 00 00 00];/* Filled in by U-Boot */
 	clocks = <&emac_0_clk>;
 	clock-names = "stmmaceth";
+	phy-mode = "sgmii";
+	altr,gmii-to-sgmii-converter = <&gmii_to_sgmii_converter>;
 };
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index a7440bcd67ff..2c2500df0dce 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -255,6 +255,7 @@ synology	Synology, Inc.
 SUNW	Sun Microsystems, Inc
 tbs	TBS Technologies
 tcl	Toby Churchill Ltd.
+technexion	TechNexion
 technologic	Technologic Systems
 thine	THine Electronics, Inc.
 ti	Texas Instruments
@@ -269,6 +270,7 @@ tronsmart	Tronsmart
 truly	Truly Semiconductors Limited
 tyan	Tyan Computer Corporation
 upisemi	uPI Semiconductor Corp.
+uniwest	United Western Technologies Corp (UniWest)
 urt	United Radiant Technology Corporation
 usi	Universal Scientific Industrial Co., Ltd.
 v3	V3 Semiconductor
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
index d406d98339b2..44f5e6bccd97 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.txt
@@ -74,8 +74,8 @@ blink_set() function (see <linux/leds.h>). To set an LED to blinking,
 however, it is better to use the API function led_blink_set(), as it
 will check and implement software fallback if necessary.
 
-To turn off blinking again, use the API function led_brightness_set()
-as that will not just set the LED brightness but also stop any software
+To turn off blinking, use the API function led_brightness_set()
+with brightness value LED_OFF, which should stop any software
 timers that may have been required for blinking.
 
 The blink_set() function should choose a user friendly blinking value
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index f55599c62c9d..4fb51d32fccc 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -7,12 +7,13 @@ nf_conntrack_acct - BOOLEAN
 	Enable connection tracking flow accounting. 64-bit byte and packet
 	counters per flow are added.
 
-nf_conntrack_buckets - INTEGER (read-only)
+nf_conntrack_buckets - INTEGER
 	Size of hash table. If not specified as parameter during module
 	loading, the default size is calculated by dividing total memory
 	by 16384 to determine the number of buckets but the hash table will
 	never have fewer than 32 and limited to 16384 buckets. For systems
 	with more than 4GB of memory it will be 65536 buckets.
+	This sysctl is only writeable in the initial net namespace.
 
 nf_conntrack_checksum - BOOLEAN
 	0 - disabled
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index 5da679c573d2..755dab856392 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -15,9 +15,9 @@ the use of higher priority ip rules (Policy Based Routing, PBR) to take
 precedence over the VRF device rules directing specific traffic as desired.
 
 In addition, VRF devices allow VRFs to be nested within namespaces. For
-example network namespaces provide separation of network interfaces at L1
-(Layer 1 separation), VLANs on the interfaces within a namespace provide
-L2 separation and then VRF devices provide L3 separation.
+example network namespaces provide separation of network interfaces at the
+device layer, VLANs on the interfaces within a namespace provide L2 separation
+and then VRF devices provide L3 separation.
 
 Design
 ------
@@ -37,21 +37,22 @@ are then enslaved to a VRF device:
                               +------+ +------+
 
 Packets received on an enslaved device and are switched to the VRF device
-using an rx_handler which gives the impression that packets flow through
-the VRF device. Similarly on egress routing rules are used to send packets
-to the VRF device driver before getting sent out the actual interface. This
-allows tcpdump on a VRF device to capture all packets into and out of the
-VRF as a whole.[1] Similarly, netfilter [2] and tc rules can be applied
-using the VRF device to specify rules that apply to the VRF domain as a whole.
+in the IPv4 and IPv6 processing stacks giving the impression that packets
+flow through the VRF device. Similarly on egress routing rules are used to
+send packets to the VRF device driver before getting sent out the actual
+interface. This allows tcpdump on a VRF device to capture all packets into
+and out of the VRF as a whole.[1] Similarly, netfilter[2] and tc rules can be
+applied using the VRF device to specify rules that apply to the VRF domain
+as a whole.
 
 [1] Packets in the forwarded state do not flow through the device, so those
     packets are not seen by tcpdump. Will revisit this limitation in a
     future release.
 
-[2] Iptables on ingress is limited to NF_INET_PRE_ROUTING only with skb->dev
-    set to real ingress device and egress is limited to NF_INET_POST_ROUTING.
-    Will revisit this limitation in a future release.
-
+[2] Iptables on ingress supports PREROUTING with skb->dev set to the real
+    ingress device and both INPUT and PREROUTING rules with skb->dev set to
+    the VRF device. For egress POSTROUTING and OUTPUT rules can be written
+    using either the VRF device or real egress device.
 
 Setup
 -----
@@ -59,23 +60,33 @@ Setup
    e.g, ip link add vrf-blue type vrf table 10
         ip link set dev vrf-blue up
 
-2. Rules are added that send lookups to the associated FIB table when the
-   iif or oif is the VRF device. e.g.,
+2. An l3mdev FIB rule directs lookups to the table associated with the device.
+   A single l3mdev rule is sufficient for all VRFs. The VRF device adds the
+   l3mdev rule for IPv4 and IPv6 when the first device is created with a
+   default preference of 1000. Users may delete the rule if desired and add
+   with a different priority or install per-VRF rules.
+
+   Prior to the v4.8 kernel iif and oif rules are needed for each VRF device:
        ip ru add oif vrf-blue table 10
        ip ru add iif vrf-blue table 10
 
-   Set the default route for the table (and hence default route for the VRF).
-   e.g, ip route add table 10 prohibit default
+3. Set the default route for the table (and hence default route for the VRF).
+       ip route add table 10 unreachable default
 
-3. Enslave L3 interfaces to a VRF device.
-   e.g,  ip link set dev eth1 master vrf-blue
+4. Enslave L3 interfaces to a VRF device.
+       ip link set dev eth1 master vrf-blue
 
    Local and connected routes for enslaved devices are automatically moved to
    the table associated with VRF device. Any additional routes depending on
-   the enslaved device will need to be reinserted following the enslavement.
+   the enslaved device are dropped and will need to be reinserted to the VRF
+   FIB table following the enslavement.
+
+   The IPv6 sysctl option keep_addr_on_down can be enabled to keep IPv6 global
+   addresses as VRF enslavement changes.
+       sysctl -w net.ipv6.conf.all.keep_addr_on_down=1
 
-4. Additional VRF routes are added to associated table.
-   e.g., ip route add table 10 ...
+5. Additional VRF routes are added to associated table.
+       ip route add table 10 ...
 
 
 Applications
@@ -87,39 +98,34 @@ VRF device:
 
 or to specify the output device using cmsg and IP_PKTINFO.
 
+TCP services running in the default VRF context (ie., not bound to any VRF
+device) can work across all VRF domains by enabling the tcp_l3mdev_accept
+sysctl option:
+    sysctl -w net.ipv4.tcp_l3mdev_accept=1
 
-Limitations
------------
-Index of original ingress interface is not available via cmsg. Will address
-soon.
+netfilter rules on the VRF device can be used to limit access to services
+running in the default VRF context as well.
+
+The default VRF does not have limited scope with respect to port bindings.
+That is, if a process does a wildcard bind to a port in the default VRF it
+owns the port across all VRF domains within the network namespace.
 
 ################################################################################
 
 Using iproute2 for VRFs
 =======================
-VRF devices do *not* have to start with 'vrf-'. That is a convention used here
-for emphasis of the device type, similar to use of 'br' in bridge names.
+iproute2 supports the vrf keyword as of v4.7. For backwards compatibility this
+section lists both commands where appropriate -- with the vrf keyword and the
+older form without it.
 
 1. Create a VRF
 
    To instantiate a VRF device and associate it with a table:
        $ ip link add dev NAME type vrf table ID
 
-   Remember to add the ip rules as well:
-       $ ip ru add oif NAME table 10
-       $ ip ru add iif NAME table 10
-       $ ip -6 ru add oif NAME table 10
-       $ ip -6 ru add iif NAME table 10
-
-   Without the rules route lookups are not directed to the table.
-
-   For example:
-   $ ip link add dev vrf-blue type vrf table 10
-   $ ip ru add pref 200 oif vrf-blue table 10
-   $ ip ru add pref 200 iif vrf-blue table 10
-   $ ip -6 ru add pref 200 oif vrf-blue table 10
-   $ ip -6 ru add pref 200 iif vrf-blue table 10
-
+   As of v4.8 the kernel supports the l3mdev FIB rule where a single rule
+   covers all VRFs. The l3mdev rule is created for IPv4 and IPv6 on first
+   device create.
 
 2. List VRFs
 
@@ -129,16 +135,16 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
 
    For example:
    $ ip -d link show type vrf
-   11: vrf-mgmt: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+   11: mgmt: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
        link/ether 72:b3:ba:91:e2:24 brd ff:ff:ff:ff:ff:ff promiscuity 0
        vrf table 1 addrgenmode eui64
-   12: vrf-red: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+   12: red: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
        link/ether b6:6f:6e:f6:da:73 brd ff:ff:ff:ff:ff:ff promiscuity 0
        vrf table 10 addrgenmode eui64
-   13: vrf-blue: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+   13: blue: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
        link/ether 36:62:e8:7d:bb:8c brd ff:ff:ff:ff:ff:ff promiscuity 0
        vrf table 66 addrgenmode eui64
-   14: vrf-green: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+   14: green: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
        link/ether e6:28:b8:63:70:bb brd ff:ff:ff:ff:ff:ff promiscuity 0
        vrf table 81 addrgenmode eui64
 
@@ -146,43 +152,44 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
    Or in brief output:
 
    $ ip -br link show type vrf
-   vrf-mgmt         UP             72:b3:ba:91:e2:24 <NOARP,MASTER,UP,LOWER_UP>
-   vrf-red          UP             b6:6f:6e:f6:da:73 <NOARP,MASTER,UP,LOWER_UP>
-   vrf-blue         UP             36:62:e8:7d:bb:8c <NOARP,MASTER,UP,LOWER_UP>
-   vrf-green        UP             e6:28:b8:63:70:bb <NOARP,MASTER,UP,LOWER_UP>
+   mgmt         UP             72:b3:ba:91:e2:24 <NOARP,MASTER,UP,LOWER_UP>
+   red          UP             b6:6f:6e:f6:da:73 <NOARP,MASTER,UP,LOWER_UP>
+   blue         UP             36:62:e8:7d:bb:8c <NOARP,MASTER,UP,LOWER_UP>
+   green        UP             e6:28:b8:63:70:bb <NOARP,MASTER,UP,LOWER_UP>
 
 
 3. Assign a Network Interface to a VRF
 
    Network interfaces are assigned to a VRF by enslaving the netdevice to a
    VRF device:
-       $ ip link set dev NAME master VRF-NAME
+       $ ip link set dev NAME master NAME
 
    On enslavement connected and local routes are automatically moved to the
    table associated with the VRF device.
 
    For example:
-   $ ip link set dev eth0 master vrf-mgmt
+   $ ip link set dev eth0 master mgmt
 
 
 4. Show Devices Assigned to a VRF
 
    To show devices that have been assigned to a specific VRF add the master
    option to the ip command:
-       $ ip link show master VRF-NAME
+       $ ip link show vrf NAME
+       $ ip link show master NAME
 
    For example:
-   $ ip link show master vrf-red
-   3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP mode DEFAULT group default qlen 1000
+   $ ip link show vrf red
+   3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
        link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
-   4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP mode DEFAULT group default qlen 1000
+   4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
        link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
-   7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master vrf-red state DOWN mode DEFAULT group default qlen 1000
+   7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN mode DEFAULT group default qlen 1000
        link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
 
 
    Or using the brief output:
-   $ ip -br link show master vrf-red
+   $ ip -br link show vrf red
    eth1             UP             02:00:00:00:02:02 <BROADCAST,MULTICAST,UP,LOWER_UP>
    eth2             UP             02:00:00:00:02:03 <BROADCAST,MULTICAST,UP,LOWER_UP>
    eth5             DOWN           02:00:00:00:02:06 <BROADCAST,MULTICAST>
@@ -192,26 +199,28 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
 
    To list neighbor entries associated with devices enslaved to a VRF device
    add the master option to the ip command:
-       $ ip [-6] neigh show master VRF-NAME
+       $ ip [-6] neigh show vrf NAME
+       $ ip [-6] neigh show master NAME
 
    For example:
-   $  ip neigh show master vrf-red
+   $  ip neigh show vrf red
    10.2.1.254 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
    10.2.2.254 dev eth2 lladdr 5e:54:01:6a:ee:80 REACHABLE
 
-    $ ip -6 neigh show master vrf-red
-    2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
+   $ ip -6 neigh show vrf red
+   2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
 
 
 6. Show Addresses for a VRF
 
    To show addresses for interfaces associated with a VRF add the master
    option to the ip command:
-       $ ip addr show master VRF-NAME
+       $ ip addr show vrf NAME
+       $ ip addr show master NAME
 
    For example:
-   $ ip addr show master vrf-red
-   3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP group default qlen 1000
+   $ ip addr show vrf red
+   3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
        link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
        inet 10.2.1.2/24 brd 10.2.1.255 scope global eth1
           valid_lft forever preferred_lft forever
@@ -219,7 +228,7 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
           valid_lft forever preferred_lft forever
        inet6 fe80::ff:fe00:202/64 scope link
           valid_lft forever preferred_lft forever
-   4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP group default qlen 1000
+   4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
        link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
        inet 10.2.2.2/24 brd 10.2.2.255 scope global eth2
           valid_lft forever preferred_lft forever
@@ -227,11 +236,11 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
           valid_lft forever preferred_lft forever
        inet6 fe80::ff:fe00:203/64 scope link
           valid_lft forever preferred_lft forever
-   7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master vrf-red state DOWN group default qlen 1000
+   7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN group default qlen 1000
        link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
 
    Or in brief format:
-   $ ip -br addr show master vrf-red
+   $ ip -br addr show vrf red
    eth1             UP             10.2.1.2/24 2002:1::2/120 fe80::ff:fe00:202/64
    eth2             UP             10.2.2.2/24 2002:2::2/120 fe80::ff:fe00:203/64
    eth5             DOWN
@@ -241,10 +250,11 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
 
    To show routes for a VRF use the ip command to display the table associated
    with the VRF device:
+       $ ip [-6] route show vrf NAME
        $ ip [-6] route show table ID
 
    For example:
-   $ ip route show table vrf-red
+   $ ip route show vrf red
    prohibit default
    broadcast 10.2.1.0 dev eth1  proto kernel  scope link  src 10.2.1.2
    10.2.1.0/24 dev eth1  proto kernel  scope link  src 10.2.1.2
@@ -255,7 +265,7 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
    local 10.2.2.2 dev eth2  proto kernel  scope host  src 10.2.2.2
    broadcast 10.2.2.255 dev eth2  proto kernel  scope link  src 10.2.2.2
 
-   $ ip -6 route show table vrf-red
+   $ ip -6 route show vrf red
    local 2002:1:: dev lo  proto none  metric 0  pref medium
    local 2002:1::2 dev lo  proto none  metric 0  pref medium
    2002:1::/120 dev eth1  proto kernel  metric 256  pref medium
@@ -268,23 +278,24 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
    local fe80::ff:fe00:203 dev lo  proto none  metric 0  pref medium
    fe80::/64 dev eth1  proto kernel  metric 256  pref medium
    fe80::/64 dev eth2  proto kernel  metric 256  pref medium
-   ff00::/8 dev vrf-red  metric 256  pref medium
+   ff00::/8 dev red  metric 256  pref medium
    ff00::/8 dev eth1  metric 256  pref medium
    ff00::/8 dev eth2  metric 256  pref medium
 
 
 8. Route Lookup for a VRF
 
-   A test route lookup can be done for a VRF by adding the oif option to ip:
-       $ ip [-6] route get oif VRF-NAME ADDRESS
+   A test route lookup can be done for a VRF:
+       $ ip [-6] route get vrf NAME ADDRESS
+       $ ip [-6] route get oif NAME ADDRESS
 
    For example:
-   $ ip route get 10.2.1.40 oif vrf-red
-   10.2.1.40 dev eth1  table vrf-red  src 10.2.1.2
+   $ ip route get 10.2.1.40 vrf red
+   10.2.1.40 dev eth1  table red  src 10.2.1.2
        cache
 
-   $ ip -6 route get 2002:1::32 oif vrf-red
-   2002:1::32 from :: dev eth1  table vrf-red  proto kernel  src 2002:1::2  metric 256  pref medium
+   $ ip -6 route get 2002:1::32 vrf red
+   2002:1::32 from :: dev eth1  table red  proto kernel  src 2002:1::2  metric 256  pref medium
 
 
 9. Removing Network Interface from a VRF
@@ -303,46 +314,40 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
 
 Commands used in this example:
 
-cat >> /etc/iproute2/rt_tables <<EOF
-1  vrf-mgmt
-10 vrf-red
-66 vrf-blue
-81 vrf-green
+cat >> /etc/iproute2/rt_tables.d/vrf.conf <<EOF
+1  mgmt
+10 red
+66 blue
+81 green
 EOF
 
 function vrf_create
 {
     VRF=$1
     TBID=$2
-    # create VRF device
-    ip link add vrf-${VRF} type vrf table ${TBID}
 
-    # add rules that direct lookups to vrf table
-    ip ru add pref 200 oif vrf-${VRF} table ${TBID}
-    ip ru add pref 200 iif vrf-${VRF} table ${TBID}
-    ip -6 ru add pref 200 oif vrf-${VRF} table ${TBID}
-    ip -6 ru add pref 200 iif vrf-${VRF} table ${TBID}
+    # create VRF device
+    ip link add ${VRF} type vrf table ${TBID}
 
     if [ "${VRF}" != "mgmt" ]; then
-        ip route add table ${TBID} prohibit default
+        ip route add table ${TBID} unreachable default
     fi
-    ip link set dev vrf-${VRF} up
-    ip link set dev vrf-${VRF} state up
+    ip link set dev ${VRF} up
 }
 
 vrf_create mgmt 1
-ip link set dev eth0 master vrf-mgmt
+ip link set dev eth0 master mgmt
 
 vrf_create red 10
-ip link set dev eth1 master vrf-red
-ip link set dev eth2 master vrf-red
-ip link set dev eth5 master vrf-red
+ip link set dev eth1 master red
+ip link set dev eth2 master red
+ip link set dev eth5 master red
 
 vrf_create blue 66
-ip link set dev eth3 master vrf-blue
+ip link set dev eth3 master blue
 
 vrf_create green 81
-ip link set dev eth4 master vrf-green
+ip link set dev eth4 master green
 
 
 Interface addresses from /etc/network/interfaces:
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 8638f61c8c9d..37eca00796ee 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -263,19 +263,23 @@ scmd->allowed.
 
  3. scmd recovered
     ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
-	- shost->host_failed--
 	- clear scmd->eh_eflags
 	- scsi_setup_cmd_retry()
 	- move from local eh_work_q to local eh_done_q
     LOCKING: none
+    CONCURRENCY: at most one thread per separate eh_work_q to
+		 keep queue manipulation lockless
 
  4. EH completes
     ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper
-	    layer of failure.
+	    layer of failure. May be called concurrently but must have
+	    a no more than one thread per separate eh_work_q to
+	    manipulate the queue locklessly
 	- scmd is removed from eh_done_q and scmd->eh_entry is cleared
 	- if retry is necessary, scmd is requeued using
           scsi_queue_insert()
 	- otherwise, scsi_finish_command() is invoked for scmd
+	- zero shost->host_failed
     LOCKING: queue or finish function performs appropriate locking
 
 
diff --git a/MAINTAINERS b/MAINTAINERS
index f5ddaa901133..06e84119c690 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -595,6 +595,10 @@ S:	Odd Fixes
 L:	linux-alpha@vger.kernel.org
 F:	arch/alpha/
 
+ALPS PS/2 TOUCHPAD DRIVER
+R:	Pali Rohár <pali.rohar@gmail.com>
+F:	drivers/input/mouse/alps.*
+
 ALTERA MAILBOX DRIVER
 M:	Ley Foon Tan <lftan@altera.com>
 L:	nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
@@ -1159,6 +1163,7 @@ F:	arch/arm/mach-footbridge/
 ARM/FREESCALE IMX / MXC ARM ARCHITECTURE
 M:	Shawn Guo <shawnguo@kernel.org>
 M:	Sascha Hauer <kernel@pengutronix.de>
+R:	Fabio Estevam <fabio.estevam@nxp.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2242,7 +2247,8 @@ F:	include/net/ax25.h
 F:	net/ax25/
 
 AZ6007 DVB DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -2291,6 +2297,7 @@ S:	Maintained
 F:	Documentation/ABI/testing/sysfs-class-net-batman-adv
 F:	Documentation/ABI/testing/sysfs-class-net-mesh
 F:	Documentation/networking/batman-adv.txt
+F:	include/uapi/linux/batman_adv.h
 F:	net/batman-adv/
 
 BAYCOM/HDLCDRV DRIVERS FOR AX.25
@@ -2716,7 +2723,8 @@ F:	Documentation/filesystems/btrfs.txt
 F:	fs/btrfs/
 
 BTTV VIDEO4LINUX DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -2780,9 +2788,9 @@ F:	include/net/caif/
 F:	net/caif/
 
 CALGARY x86-64 IOMMU
-M:	Muli Ben-Yehuda <muli@il.ibm.com>
-M:	"Jon D. Mason" <jdmason@kudzu.us>
-L:	discuss@x86-64.org
+M:	Muli Ben-Yehuda <mulix@mulix.org>
+M:	Jon Mason <jdmason@kudzu.us>
+L:	iommu@lists.linux-foundation.org
 S:	Maintained
 F:	arch/x86/kernel/pci-calgary_64.c
 F:	arch/x86/kernel/tce_64.c
@@ -3352,7 +3360,8 @@ S:	Maintained
 F:	drivers/media/dvb-frontends/cx24120*
 
 CX88 VIDEO4LINUX DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -3782,6 +3791,7 @@ Q:	https://patchwork.kernel.org/project/linux-dmaengine/list/
 S:	Maintained
 F:	drivers/dma/
 F:	include/linux/dmaengine.h
+F:	Documentation/devicetree/bindings/dma/
 F:	Documentation/dmaengine/
 T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
@@ -4299,7 +4309,8 @@ F:	fs/ecryptfs/
 EDAC-CORE
 M:	Doug Thompson <dougthompson@xmission.com>
 M:	Borislav Petkov <bp@alien8.de>
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-edac@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next
@@ -4344,7 +4355,8 @@ S:	Maintained
 F:	drivers/edac/e7xxx_edac.c
 
 EDAC-GHES
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/ghes_edac.c
@@ -4368,19 +4380,22 @@ S:	Maintained
 F:	drivers/edac/i5000_edac.c
 
 EDAC-I5400
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/i5400_edac.c
 
 EDAC-I7300
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/i7300_edac.c
 
 EDAC-I7CORE
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/i7core_edac.c
@@ -4417,7 +4432,8 @@ S:	Maintained
 F:	drivers/edac/r82600_edac.c
 
 EDAC-SBRIDGE
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/sb_edac.c
@@ -4476,7 +4492,8 @@ S:	Maintained
 F:	drivers/net/ethernet/ibm/ehea/
 
 EM28XX VIDEO4LINUX DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -5421,6 +5438,15 @@ F:	include/uapi/linux/if_hippi.h
 F:	net/802/hippi.c
 F:	drivers/net/hippi/
 
+HISILICON NETWORK SUBSYSTEM DRIVER
+M:	Yisen Zhuang <yisen.zhuang@huawei.com>
+M:	Salil Mehta <salil.mehta@huawei.com>
+L:	netdev@vger.kernel.org
+W:	http://www.hisilicon.com
+S:	Maintained
+F:	drivers/net/ethernet/hisilicon/
+F:	Documentation/devicetree/bindings/net/hisilicon*.txt
+
 HISILICON SAS Controller
 M:	John Garry <john.garry@huawei.com>
 W:	http://www.hisilicon.com
@@ -6502,6 +6528,7 @@ F:	include/uapi/linux/sunrpc/
 
 KERNEL SELFTEST FRAMEWORK
 M:	Shuah Khan <shuahkh@osg.samsung.com>
+M:	Shuah Khan <shuah@kernel.org>
 L:	linux-kselftest@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/shuah/linux-kselftest
 S:	Maintained
@@ -7374,7 +7401,8 @@ S:	Supported
 F:	drivers/media/pci/netup_unidvb/*
 
 MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 P:	LinuxTV.org Project
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
@@ -7422,7 +7450,7 @@ F:	drivers/scsi/megaraid.*
 F:	drivers/scsi/megaraid/
 
 MELLANOX ETHERNET DRIVER (mlx4_en)
-M: 	Eugenia Emantayev <eugenia@mellanox.com>
+M:	Tariq Toukan <tariqt@mellanox.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.mellanox.com
@@ -8423,10 +8451,9 @@ F:	drivers/i2c/busses/i2c-ocores.c
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Rob Herring <robh+dt@kernel.org>
 M:	Frank Rowand <frowand.list@gmail.com>
-M:	Grant Likely <grant.likely@linaro.org>
 L:	devicetree@vger.kernel.org
 W:	http://www.devicetree.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/glikely/linux.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
 S:	Maintained
 F:	drivers/of/
 F:	include/linux/of*.h
@@ -8434,12 +8461,10 @@ F:	scripts/dtc/
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
 M:	Rob Herring <robh+dt@kernel.org>
-M:	Pawel Moll <pawel.moll@arm.com>
 M:	Mark Rutland <mark.rutland@arm.com>
-M:	Ian Campbell <ijc+devicetree@hellion.org.uk>
-M:	Kumar Gala <galak@codeaurora.org>
 L:	devicetree@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
+Q:	http://patchwork.ozlabs.org/project/devicetree-bindings/list/
 S:	Maintained
 F:	Documentation/devicetree/
 F:	arch/*/boot/dts/
@@ -8964,6 +8989,7 @@ L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/pinctrl/
+F:	Documentation/pinctrl.txt
 F:	drivers/pinctrl/
 F:	include/linux/pinctrl/
 
@@ -9533,7 +9559,7 @@ M:	Florian Fainelli <florian@openwrt.org>
 S:	Maintained
 
 RDC R6040 FAST ETHERNET DRIVER
-M:	Florian Fainelli <florian@openwrt.org>
+M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/rdc/r6040.c
@@ -9684,7 +9710,6 @@ F:	Documentation/ABI/*/sysfs-driver-hid-roccat*
 
 ROCKER DRIVER
 M:	Jiri Pirko <jiri@resnulli.us>
-M:	Scott Feldman <sfeldma@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/rocker/
@@ -9871,7 +9896,8 @@ S:	Odd Fixes
 F:	drivers/media/i2c/saa6588*
 
 SAA7134 VIDEO4LINUX DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -10389,7 +10415,8 @@ S:	Maintained
 F:	drivers/media/radio/si4713/radio-usb-si4713.c
 
 SIANO DVB DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -11155,7 +11182,8 @@ S:	Maintained
 F:	drivers/media/i2c/tda9840*
 
 TEA5761 TUNER DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -11163,7 +11191,8 @@ S:	Odd fixes
 F:	drivers/media/tuners/tea5761.*
 
 TEA5767 TUNER DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -11550,7 +11579,8 @@ F:	include/linux/shmem_fs.h
 F:	mm/shmem.c
 
 TM6000 VIDEO4LINUX DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
@@ -11904,7 +11934,8 @@ F:	drivers/usb/common/usb-otg-fsm.c
 
 USB OVER IP DRIVER
 M:	Valentina Manea <valentina.manea.m@gmail.com>
-M:	Shuah Khan <shuah.kh@samsung.com>
+M:	Shuah Khan <shuahkh@osg.samsung.com>
+M:	Shuah Khan <shuah@kernel.org>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
 F:	Documentation/usb/usbip_protocol.txt
@@ -11975,6 +12006,7 @@ L:	linux-usb@vger.kernel.org
 W:	http://www.linux-usb.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
 S:	Supported
+F:	Documentation/devicetree/bindings/usb/
 F:	Documentation/usb/
 F:	drivers/usb/
 F:	include/linux/usb.h
@@ -12148,6 +12180,7 @@ VIRTIO CORE, NET AND BLOCK DRIVERS
 M:	"Michael S. Tsirkin" <mst@redhat.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Maintained
+F:	Documentation/devicetree/bindings/virtio/
 F:	drivers/virtio/
 F:	tools/virtio/
 F:	drivers/net/virtio_net.c
@@ -12536,7 +12569,8 @@ S:	Maintained
 F:	arch/x86/entry/vdso/
 
 XC2028/3028 TUNER DRIVER
-M:	Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
diff --git a/Makefile b/Makefile
index 8d1301ab59fd..0d504893df6e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc6
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
@@ -363,11 +363,13 @@ CHECK		= sparse
 
 CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
 		  -Wbitwise -Wno-return-void $(CF)
+NOSTDINC_FLAGS  =
 CFLAGS_MODULE   =
 AFLAGS_MODULE   =
 LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
+LDFLAGS_vmlinux =
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
 CFLAGS_KCOV	= -fsanitize-coverage=trace-pc
 
diff --git a/arch/Kconfig b/arch/Kconfig
index d794384a0404..15996290fed4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -226,8 +226,8 @@ config ARCH_INIT_TASK
 config ARCH_TASK_STRUCT_ALLOCATOR
 	bool
 
-# Select if arch has its private alloc_thread_info() function
-config ARCH_THREAD_INFO_ALLOCATOR
+# Select if arch has its private alloc_thread_stack() function
+config ARCH_THREAD_STACK_ALLOCATOR
 	bool
 
 # Select if arch wants to size task_struct dynamically via arch_task_struct_size:
@@ -606,6 +606,9 @@ config HAVE_ARCH_HASH
 	  file which provides platform-specific implementations of some
 	  functions in <linux/hash.h> or fs/namei.c.
 
+config ISA_BUS_API
+	def_bool ISA
+
 #
 # ABI hall of shame
 #
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index aab14a019c20..c2ebb6f36c9d 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pmd_t *
 pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return ret;
 }
 
@@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
 }
 
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index d4df6be66d58..85814e74677d 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -66,8 +66,6 @@ endif
 
 endif
 
-cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables
-
 # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
 ifeq ($(atleast_gcc48),y)
 cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -gdwarf-2
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 86ed671286df..3749234b7419 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO,
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
 					 __get_order_pte());
 
 	return pte;
@@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pgtable_t pte_pg;
 	struct page *page;
 
-	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
+	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
 	if (!pte_pg)
 		return 0;
 	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index e0efff15a5ae..b9192a653b7e 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
 	 * prelogue is setup (callee regs saved and then fp set and not other
 	 * way around
 	 */
-	pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+	pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
 	return 0;
 
 #endif
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 06b6c2d695bf..414b42710a36 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -741,6 +741,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \
 	sun7i-a20-olimex-som-evb.dtb \
 	sun7i-a20-olinuxino-lime.dtb \
 	sun7i-a20-olinuxino-lime2.dtb \
+	sun7i-a20-olinuxino-lime2-emmc.dtb \
 	sun7i-a20-olinuxino-micro.dtb \
 	sun7i-a20-orangepi.dtb \
 	sun7i-a20-orangepi-mini.dtb \
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index d82dd6e3f9b1..5687d6b4da60 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -418,7 +418,7 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c0_pins>;
-	clock-frequency = <400000>;
+	clock-frequency = <100000>;
 
 	tps@24 {
 		compatible = "ti,tps65218";
diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi
index b01a5948cdd0..0e63b9dff6e7 100644
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -60,10 +60,26 @@
 
 		tps659038_pmic {
 			compatible = "ti,tps659038-pmic";
+
+			smps12-in-supply = <&vmain>;
+			smps3-in-supply = <&vmain>;
+			smps45-in-supply = <&vmain>;
+			smps6-in-supply = <&vmain>;
+			smps7-in-supply = <&vmain>;
+			smps8-in-supply = <&vmain>;
+			smps9-in-supply = <&vmain>;
+			ldo1-in-supply = <&vmain>;
+			ldo2-in-supply = <&vmain>;
+			ldo3-in-supply = <&vmain>;
+			ldo4-in-supply = <&vmain>;
+			ldo9-in-supply = <&vmain>;
+			ldoln-in-supply = <&vmain>;
+			ldousb-in-supply = <&vmain>;
+			ldortc-in-supply = <&vmain>;
+
 			regulators {
 				smps12_reg: smps12 {
 					/* VDD_MPU */
-					vin-supply = <&vmain>;
 					regulator-name = "smps12";
 					regulator-min-microvolt = <850000>;
 					regulator-max-microvolt = <1250000>;
@@ -73,7 +89,6 @@
 
 				smps3_reg: smps3 {
 					/* VDD_DDR EMIF1 EMIF2 */
-					vin-supply = <&vmain>;
 					regulator-name = "smps3";
 					regulator-min-microvolt = <1350000>;
 					regulator-max-microvolt = <1350000>;
@@ -84,7 +99,6 @@
 				smps45_reg: smps45 {
 					/* VDD_DSPEVE on AM572 */
 					/* VDD_IVA + VDD_DSP on AM571 */
-					vin-supply = <&vmain>;
 					regulator-name = "smps45";
 					regulator-min-microvolt = <850000>;
 					regulator-max-microvolt = <1250000>;
@@ -94,7 +108,6 @@
 
 				smps6_reg: smps6 {
 					/* VDD_GPU */
-					vin-supply = <&vmain>;
 					regulator-name = "smps6";
 					regulator-min-microvolt = <850000>;
 					regulator-max-microvolt = <1250000>;
@@ -104,7 +117,6 @@
 
 				smps7_reg: smps7 {
 					/* VDD_CORE */
-					vin-supply = <&vmain>;
 					regulator-name = "smps7";
 					regulator-min-microvolt = <850000>;
 					regulator-max-microvolt = <1150000>;
@@ -115,13 +127,11 @@
 				smps8_reg: smps8 {
 					/* 5728 - VDD_IVAHD */
 					/* 5718 - N.C. test point */
-					vin-supply = <&vmain>;
 					regulator-name = "smps8";
 				};
 
 				smps9_reg: smps9 {
 					/* VDD_3_3D */
-					vin-supply = <&vmain>;
 					regulator-name = "smps9";
 					regulator-min-microvolt = <3300000>;
 					regulator-max-microvolt = <3300000>;
@@ -132,7 +142,6 @@
 				ldo1_reg: ldo1 {
 					/* VDDSHV8 - VSDMMC  */
 					/* NOTE: on rev 1.3a, data supply */
-					vin-supply = <&vmain>;
 					regulator-name = "ldo1";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <3300000>;
@@ -142,7 +151,6 @@
 
 				ldo2_reg: ldo2 {
 					/* VDDSH18V */
-					vin-supply = <&vmain>;
 					regulator-name = "ldo2";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -152,7 +160,6 @@
 
 				ldo3_reg: ldo3 {
 					/* R1.3a 572x V1_8PHY_LDO3: USB, SATA */
-					vin-supply = <&vmain>;
 					regulator-name = "ldo3";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -162,7 +169,6 @@
 
 				ldo4_reg: ldo4 {
 					/* R1.3a 572x V1_8PHY_LDO4: PCIE, HDMI*/
-					vin-supply = <&vmain>;
 					regulator-name = "ldo4";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -174,7 +180,6 @@
 
 				ldo9_reg: ldo9 {
 					/* VDD_RTC  */
-					vin-supply = <&vmain>;
 					regulator-name = "ldo9";
 					regulator-min-microvolt = <840000>;
 					regulator-max-microvolt = <1160000>;
@@ -184,7 +189,6 @@
 
 				ldoln_reg: ldoln {
 					/* VDDA_1V8_PLL */
-					vin-supply = <&vmain>;
 					regulator-name = "ldoln";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -194,7 +198,6 @@
 
 				ldousb_reg: ldousb {
 					/* VDDA_3V_USB: VDDA_USBHS33 */
-					vin-supply = <&vmain>;
 					regulator-name = "ldousb";
 					regulator-min-microvolt = <3300000>;
 					regulator-max-microvolt = <3300000>;
@@ -204,7 +207,6 @@
 
 				ldortc_reg: ldortc {
 					/* VDDA_RTC  */
-					vin-supply = <&vmain>;
 					regulator-name = "ldortc";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts
index cbc17b0794b1..4128fa91823c 100644
--- a/arch/arm/boot/dts/dm8148-evm.dts
+++ b/arch/arm/boot/dts/dm8148-evm.dts
@@ -93,6 +93,10 @@
 	};
 };
 
+&mmc1 {
+        status = "disabled";
+};
+
 &mmc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd1_pins>;
@@ -101,6 +105,10 @@
 	cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
 };
 
+&mmc3 {
+        status = "disabled";
+};
+
 &pincntl {
 	sd1_pins: pinmux_sd1_pins {
 		pinctrl-single,pins = <
diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts
index 5d4313fd5a46..3f184863e0c5 100644
--- a/arch/arm/boot/dts/dm8148-t410.dts
+++ b/arch/arm/boot/dts/dm8148-t410.dts
@@ -45,6 +45,14 @@
 	phy-mode = "rgmii";
 };
 
+&mmc1 {
+	status = "disabled";
+};
+
+&mmc2 {
+	status = "disabled";
+};
+
 &mmc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd2_pins>;
@@ -53,6 +61,7 @@
 	dmas = <&edma_xbar 8 0 1	/* use SDTXEVT1 instead of MCASP0TX */
 		&edma_xbar 9 0 2>;	/* use SDRXEVT1 instead of MCASP0RX */
 	dma-names = "tx", "rx";
+	non-removable;
 };
 
 &pincntl {
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index f8b39a5a487a..de559f6e4fee 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1451,6 +1451,8 @@
 			ti,hwmods = "gpmc";
 			reg = <0x50000000 0x37c>;      /* device IO registers */
 			interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+			dmas = <&edma_xbar 4 0>;
+			dma-names = "rxtx";
 			gpmc,num-cs = <8>;
 			gpmc,num-waitpins = <2>;
 			#address-cells = <2>;
diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi
index 4220eeffc65a..5e06020f450b 100644
--- a/arch/arm/boot/dts/dra74x.dtsi
+++ b/arch/arm/boot/dts/dra74x.dtsi
@@ -107,8 +107,8 @@
 	reg = <0x58000000 0x80>,
 	      <0x58004054 0x4>,
 	      <0x58004300 0x20>,
-	      <0x58005054 0x4>,
-	      <0x58005300 0x20>;
+	      <0x58009054 0x4>,
+	      <0x58009300 0x20>;
 	reg-names = "dss", "pll1_clkctrl", "pll1",
 		    "pll2_clkctrl", "pll2";
 
diff --git a/arch/arm/boot/dts/exynos5250-snow-common.dtsi b/arch/arm/boot/dts/exynos5250-snow-common.dtsi
index ddfe1f558c10..fa14f77df563 100644
--- a/arch/arm/boot/dts/exynos5250-snow-common.dtsi
+++ b/arch/arm/boot/dts/exynos5250-snow-common.dtsi
@@ -242,7 +242,7 @@
 	hpd-gpios = <&gpx0 7 GPIO_ACTIVE_HIGH>;
 
 	ports {
-		port0 {
+		port {
 			dp_out: endpoint {
 				remote-endpoint = <&bridge_in>;
 			};
@@ -485,13 +485,20 @@
 		edid-emulation = <5>;
 
 		ports {
-			port0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
 				bridge_out: endpoint {
 					remote-endpoint = <&panel_in>;
 				};
 			};
 
-			port1 {
+			port@1 {
+				reg = <1>;
+
 				bridge_in: endpoint {
 					remote-endpoint = <&dp_out>;
 				};
diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts
index f9d2e4f1a0e0..1de972d46a87 100644
--- a/arch/arm/boot/dts/exynos5420-peach-pit.dts
+++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts
@@ -163,7 +163,7 @@
 	hpd-gpios = <&gpx2 6 GPIO_ACTIVE_HIGH>;
 
 	ports {
-		port0 {
+		port {
 			dp_out: endpoint {
 				remote-endpoint = <&bridge_in>;
 			};
@@ -631,13 +631,20 @@
 		use-external-pwm;
 
 		ports {
-			port0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
 				bridge_out: endpoint {
 					remote-endpoint = <&panel_in>;
 				};
 			};
 
-			port1 {
+			port@1 {
+				reg = <1>;
+
 				bridge_in: endpoint {
 					remote-endpoint = <&dp_out>;
 				};
diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts
index 76056ba92ced..ed449827c3d3 100644
--- a/arch/arm/boot/dts/omap3-evm-37xx.dts
+++ b/arch/arm/boot/dts/omap3-evm-37xx.dts
@@ -85,7 +85,7 @@
 			OMAP3_CORE1_IOPAD(0x2158, PIN_INPUT_PULLUP | MUX_MODE0)	/* sdmmc2_clk.sdmmc2_clk */
 			OMAP3_CORE1_IOPAD(0x215a, PIN_INPUT_PULLUP | MUX_MODE0)	/* sdmmc2_cmd.sdmmc2_cmd */
 			OMAP3_CORE1_IOPAD(0x215c, PIN_INPUT_PULLUP | MUX_MODE0)	/* sdmmc2_dat0.sdmmc2_dat0 */
-			OMAP3_CORE1_IOPAD(0x215e, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */
+			OMAP3_CORE1_IOPAD(0x215e, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */
 			OMAP3_CORE1_IOPAD(0x2160, PIN_INPUT_PULLUP | MUX_MODE0)	/* sdmmc2_dat2.sdmmc2_dat2 */
 			OMAP3_CORE1_IOPAD(0x2162, PIN_INPUT_PULLUP | MUX_MODE0)	/* sdmmc2_dat3.sdmmc2_dat3 */
 		>;
diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi
index 41f5d386f21f..f4f2ce46d681 100644
--- a/arch/arm/boot/dts/omap3-igep.dtsi
+++ b/arch/arm/boot/dts/omap3-igep.dtsi
@@ -188,6 +188,7 @@
 	vmmc-supply = <&vmmc1>;
 	vmmc_aux-supply = <&vsim>;
 	bus-width = <4>;
+	cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>;
 };
 
 &mmc3 {
diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
index d6f839cab649..b6971060648a 100644
--- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
@@ -194,6 +194,12 @@
 			OMAP3630_CORE2_IOPAD(0x25f8, PIN_OUTPUT | MUX_MODE4) /* etk_d14.gpio_28 */
 		>;
 	};
+
+	mmc1_wp_pins: pinmux_mmc1_cd_pins {
+		pinctrl-single,pins = <
+			OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT | MUX_MODE4)   /* etk_d15.gpio_29 */
+		>;
+	};
 };
 
 &i2c3 {
@@ -250,3 +256,8 @@
 		};
 	};
 };
+
+&mmc1 {
+	pinctrl-0 = <&mmc1_pins &mmc1_wp_pins>;
+	wp-gpios = <&gpio1 29 GPIO_ACTIVE_LOW>;	/* gpio_29 */
+};
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index d9e2d9c6e999..2b74a81d1de2 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -288,7 +288,7 @@
 		pinctrl-single,pins = <
 			OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1)	/* ssi1_rdy_tx */
 			OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1)		/* ssi1_flag_tx */
-			OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */
+			OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4)		/* ssi1_wake_tx (cawake) */
 			OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1)		/* ssi1_dat_tx */
 			OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1)		/* ssi1_dat_rx */
 			OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1)		/* ssi1_flag_rx */
@@ -300,7 +300,7 @@
 	modem_pins: pinmux_modem {
 		pinctrl-single,pins = <
 			OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE4)		/* gpio 70 => cmt_apeslpx */
-			OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio 72 => ape_rst_rq */
+			OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | MUX_MODE4)		/* gpio 72 => ape_rst_rq */
 			OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE4)		/* gpio 73 => cmt_rst_rq */
 			OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE4)		/* gpio 74 => cmt_en */
 			OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE4)		/* gpio 75 => cmt_rst */
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index a00ca761675d..927b17fc4ed8 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -97,7 +97,7 @@
 			OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1)            /* ssi1_dat_tx */
 			OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1)            /* ssi1_flag_tx */
 			OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1)      /* ssi1_rdy_tx */
-			OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */
+			OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4)	/* ssi1_wake_tx (cawake) */
 			OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1)             /* ssi1_dat_rx */
 			OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1)             /* ssi1_flag_rx */
 			OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE1)            /* ssi1_rdy_rx */
@@ -110,7 +110,7 @@
 			OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE7)            /* ssi1_dat_tx */
 			OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE7)            /* ssi1_flag_tx */
 			OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLDOWN | MUX_MODE7)    /* ssi1_rdy_tx */
-			OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */
+			OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4)	/* ssi1_wake_tx (cawake) */
 			OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE7)             /* ssi1_dat_rx */
 			OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE7)             /* ssi1_flag_rx */
 			OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE4)            /* ssi1_rdy_rx */
@@ -120,7 +120,7 @@
 
 	modem_pins1: pinmux_modem_core1_pins {
 		pinctrl-single,pins = <
-			OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio_34 (ape_rst_rq) */
+			OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | MUX_MODE4)	/* gpio_34 (ape_rst_rq) */
 			OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE4)            /* gpio_88 (cmt_rst_rq) */
 			OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE4)            /* gpio_93 (cmt_apeslpx) */
 		>;
diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts
index f19170bdcc1f..c29b41dc7b95 100644
--- a/arch/arm/boot/dts/omap3-zoom3.dts
+++ b/arch/arm/boot/dts/omap3-zoom3.dts
@@ -98,7 +98,7 @@
 		pinctrl-single,pins = <
                         OMAP3_CORE1_IOPAD(0x2174, PIN_INPUT_PULLUP | MUX_MODE0)	/* uart2_cts.uart2_cts */
                         OMAP3_CORE1_IOPAD(0x2176, PIN_OUTPUT | MUX_MODE0)		/* uart2_rts.uart2_rts */
-                        OMAP3_CORE1_IOPAD(0x217a, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */
+                        OMAP3_CORE1_IOPAD(0x217a, PIN_INPUT | MUX_MODE0)		/* uart2_rx.uart2_rx */
                         OMAP3_CORE1_IOPAD(0x2178, PIN_OUTPUT | MUX_MODE0)		/* uart2_tx.uart2_tx */
 		>;
 	};
@@ -107,7 +107,7 @@
 		pinctrl-single,pins = <
                         OMAP3_CORE1_IOPAD(0x219a, PIN_INPUT_PULLDOWN | MUX_MODE0)	/* uart3_cts_rctx.uart3_cts_rctx */
                         OMAP3_CORE1_IOPAD(0x219c, PIN_OUTPUT | MUX_MODE0)		/* uart3_rts_sd.uart3_rts_sd */
-                        OMAP3_CORE1_IOPAD(0x219e, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */
+                        OMAP3_CORE1_IOPAD(0x219e, PIN_INPUT | MUX_MODE0)		/* uart3_rx_irrx.uart3_rx_irrx */
                         OMAP3_CORE1_IOPAD(0x21a0, PIN_OUTPUT | MUX_MODE0)		/* uart3_tx_irtx.uart3_tx_irtx */
 		>;
 	};
@@ -125,7 +125,7 @@
 		pinctrl-single,pins = <
 			OMAP3630_CORE2_IOPAD(0x25d8, PIN_INPUT_PULLUP | MUX_MODE2)	/* etk_clk.sdmmc3_clk */
 			OMAP3630_CORE2_IOPAD(0x25e4, PIN_INPUT_PULLUP | MUX_MODE2)	/* etk_d4.sdmmc3_dat0 */
-			OMAP3630_CORE2_IOPAD(0x25e6, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */
+			OMAP3630_CORE2_IOPAD(0x25e6, PIN_INPUT_PULLUP | MUX_MODE2)	/* etk_d5.sdmmc3_dat1 */
 			OMAP3630_CORE2_IOPAD(0x25e8, PIN_INPUT_PULLUP | MUX_MODE2)	/* etk_d6.sdmmc3_dat2 */
 			OMAP3630_CORE2_IOPAD(0x25e2, PIN_INPUT_PULLUP | MUX_MODE2)	/* etk_d3.sdmmc3_dat3 */
 		>;
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index dc759a3028b7..5d5b620b7d9b 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -14,6 +14,29 @@
 		display0 = &hdmi0;
 	};
 
+	vmain: fixedregulator-vmain {
+		compatible = "regulator-fixed";
+		regulator-name = "vmain";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	vsys_cobra: fixedregulator-vsys_cobra {
+		compatible = "regulator-fixed";
+		regulator-name = "vsys_cobra";
+		vin-supply = <&vmain>;
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	vdds_1v8_main: fixedregulator-vdds_1v8_main {
+		compatible = "regulator-fixed";
+		regulator-name = "vdds_1v8_main";
+		vin-supply = <&smps7_reg>;
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+	};
+
 	vmmcsd_fixed: fixedregulator-mmcsd {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmcsd_fixed";
@@ -309,7 +332,7 @@
 
 	wlcore_irq_pin: pinmux_wlcore_irq_pin {
 		pinctrl-single,pins = <
-			OMAP5_IOPAD(0x40, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6)	/* llia_wakereqin.gpio1_wk14 */
+			OMAP5_IOPAD(0x40, PIN_INPUT_PULLUP | MUX_MODE6)	/* llia_wakereqin.gpio1_wk14 */
 		>;
 	};
 };
@@ -409,6 +432,26 @@
 
 			ti,ldo6-vibrator;
 
+			smps123-in-supply = <&vsys_cobra>;
+			smps45-in-supply = <&vsys_cobra>;
+			smps6-in-supply = <&vsys_cobra>;
+			smps7-in-supply = <&vsys_cobra>;
+			smps8-in-supply = <&vsys_cobra>;
+			smps9-in-supply = <&vsys_cobra>;
+			smps10_out2-in-supply = <&vsys_cobra>;
+			smps10_out1-in-supply = <&vsys_cobra>;
+			ldo1-in-supply = <&vsys_cobra>;
+			ldo2-in-supply = <&vsys_cobra>;
+			ldo3-in-supply = <&vdds_1v8_main>;
+			ldo4-in-supply = <&vdds_1v8_main>;
+			ldo5-in-supply = <&vsys_cobra>;
+			ldo6-in-supply = <&vdds_1v8_main>;
+			ldo7-in-supply = <&vsys_cobra>;
+			ldo8-in-supply = <&vsys_cobra>;
+			ldo9-in-supply = <&vmmcsd_fixed>;
+			ldoln-in-supply = <&vsys_cobra>;
+			ldousb-in-supply = <&vsys_cobra>;
+
 			regulators {
 				smps123_reg: smps123 {
 					/* VDD_OPP_MPU */
@@ -600,7 +643,8 @@
 		pinctrl-0 = <&twl6040_pins>;
 
 		interrupts = <GIC_SPI 119 IRQ_TYPE_NONE>; /* IRQ_SYS_2N cascaded to gic */
-		ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>;  /* gpio line 141 */
+
+		/* audpwron gpio defined in the board specific dts */
 
 		vio-supply = <&smps7_reg>;
 		v2v1-supply = <&smps9_reg>;
diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts
index 46ecb1dd3b5c..f75ce02fb398 100644
--- a/arch/arm/boot/dts/omap5-igep0050.dts
+++ b/arch/arm/boot/dts/omap5-igep0050.dts
@@ -35,6 +35,22 @@
 	};
 };
 
+/* LDO4 is VPP1 - ball AD9 */
+&ldo4_reg {
+	regulator-min-microvolt = <2000000>;
+	regulator-max-microvolt = <2000000>;
+};
+
+/*
+ * LDO7 is used for HDMI: VDDA_DSIPORTA - ball AA33, VDDA_DSIPORTC - ball AE33,
+ * VDDA_HDMI - ball AN25
+ */
+&ldo7_reg {
+	status = "okay";
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+};
+
 &omap5_pmx_core {
 	i2c4_pins: pinmux_i2c4_pins {
 		pinctrl-single,pins = <
@@ -52,3 +68,13 @@
 		<&gpio7 3 0>;		/* 195, SDA */
 };
 
+&twl6040 {
+	ti,audpwron-gpio = <&gpio5 16 GPIO_ACTIVE_HIGH>;  /* gpio line 144 */
+};
+
+&twl6040_pins {
+	pinctrl-single,pins = <
+		OMAP5_IOPAD(0x1c4, PIN_OUTPUT | MUX_MODE6)	/* mcspi1_somi.gpio5_144 */
+		OMAP5_IOPAD(0x1ca, PIN_OUTPUT | MUX_MODE6)	/* perslimbus2_clock.gpio5_145 */
+	>;
+};
diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index 60b3fbb3bf07..a51e60518eb6 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts
@@ -51,3 +51,13 @@
 		<&gpio9 1 GPIO_ACTIVE_HIGH>,	/* TCA6424A P00, LS OE */
 		<&gpio7 1 GPIO_ACTIVE_HIGH>;	/* GPIO 193, HPD */
 };
+
+&twl6040 {
+	ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>;  /* gpio line 141 */
+};
+
+&twl6040_pins {
+	pinctrl-single,pins = <
+		OMAP5_IOPAD(0x1be, PIN_OUTPUT | MUX_MODE6)	/* mcspi1_somi.gpio5_141 */
+	>;
+};
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
index a3601e4c0a2e..b844473601d2 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
@@ -136,6 +136,7 @@
 &gmac1 {
 	status = "okay";
 	phy-mode = "rgmii";
+	phy-handle = <&phy1>;
 
 	snps,reset-gpio = <&porta 0 GPIO_ACTIVE_LOW>;
 	snps,reset-active-low;
diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
index ad8ba10764a3..d294e82447a2 100644
--- a/arch/arm/boot/dts/stih407-family.dtsi
+++ b/arch/arm/boot/dts/stih407-family.dtsi
@@ -24,18 +24,21 @@
 			compatible = "shared-dma-pool";
 			reg = <0x40000000 0x01000000>;
 			no-map;
+			status = "disabled";
 		};
 
 		gp1_reserved: rproc@41000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x41000000 0x01000000>;
 			no-map;
+			status = "disabled";
 		};
 
 		audio_reserved: rproc@42000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x42000000 0x01000000>;
 			no-map;
+			status = "disabled";
 		};
 
 		dmu_reserved: rproc@43000000 {
diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts
index 68b479b8772c..73c133f5e79c 100644
--- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts
+++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts
@@ -176,8 +176,6 @@
 };
 
 &reg_dc1sw {
-	regulator-min-microvolt = <3000000>;
-	regulator-max-microvolt = <3000000>;
 	regulator-name = "vcc-lcd";
 };
 
diff --git a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts
index 360adfb1e9ca..d6ad6196a768 100644
--- a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts
+++ b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts
@@ -135,8 +135,6 @@
 
 &reg_dc1sw {
 	regulator-name = "vcc-lcd-usb2";
-	regulator-min-microvolt = <3000000>;
-	regulator-max-microvolt = <3000000>;
 };
 
 &reg_dc5ldo {
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 10f49ab5328e..47195e8690b4 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -82,6 +82,7 @@ CONFIG_TOUCHSCREEN_MMS114=y
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_MAX77693_HAPTIC=y
 CONFIG_INPUT_MAX8997_HAPTIC=y
+CONFIG_KEYBOARD_SAMSUNG=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_SAMSUNG=y
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 8f857564657f..8a5fff1b7f6f 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -264,6 +264,7 @@ CONFIG_KEYBOARD_TEGRA=y
 CONFIG_KEYBOARD_SPEAR=y
 CONFIG_KEYBOARD_ST_KEYSCAN=y
 CONFIG_KEYBOARD_CROS_EC=m
+CONFIG_KEYBOARD_SAMSUNG=m
 CONFIG_MOUSE_PS2_ELANTECH=y
 CONFIG_MOUSE_CYAPA=m
 CONFIG_MOUSE_ELAN_I2C=y
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 19cfab526d13..20febb368844 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -29,7 +29,7 @@
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index aeddd28b3595..92fd2c8a9af0 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -193,6 +193,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define pmd_large(pmd)		(pmd_val(pmd) & 2)
 #define pmd_bad(pmd)		(pmd_val(pmd) & 2)
+#define pmd_present(pmd)	(pmd_val(pmd))
 
 #define copy_pmd(pmdpd,pmdps)		\
 	do {				\
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index fa70db7c714b..2a029bceaf2f 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -211,6 +211,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 						: !!(pmd_val(pmd) & (val)))
 #define pmd_isclear(pmd, val)	(!(pmd_val(pmd) & (val)))
 
+#define pmd_present(pmd)	(pmd_isset((pmd), L_PMD_SECT_VALID))
 #define pmd_young(pmd)		(pmd_isset((pmd), PMD_SECT_AF))
 #define pte_special(pte)	(pte_isset((pte), L_PTE_SPECIAL))
 static inline pte_t pte_mkspecial(pte_t pte)
@@ -249,10 +250,10 @@ PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
 #define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
 #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
 
-/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */
+/* represent a notpresent pmd by faulting entry, this is used by pmdp_invalidate */
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
-	return __pmd(0);
+	return __pmd(pmd_val(pmd) & ~L_PMD_SECT_VALID);
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 348caabb7625..d62204060cbe 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -182,7 +182,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 #define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
 
 #define pmd_none(pmd)		(!pmd_val(pmd))
-#define pmd_present(pmd)	(pmd_val(pmd))
 
 static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 {
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index df90bc59bfce..861521606c6d 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -486,7 +486,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
 {
-	trace_ipi_raise(target, ipi_types[ipinr]);
+	trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
 	__smp_cross_call(target, ipinr);
 }
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 893941ec98dc..f1bde7c4e736 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	kvm_timer_vcpu_terminate(vcpu);
 	kvm_vgic_vcpu_destroy(vcpu);
 	kvm_pmu_vcpu_destroy(vcpu);
+	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index e65aa7d11b20..20dcf6e904b2 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -61,7 +61,6 @@ config ARCH_EXYNOS4
 	select CLKSRC_SAMSUNG_PWM if CPU_EXYNOS4210
 	select CPU_EXYNOS4210
 	select GIC_NON_BANKED
-	select KEYBOARD_SAMSUNG if INPUT_KEYBOARD
 	select MIGHT_HAVE_CACHE_L2X0
 	help
 	  Samsung EXYNOS4 (Cortex-A9) SoC based systems
diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c
index a38b16b69923..b56de4b8cdf2 100644
--- a/arch/arm/mach-imx/mach-imx6ul.c
+++ b/arch/arm/mach-imx/mach-imx6ul.c
@@ -46,7 +46,7 @@ static int ksz8081_phy_fixup(struct phy_device *dev)
 static void __init imx6ul_enet_phy_init(void)
 {
 	if (IS_BUILTIN(CONFIG_PHYLIB))
-		phy_register_fixup_for_uid(PHY_ID_KSZ8081, 0xffffffff,
+		phy_register_fixup_for_uid(PHY_ID_KSZ8081, MICREL_PHY_ID_MASK,
 					   ksz8081_phy_fixup);
 }
 
diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
index 5d7fb596bf4a..bf608441b357 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S
+++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
@@ -43,8 +43,8 @@
 #define OTHERS_MASK			(MODEM_IRQ_MASK | HOOK_SWITCH_MASK)
 
 /* IRQ handler register bitmasks */
-#define DEFERRED_FIQ_MASK		(0x1 << (INT_DEFERRED_FIQ % IH2_BASE))
-#define GPIO_BANK1_MASK  		(0x1 << INT_GPIO_BANK1)
+#define DEFERRED_FIQ_MASK		OMAP_IRQ_BIT(INT_DEFERRED_FIQ)
+#define GPIO_BANK1_MASK  		OMAP_IRQ_BIT(INT_GPIO_BANK1)
 
 /* Driver buffer byte offsets */
 #define BUF_MASK			(FIQ_MASK * 4)
@@ -110,7 +110,7 @@ ENTRY(qwerty_fiqin_start)
 	mov r8, #2				@ reset FIQ agreement
 	str r8, [r12, #IRQ_CONTROL_REG_OFFSET]
 
-	cmp r10, #INT_GPIO_BANK1		@ is it GPIO bank interrupt?
+	cmp r10, #(INT_GPIO_BANK1 - NR_IRQS_LEGACY)	@ is it GPIO interrupt?
 	beq gpio				@ yes - process it
 
 	mov r8, #1
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index d1f12095f315..ec760ae2f917 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -109,7 +109,8 @@ void __init ams_delta_init_fiq(void)
 	 * Since no set_type() method is provided by OMAP irq chip,
 	 * switch to edge triggered interrupt type manually.
 	 */
-	offset = IRQ_ILR0_REG_OFFSET + INT_DEFERRED_FIQ * 0x4;
+	offset = IRQ_ILR0_REG_OFFSET +
+			((INT_DEFERRED_FIQ - NR_IRQS_LEGACY) & 0x1f) * 0x4;
 	val = omap_readl(DEFERRED_FIQ_IH_BASE + offset) & ~(1 << 1);
 	omap_writel(val, DEFERRED_FIQ_IH_BASE + offset);
 
@@ -149,7 +150,7 @@ void __init ams_delta_init_fiq(void)
 	/*
 	 * Redirect GPIO interrupts to FIQ
 	 */
-	offset = IRQ_ILR0_REG_OFFSET + INT_GPIO_BANK1 * 0x4;
+	offset = IRQ_ILR0_REG_OFFSET + (INT_GPIO_BANK1 - NR_IRQS_LEGACY) * 0x4;
 	val = omap_readl(OMAP_IH1_BASE + offset) | 1;
 	omap_writel(val, OMAP_IH1_BASE + offset);
 }
diff --git a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
index adb5e7649659..6dfc3e1210a3 100644
--- a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
+++ b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
@@ -14,6 +14,8 @@
 #ifndef __AMS_DELTA_FIQ_H
 #define __AMS_DELTA_FIQ_H
 
+#include <mach/irqs.h>
+
 /*
  * Interrupt number used for passing control from FIQ to IRQ.
  * IRQ12, described as reserved, has been selected.
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 0517f0c1581a..1a648e9dfaa0 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -17,6 +17,7 @@ config ARCH_OMAP3
 	select PM_OPP if PM
 	select PM if CPU_IDLE
 	select SOC_HAS_OMAP2_SDRC
+	select ARM_ERRATA_430973
 
 config ARCH_OMAP4
 	bool "TI OMAP4"
@@ -36,6 +37,7 @@ config ARCH_OMAP4
 	select PM if CPU_IDLE
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_775420
+	select OMAP_INTERCONNECT
 
 config SOC_OMAP5
 	bool "TI OMAP5"
@@ -67,6 +69,8 @@ config SOC_AM43XX
 	select HAVE_ARM_SCU
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select HAVE_ARM_TWD
+	select ARM_ERRATA_754322
+	select ARM_ERRATA_775420
 
 config SOC_DRA7XX
 	bool "TI DRA7XX"
@@ -240,4 +244,12 @@ endmenu
 
 endif
 
+config OMAP5_ERRATA_801819
+	bool "Errata 801819: An eviction from L1 data cache might stall indefinitely"
+	depends on SOC_OMAP5 || SOC_DRA7XX
+	help
+	  A livelock can occur in the L2 cache arbitration that might prevent
+	  a snoop from completing. Under certain conditions this can cause the
+	  system to deadlock.
+
 endmenu
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index af2851fbcdf0..bae263fba640 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -46,6 +46,7 @@
 
 #define OMAP5_DRA7_MON_SET_CNTFRQ_INDEX	0x109
 #define OMAP5_MON_AMBA_IF_INDEX		0x108
+#define OMAP5_DRA7_MON_SET_ACR_INDEX	0x107
 
 /* Secure PPA(Primary Protected Application) APIs */
 #define OMAP4_PPA_L2_POR_INDEX		0x23
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index c625cc10d9f9..8cd1de914ee4 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -50,6 +50,39 @@ void __iomem *omap4_get_scu_base(void)
 	return scu_base;
 }
 
+#ifdef CONFIG_OMAP5_ERRATA_801819
+void omap5_erratum_workaround_801819(void)
+{
+	u32 acr, revidr;
+	u32 acr_mask;
+
+	/* REVIDR[3] indicates erratum fix available on silicon */
+	asm volatile ("mrc p15, 0, %0, c0, c0, 6" : "=r" (revidr));
+	if (revidr & (0x1 << 3))
+		return;
+
+	asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr));
+	/*
+	 * BIT(27) - Disables streaming. All write-allocate lines allocate in
+	 * the L1 or L2 cache.
+	 * BIT(25) - Disables streaming. All write-allocate lines allocate in
+	 * the L1 cache.
+	 */
+	acr_mask = (0x3 << 25) | (0x3 << 27);
+	/* do we already have it done.. if yes, skip expensive smc */
+	if ((acr & acr_mask) == acr_mask)
+		return;
+
+	acr |= acr_mask;
+	omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr);
+
+	pr_debug("%s: ARM erratum workaround 801819 applied on CPU%d\n",
+		 __func__, smp_processor_id());
+}
+#else
+static inline void omap5_erratum_workaround_801819(void) { }
+#endif
+
 static void omap4_secondary_init(unsigned int cpu)
 {
 	/*
@@ -64,12 +97,15 @@ static void omap4_secondary_init(unsigned int cpu)
 		omap_secure_dispatcher(OMAP4_PPA_CPU_ACTRL_SMP_INDEX,
 							4, 0, 0, 0, 0, 0);
 
-	/*
-	 * Configure the CNTFRQ register for the secondary cpu's which
-	 * indicates the frequency of the cpu local timers.
-	 */
-	if (soc_is_omap54xx() || soc_is_dra7xx())
+	if (soc_is_omap54xx() || soc_is_dra7xx()) {
+		/*
+		 * Configure the CNTFRQ register for the secondary cpu's which
+		 * indicates the frequency of the cpu local timers.
+		 */
 		set_cntfreq();
+		/* Configure ACR to disable streaming WA for 801819 */
+		omap5_erratum_workaround_801819();
+	}
 
 	/*
 	 * Synchronise with the boot thread.
@@ -218,6 +254,8 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 
 	if (cpu_is_omap446x())
 		startup_addr = omap4460_secondary_startup;
+	if (soc_is_dra74x() || soc_is_omap54xx())
+		omap5_erratum_workaround_801819();
 
 	/*
 	 * Write the address of secondary startup routine into the
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
index 78af6d8cf2e2..daf2753de7aa 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -186,8 +186,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag)
 			trace_state = (PWRDM_TRACE_STATES_FLAG |
 				       ((next & OMAP_POWERSTATE_MASK) << 8) |
 				       ((prev & OMAP_POWERSTATE_MASK) << 0));
-			trace_power_domain_target(pwrdm->name, trace_state,
-						  smp_processor_id());
+			trace_power_domain_target_rcuidle(pwrdm->name,
+							  trace_state,
+							  smp_processor_id());
 		}
 		break;
 	default:
@@ -523,8 +524,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst)
 
 	if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
 		/* Trace the pwrdm desired target state */
-		trace_power_domain_target(pwrdm->name, pwrst,
-					  smp_processor_id());
+		trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
+						  smp_processor_id());
 		/* Program the pwrdm desired target state */
 		ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
 	}
diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c
index 0ec2d00f4237..eb350a673133 100644
--- a/arch/arm/mach-omap2/powerdomains7xx_data.c
+++ b/arch/arm/mach-omap2/powerdomains7xx_data.c
@@ -36,14 +36,7 @@ static struct powerdomain iva_7xx_pwrdm = {
 	.prcm_offs	  = DRA7XX_PRM_IVA_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
-	.pwrsts_logic_ret = PWRSTS_OFF,
 	.banks		  = 4,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* hwa_mem */
-		[1] = PWRSTS_OFF_RET,	/* sl2_mem */
-		[2] = PWRSTS_OFF_RET,	/* tcm1_mem */
-		[3] = PWRSTS_OFF_RET,	/* tcm2_mem */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* hwa_mem */
 		[1] = PWRSTS_ON,	/* sl2_mem */
@@ -76,12 +69,7 @@ static struct powerdomain ipu_7xx_pwrdm = {
 	.prcm_offs	  = DRA7XX_PRM_IPU_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
-	.pwrsts_logic_ret = PWRSTS_OFF,
 	.banks		  = 2,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* aessmem */
-		[1] = PWRSTS_OFF_RET,	/* periphmem */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* aessmem */
 		[1] = PWRSTS_ON,	/* periphmem */
@@ -95,11 +83,7 @@ static struct powerdomain dss_7xx_pwrdm = {
 	.prcm_offs	  = DRA7XX_PRM_DSS_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
-	.pwrsts_logic_ret = PWRSTS_OFF,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* dss_mem */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* dss_mem */
 	},
@@ -111,13 +95,8 @@ static struct powerdomain l4per_7xx_pwrdm = {
 	.name		  = "l4per_pwrdm",
 	.prcm_offs	  = DRA7XX_PRM_L4PER_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
-	.pwrsts		  = PWRSTS_RET_ON,
-	.pwrsts_logic_ret = PWRSTS_RET,
+	.pwrsts		  = PWRSTS_ON,
 	.banks		  = 2,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* nonretained_bank */
-		[1] = PWRSTS_OFF_RET,	/* retained_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* nonretained_bank */
 		[1] = PWRSTS_ON,	/* retained_bank */
@@ -132,9 +111,6 @@ static struct powerdomain gpu_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* gpu_mem */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* gpu_mem */
 	},
@@ -148,8 +124,6 @@ static struct powerdomain wkupaon_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* wkup_bank */
 	},
@@ -161,15 +135,7 @@ static struct powerdomain core_7xx_pwrdm = {
 	.prcm_offs	  = DRA7XX_PRM_CORE_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_ON,
-	.pwrsts_logic_ret = PWRSTS_RET,
 	.banks		  = 5,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* core_nret_bank */
-		[1] = PWRSTS_OFF_RET,	/* core_ocmram */
-		[2] = PWRSTS_OFF_RET,	/* core_other_bank */
-		[3] = PWRSTS_OFF_RET,	/* ipu_l2ram */
-		[4] = PWRSTS_OFF_RET,	/* ipu_unicache */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* core_nret_bank */
 		[1] = PWRSTS_ON,	/* core_ocmram */
@@ -226,11 +192,7 @@ static struct powerdomain vpe_7xx_pwrdm = {
 	.prcm_offs	  = DRA7XX_PRM_VPE_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
-	.pwrsts_logic_ret = PWRSTS_OFF,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* vpe_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* vpe_bank */
 	},
@@ -260,14 +222,8 @@ static struct powerdomain l3init_7xx_pwrdm = {
 	.name		  = "l3init_pwrdm",
 	.prcm_offs	  = DRA7XX_PRM_L3INIT_INST,
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
-	.pwrsts		  = PWRSTS_RET_ON,
-	.pwrsts_logic_ret = PWRSTS_RET,
+	.pwrsts		  = PWRSTS_ON,
 	.banks		  = 3,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* gmac_bank */
-		[1] = PWRSTS_OFF_RET,	/* l3init_bank1 */
-		[2] = PWRSTS_OFF_RET,	/* l3init_bank2 */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* gmac_bank */
 		[1] = PWRSTS_ON,	/* l3init_bank1 */
@@ -283,9 +239,6 @@ static struct powerdomain eve3_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* eve3_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* eve3_bank */
 	},
@@ -299,9 +252,6 @@ static struct powerdomain emu_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* emu_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* emu_bank */
 	},
@@ -314,11 +264,6 @@ static struct powerdomain dsp2_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 3,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* dsp2_edma */
-		[1] = PWRSTS_OFF_RET,	/* dsp2_l1 */
-		[2] = PWRSTS_OFF_RET,	/* dsp2_l2 */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* dsp2_edma */
 		[1] = PWRSTS_ON,	/* dsp2_l1 */
@@ -334,11 +279,6 @@ static struct powerdomain dsp1_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 3,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* dsp1_edma */
-		[1] = PWRSTS_OFF_RET,	/* dsp1_l1 */
-		[2] = PWRSTS_OFF_RET,	/* dsp1_l2 */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* dsp1_edma */
 		[1] = PWRSTS_ON,	/* dsp1_l1 */
@@ -354,9 +294,6 @@ static struct powerdomain cam_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* vip_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* vip_bank */
 	},
@@ -370,9 +307,6 @@ static struct powerdomain eve4_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* eve4_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* eve4_bank */
 	},
@@ -386,9 +320,6 @@ static struct powerdomain eve2_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* eve2_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* eve2_bank */
 	},
@@ -402,9 +333,6 @@ static struct powerdomain eve1_7xx_pwrdm = {
 	.prcm_partition	  = DRA7XX_PRM_PARTITION,
 	.pwrsts		  = PWRSTS_OFF_ON,
 	.banks		  = 1,
-	.pwrsts_mem_ret	= {
-		[0] = PWRSTS_OFF_RET,	/* eve1_bank */
-	},
 	.pwrsts_mem_on	= {
 		[0] = PWRSTS_ON,	/* eve1_bank */
 	},
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 5b385bb8aff9..cb9497a20fb3 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -496,8 +496,7 @@ void __init omap_init_time(void)
 	__omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon",
 			2, "timer_sys_ck", NULL, false);
 
-	if (of_have_populated_dt())
-		clocksource_probe();
+	clocksource_probe();
 }
 
 #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX)
@@ -505,6 +504,8 @@ void __init omap3_secure_sync32k_timer_init(void)
 {
 	__omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure",
 			2, "timer_sys_ck", NULL, false);
+
+	clocksource_probe();
 }
 #endif /* CONFIG_ARCH_OMAP3 */
 
@@ -513,6 +514,8 @@ void __init omap3_gptimer_timer_init(void)
 {
 	__omap_sync32k_timer_init(2, "timer_sys_ck", NULL,
 			1, "timer_sys_ck", "ti,timer-alwon", true);
+
+	clocksource_probe();
 }
 #endif
 
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 84baa16f4c0b..e93aa6734147 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -68,7 +68,7 @@
 #include <linux/platform_data/asoc-s3c.h>
 #include <linux/platform_data/spi-s3c64xx.h>
 
-static u64 samsung_device_dma_mask = DMA_BIT_MASK(32);
+#define samsung_device_dma_mask (*((u64[]) { DMA_BIT_MASK(32) }))
 
 /* AC97 */
 #ifdef CONFIG_CPU_S3C2440
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7085e322dc42..648a32c89541 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -95,7 +95,7 @@ boot := arch/arm64/boot
 Image: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-Image.%: vmlinux
+Image.%: Image
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 zinstall install:
diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi
index 3a4e9a2ab313..fbafa24cd533 100644
--- a/arch/arm64/boot/dts/lg/lg1312.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1312.dtsi
@@ -125,7 +125,7 @@
 		#size-cells = <1>;
 		#interrupts-cells = <3>;
 
-		compatible = "arm,amba-bus";
+		compatible = "simple-bus";
 		interrupt-parent = <&gic>;
 		ranges;
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 46f325a143b0..d7f8e06910bc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -163,7 +163,7 @@
 	};
 
 	amba {
-		compatible = "arm,amba-bus";
+		compatible = "simple-bus";
 		#address-cells = <2>;
 		#size-cells = <2>;
 		ranges;
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
index f69f69c8120c..da84645525b9 100644
--- a/arch/arm64/include/asm/kgdb.h
+++ b/arch/arm64/include/asm/kgdb.h
@@ -38,25 +38,54 @@ extern int kgdb_fault_expected;
 #endif /* !__ASSEMBLY__ */
 
 /*
- * gdb is expecting the following registers layout.
+ * gdb remote procotol (well most versions of it) expects the following
+ * register layout.
  *
  * General purpose regs:
  *     r0-r30: 64 bit
  *     sp,pc : 64 bit
- *     pstate  : 64 bit
- *     Total: 34
+ *     pstate  : 32 bit
+ *     Total: 33 + 1
  * FPU regs:
  *     f0-f31: 128 bit
- *     Total: 32
- * Extra regs
  *     fpsr & fpcr: 32 bit
- *     Total: 2
+ *     Total: 32 + 2
  *
+ * To expand a little on the "most versions of it"... when the gdb remote
+ * protocol for AArch64 was developed it depended on a statement in the
+ * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register".
+ * and, as a result, allocated only 32-bits for the PSTATE in the remote
+ * protocol. In fact this statement is still present in ARM DDI 0487A.i.
+ *
+ * Unfortunately "is a 32-bit register" has a very special meaning for
+ * system registers. It means that "the upper bits, bits[63:32], are
+ * RES0.". RES0 is heavily used in the ARM architecture documents as a
+ * way to leave space for future architecture changes. So to translate a
+ * little for people who don't spend their spare time reading ARM architecture
+ * manuals, what "is a 32-bit register" actually means in this context is
+ * "is a 64-bit register but one with no meaning allocated to any of the
+ * upper 32-bits... *yet*".
+ *
+ * Perhaps then we should not be surprised that this has led to some
+ * confusion. Specifically a patch, influenced by the above translation,
+ * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch
+ * was reverted in gdb-7.8.1 and all later releases, when this was
+ * discovered to be an undocumented protocol change.
+ *
+ * So... it is *not* wrong for us to only allocate 32-bits to PSTATE
+ * here even though the kernel itself allocates 64-bits for the same
+ * state. That is because this bit of code tells the kernel how the gdb
+ * remote protocol (well most versions of it) describes the register state.
+ *
+ * Note that if you are using one of the versions of gdb that supports
+ * the gdb-7.7 version of the protocol you cannot use kgdb directly
+ * without providing a custom register description (gdb can load new
+ * protocol descriptions at runtime).
  */
 
-#define _GP_REGS		34
+#define _GP_REGS		33
 #define _FP_REGS		32
-#define _EXTRA_REGS		2
+#define _EXTRA_REGS		3
 /*
  * general purpose registers size in bytes.
  * pstate is only 4 bytes. subtract 4 bytes
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index ff98585d085a..d25f4f137c2a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -26,7 +26,7 @@
 
 #define check_pgt_cache()		do { } while (0)
 
-#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 433e50405274..022644704a93 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void)
 	cpu_park_loop();
 }
 
+/*
+ * If a secondary CPU enters the kernel but fails to come online,
+ * (e.g. due to mismatched features), and cannot exit the kernel,
+ * we increment cpus_stuck_in_kernel and leave the CPU in a
+ * quiesecent loop within the kernel text. The memory containing
+ * this loop must not be re-used for anything else as the 'stuck'
+ * core is executing it.
+ *
+ * This function is used to inhibit features like kexec and hibernate.
+ */
+bool cpus_are_stuck_in_kernel(void);
+
 #endif /* ifndef __ASSEMBLY__ */
 
 #endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index fc9682bfe002..e875a5a551d7 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -30,22 +30,53 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	unsigned int tmp;
 	arch_spinlock_t lockval;
+	u32 owner;
+
+	/*
+	 * Ensure prior spin_lock operations to other locks have completed
+	 * on this CPU before we test whether "lock" is locked.
+	 */
+	smp_mb();
+	owner = READ_ONCE(lock->owner) << 16;
 
 	asm volatile(
 "	sevl\n"
 "1:	wfe\n"
 "2:	ldaxr	%w0, %2\n"
+	/* Is the lock free? */
 "	eor	%w1, %w0, %w0, ror #16\n"
-"	cbnz	%w1, 1b\n"
+"	cbz	%w1, 3f\n"
+	/* Lock taken -- has there been a subsequent unlock->lock transition? */
+"	eor	%w1, %w3, %w0, lsl #16\n"
+"	cbz	%w1, 1b\n"
+	/*
+	 * The owner has been updated, so there was an unlock->lock
+	 * transition that we missed. That means we can rely on the
+	 * store-release of the unlock operation paired with the
+	 * load-acquire of the lock operation to publish any of our
+	 * previous stores to the new lock owner and therefore don't
+	 * need to bother with the writeback below.
+	 */
+"	b	4f\n"
+"3:\n"
+	/*
+	 * Serialise against any concurrent lockers by writing back the
+	 * unlocked lock value
+	 */
 	ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
 "	stxr	%w1, %w0, %2\n"
-"	cbnz	%w1, 2b\n", /* Serialise against any concurrent lockers */
-	/* LSE atomics */
 "	nop\n"
-"	nop\n")
+"	nop\n",
+	/* LSE atomics */
+"	mov	%w1, %w0\n"
+"	cas	%w0, %w0, %2\n"
+"	eor	%w1, %w1, %w0\n")
+	/* Somebody else wrote to the lock, GOTO 10 and reload the value */
+"	cbnz	%w1, 2b\n"
+"4:"
 	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
-	:
+	: "r" (owner)
 	: "memory");
 }
 
@@ -148,6 +179,7 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
+	smp_mb(); /* See arch_spin_unlock_wait */
 	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index f8df75d740f4..21ab5df9fa76 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -33,6 +33,7 @@
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sections.h>
+#include <asm/smp.h>
 #include <asm/suspend.h>
 #include <asm/virt.h>
 
@@ -236,6 +237,11 @@ int swsusp_arch_suspend(void)
 	unsigned long flags;
 	struct sleep_stack_data state;
 
+	if (cpus_are_stuck_in_kernel()) {
+		pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
+		return -EBUSY;
+	}
+
 	local_dbg_save(flags);
 
 	if (__cpu_suspend_enter(&state)) {
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index b67531a13136..b5f063e5eff7 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -58,7 +58,17 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
 	{ "x30", 8, offsetof(struct pt_regs, regs[30])},
 	{ "sp", 8, offsetof(struct pt_regs, sp)},
 	{ "pc", 8, offsetof(struct pt_regs, pc)},
-	{ "pstate", 8, offsetof(struct pt_regs, pstate)},
+	/*
+	 * struct pt_regs thinks PSTATE is 64-bits wide but gdb remote
+	 * protocol disagrees. Therefore we must extract only the lower
+	 * 32-bits. Look for the big comment in asm/kgdb.h for more
+	 * detail.
+	 */
+	{ "pstate", 4, offsetof(struct pt_regs, pstate)
+#ifdef CONFIG_CPU_BIG_ENDIAN
+							+ 4
+#endif
+	},
 	{ "v0", 16, -1 },
 	{ "v1", 16, -1 },
 	{ "v2", 16, -1 },
@@ -128,6 +138,8 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 	memset((char *)gdb_regs, 0, NUMREGBYTES);
 	thread_regs = task_pt_regs(task);
 	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+	/* Special case for PSTATE (check comments in asm/kgdb.h for details) */
+	dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs);
 }
 
 void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 678e0842cb3b..62ff3c0622e2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier)
 {
 	return -EINVAL;
 }
+
+static bool have_cpu_die(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int any_cpu = raw_smp_processor_id();
+
+	if (cpu_ops[any_cpu]->cpu_die)
+		return true;
+#endif
+	return false;
+}
+
+bool cpus_are_stuck_in_kernel(void)
+{
+	bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
+
+	return !!cpus_stuck_in_kernel || smp_spin_tables;
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f7cf463107df..2a43012616b7 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 
 	/*
 	 * We need to switch to kernel mode so that we can use __get_user
-	 * to safely read from kernel space.  Note that we now dump the
-	 * code first, just in case the backtrace kills us.
+	 * to safely read from kernel space.
 	 */
 	fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where)
 	print_ip_sym(where);
 }
 
-static void dump_instr(const char *lvl, struct pt_regs *regs)
+static void __dump_instr(const char *lvl, struct pt_regs *regs)
 {
 	unsigned long addr = instruction_pointer(regs);
-	mm_segment_t fs;
 	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
 	int i;
 
-	/*
-	 * We need to switch to kernel mode so that we can use __get_user
-	 * to safely read from kernel space.  Note that we now dump the
-	 * code first, just in case the backtrace kills us.
-	 */
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
 	for (i = -4; i < 1; i++) {
 		unsigned int val, bad;
 
@@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 		}
 	}
 	printk("%sCode: %s\n", lvl, str);
+}
 
-	set_fs(fs);
+static void dump_instr(const char *lvl, struct pt_regs *regs)
+{
+	if (!user_mode(regs)) {
+		mm_segment_t fs = get_fs();
+		set_fs(KERNEL_DS);
+		__dump_instr(lvl, regs);
+		set_fs(fs);
+	} else {
+		__dump_instr(lvl, regs);
+	}
 }
 
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b7b397802088..efcf1f7ef1e4 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 						 &asid_generation);
 	flush_context(cpu);
 
-	/* We have at least 1 ASID per CPU, so this will always succeed */
+	/* We have more ASIDs than CPUs, so this will always succeed */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
 set_asid:
@@ -227,8 +227,11 @@ switch_mm_fastpath:
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
-	/* If we end up with more CPUs than ASIDs, expect things to crash */
-	WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+	/*
+	 * Expect allocation after rollover to fail if we don't have at least
+	 * one more ASID than CPUs. ASID #0 is reserved for init_mm.
+	 */
+	WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
 	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
 	asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
 			   GFP_KERNEL);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5954881a35ac..013e2cbe7924 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -109,7 +109,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	 * PTE_RDONLY is cleared by default in the asm below, so set it in
 	 * back if necessary (read-only or clean PTE).
 	 */
-	if (!pte_write(entry) || !dirty)
+	if (!pte_write(entry) || !pte_sw_dirty(entry))
 		pte_val(entry) |= PTE_RDONLY;
 
 	/*
@@ -441,7 +441,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	return 1;
 }
 
-static struct fault_info {
+static const struct fault_info {
 	int	(*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 	int	sig;
 	int	code;
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index dbd12ea8ce68..43a76b07eb32 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
 {
 	struct page *page = pte_page(pte);
 
-	/* no flushing needed for anonymous pages */
-	if (!page_mapping(page))
-		return;
-
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		sync_icache_aliases(page_address(page),
 				    PAGE_SIZE << compound_order(page));
diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h
index 1aba19d68c5e..db039cb368be 100644
--- a/arch/avr32/include/asm/pgalloc.h
+++ b/arch/avr32/include/asm/pgalloc.h
@@ -43,7 +43,7 @@ static inline void pgd_ctor(void *x)
  */
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor);
+	return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	struct page *page;
 	void *pg;
 
-	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 	if (!pg)
 		return NULL;
 
diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h
index 235ece437ddd..42f1affb9c2d 100644
--- a/arch/cris/include/asm/pgalloc.h
+++ b/arch/cris/include/asm/pgalloc.h
@@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-  	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
  	return pte;
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 41907d25ed38..c9ed14f6c67d 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
 	if (pte)
 		clear_page(pte);
 	return pte;
@@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	struct page *page;
 
 #ifdef CONFIG_HIGHPTE
-	page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+	page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0);
 #else
-	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	page = alloc_pages(GFP_KERNEL, 0);
 #endif
 	if (!page)
 		return NULL;
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 77da3b0ae3c2..eeebf862c46c 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
@@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	gfp_t flags =  GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
 }
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index f80758cb7157..e109ee95e919 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -45,7 +45,7 @@ config IA64
 	select GENERIC_SMP_IDLE_THREAD
 	select ARCH_INIT_TASK
 	select ARCH_TASK_STRUCT_ALLOCATOR
-	select ARCH_THREAD_INFO_ALLOCATOR
+	select ARCH_THREAD_STACK_ALLOCATOR
 	select ARCH_CLOCKSOURCE_DATA
 	select GENERIC_TIME_VSYSCALL_OLD
 	select SYSCTL_ARCH_UNALIGN_NO_WARN
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index aa995b67c3f5..d1212b84fb83 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -48,15 +48,15 @@ struct thread_info {
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_info_node(tsk, node)	\
-		((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#define alloc_thread_stack_node(tsk, node)	\
+		((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
 #define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
 #else
 #define current_thread_info()	((struct thread_info *) 0)
-#define alloc_thread_info_node(tsk, node)	((struct thread_info *) 0)
+#define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
-#define free_thread_info(ti)	/* nothing */
+#define free_thread_stack(ti)	/* nothing */
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index f9efe9739d3f..0eaa89f3defd 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * handled. This is done by having a special ".data..init_task" section...
  */
 #define init_thread_info	init_task_mem.s.thread_info
+#define init_stack		init_task_mem.stack
 
 union {
 	struct {
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index f9924fbcfe42..fb95aed5f428 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -14,7 +14,7 @@ extern const char bad_pmd_string[];
 extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	unsigned long address)
 {
-	unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT);
+	unsigned long page = __get_free_page(GFP_DMA);
 
 	if (!page)
 		return NULL;
@@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	unsigned long address)
 {
-	struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0);
+	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
 
 	if (!page)
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 24bcba496c75..c895b987202c 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad
 {
 	pte_t *pte;
 
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	if (pte) {
 		__flush_page_to_ram(pte);
 		flush_tlb_kernel_page(pte);
@@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres
 	struct page *page;
 	pte_t *pte;
 
-	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
 	if(!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 0931388de47f..1901f61f926f 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -37,7 +37,7 @@ do {							\
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	unsigned long page = __get_free_page(GFP_KERNEL);
 
 	if (!page)
 		return NULL;
@@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 					unsigned long address)
 {
-        struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+        struct page *page = alloc_pages(GFP_KERNEL, 0);
 
 	if (page == NULL)
 		return NULL;
diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h
index 3104df0a4822..c2caa1ee4360 100644
--- a/arch/metag/include/asm/pgalloc.h
+++ b/arch/metag/include/asm/pgalloc.h
@@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT |
-					      __GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 	return pte;
 }
 
@@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 				      unsigned long address)
 {
 	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL  | __GFP_ZERO, 0);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 61436d69775c..7c89390c0c13 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+	int flags = GFP_KERNEL | __GFP_HIGHMEM;
 #else
-	int flags = GFP_KERNEL | __GFP_REPEAT;
+	int flags = GFP_KERNEL;
 #endif
 
 	ptepage = alloc_pages(flags, 0);
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 4f4520e779a5..eb99fcc76088 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 	if (mem_init_done) {
-		pte = (pte_t *)__get_free_page(GFP_KERNEL |
-					__GFP_REPEAT | __GFP_ZERO);
+		pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 	} else {
 		pte = (pte_t *)early_get_page();
 		if (pte)
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 6733ac575da4..36a391d289aa 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -74,7 +74,7 @@
 #define KVM_GUEST_KUSEG			0x00000000UL
 #define KVM_GUEST_KSEG0			0x40000000UL
 #define KVM_GUEST_KSEG23		0x60000000UL
-#define KVM_GUEST_KSEGX(a)		((_ACAST32_(a)) & 0x60000000)
+#define KVM_GUEST_KSEGX(a)		((_ACAST32_(a)) & 0xe0000000)
 #define KVM_GUEST_CPHYSADDR(a)		((_ACAST32_(a)) & 0x1fffffff)
 
 #define KVM_GUEST_CKSEG0ADDR(a)		(KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0)
@@ -338,6 +338,7 @@ struct kvm_mips_tlb {
 #define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
 	void *host_ebase, *guest_ebase;
+	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	unsigned long host_stack;
 	unsigned long host_gp;
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index b336037e8768..93c079a1cfc8 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (!pte)
 		return NULL;
 	clear_highpage(pte);
@@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pmd_t *pmd;
 
-	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER);
+	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER);
 	if (pmd)
 		pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
 	return pmd;
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index a6b611f1da43..f53816744d60 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -24,7 +24,7 @@ struct mm_struct;
 struct vm_area_struct;
 
 #define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
-				 _CACHE_CACHABLE_NONCOHERENT)
+				 _page_cachable_default)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
 				 _page_cachable_default)
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \
@@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	pte.pte_low  &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
 	pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
 	pte.pte_low  |= pgprot_val(newprot) & ~_PFNX_MASK;
-	pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
+	pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
 	return pte;
 }
 #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+		     (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
 }
 #endif
 
@@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd)
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) |
+		       (pgprot_val(newprot) & ~_PAGE_CHG_MASK);
 	return pmd;
 }
 
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 396df6eb0a12..645c8a1982a7 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1636,6 +1636,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 		if (index < 0) {
 			vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK);
 			vcpu->arch.host_cp0_badvaddr = va;
+			vcpu->arch.pc = curr_pc;
 			er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run,
 							 vcpu);
 			preempt_enable();
@@ -1647,6 +1648,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 			 * invalid exception to the guest
 			 */
 			if (!TLB_IS_VALID(*tlb, va)) {
+				vcpu->arch.host_cp0_badvaddr = va;
+				vcpu->arch.pc = curr_pc;
 				er = kvm_mips_emulate_tlbinv_ld(cause, NULL,
 								run, vcpu);
 				preempt_enable();
@@ -1666,7 +1669,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 			cache, op, base, arch->gprs[base], offset);
 		er = EMULATE_FAIL;
 		preempt_enable();
-		goto dont_update_pc;
+		goto done;
 
 	}
 
@@ -1694,16 +1697,20 @@ skip_fault:
 		kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
 			cache, op, base, arch->gprs[base], offset);
 		er = EMULATE_FAIL;
-		preempt_enable();
-		goto dont_update_pc;
 	}
 
 	preempt_enable();
+done:
+	/* Rollback PC only if emulation was unsuccessful */
+	if (er == EMULATE_FAIL)
+		vcpu->arch.pc = curr_pc;
 
 dont_update_pc:
-	/* Rollback PC */
-	vcpu->arch.pc = curr_pc;
-done:
+	/*
+	 * This is for exceptions whose emulation updates the PC, so do not
+	 * overwrite the PC under any circumstances
+	 */
+
 	return er;
 }
 
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
index 4ab4bdfad703..2143884709e4 100644
--- a/arch/mips/kvm/interrupt.h
+++ b/arch/mips/kvm/interrupt.h
@@ -28,6 +28,7 @@
 #define MIPS_EXC_MAX                12
 /* XXXSL More to follow */
 
+extern char __kvm_mips_vcpu_run_end[];
 extern char mips32_exception[], mips32_exceptionEnd[];
 extern char mips32_GuestException[], mips32_GuestExceptionEnd[];
 
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 3ef03009de5f..828fcfc1cd7f 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -202,6 +202,7 @@ FEXPORT(__kvm_mips_load_k0k1)
 
 	/* Jump to guest */
 	eret
+EXPORT(__kvm_mips_vcpu_run_end)
 
 VECTOR(MIPSX(exception), unknown)
 /* Find out what mode we came from and jump to the proper handler. */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index dc052fb5c7a2..44da5259f390 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -315,6 +315,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	memcpy(gebase + offset, mips32_GuestException,
 	       mips32_GuestExceptionEnd - mips32_GuestException);
 
+#ifdef MODULE
+	offset += mips32_GuestExceptionEnd - mips32_GuestException;
+	memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run,
+	       __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run);
+	vcpu->arch.vcpu_run = gebase + offset;
+#else
+	vcpu->arch.vcpu_run = __kvm_mips_vcpu_run;
+#endif
+
 	/* Invalidate the icache for these ranges */
 	local_flush_icache_range((unsigned long)gebase,
 				(unsigned long)gebase + ALIGN(size, PAGE_SIZE));
@@ -404,7 +413,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	/* Disable hardware page table walking while in guest */
 	htw_stop();
 
-	r = __kvm_mips_vcpu_run(run, vcpu);
+	r = vcpu->arch.vcpu_run(run, vcpu);
 
 	/* Re-enable HTW before enabling interrupts */
 	htw_start();
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 4861a78c7160..f5f90bbf019d 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void)
 }
 
 #ifndef CONFIG_KGDB
-void arch_release_thread_info(struct thread_info *ti);
+void arch_release_thread_stack(unsigned long *stack);
 #endif
 #define get_thread_info(ti)	get_task_struct((ti)->task)
 #define put_thread_info(ti)	put_task_struct((ti)->task)
diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c
index 99770823451a..2d7986c386fe 100644
--- a/arch/mn10300/kernel/kgdb.c
+++ b/arch/mn10300/kernel/kgdb.c
@@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs)
  * single-step state is cleared.  At this point the breakpoints should have
  * been removed by __switch_to().
  */
-void arch_release_thread_info(struct thread_info *ti)
+void arch_release_thread_stack(unsigned long *stack)
 {
+	struct thread_info *ti = (void *)stack;
 	if (kgdb_sstep_thread == ti) {
 		kgdb_sstep_thread = NULL;
 
diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c
index e77a7c728081..9577cf768875 100644
--- a/arch/mn10300/mm/pgtable.c
+++ b/arch/mn10300/mm/pgtable.c
@@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
 	if (pte)
 		clear_page(pte);
 	return pte;
@@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL, 0);
 #endif
 	if (!pte)
 		return NULL;
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 6e2985e0a7b9..bb47d08c8ef7 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO,
-					PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (pte) {
 		if (!pgtable_page_ctor(pte)) {
 			__free_page(pte);
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 21484e5b9e9a..87eebd185089 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long address)
 {
 	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL, 0);
 	if (!pte)
 		return NULL;
 	clear_page(page_address(pte));
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 62b08ef392be..5b2a95116e8f 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm,
 	pte_t *pte;
 
 	if (likely(mem_init_done)) {
-		pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT);
+		pte = (pte_t *) __get_free_page(GFP_KERNEL);
 	} else {
 		pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 #if 0
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index f2fd327dce2e..f08dda3f0995 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT,
-					       PMD_ORDER);
+	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER);
 	if (pmd)
 		memset(pmd, 0, PAGE_SIZE<<PMD_ORDER);
 	return pmd;
@@ -124,7 +123,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
@@ -137,7 +136,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
 }
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 01f7464d9fea..0a9d439bcda6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -128,7 +128,7 @@ config PPC
 	select IRQ_FORCED_THREADING
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_CBPF_JIT
+	select HAVE_CBPF_JIT if CPU_BIG_ENDIAN
 	select HAVE_ARCH_JUMP_LABEL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index a2350194fc76..8e21bb492dca 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	tlb_flush_pgtable(tlb, address);
 	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 290157e8d5b2..74839f24f412 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -88,6 +88,7 @@
 #define HPTE_R_RPN_SHIFT	12
 #define HPTE_R_RPN		ASM_CONST(0x0ffffffffffff000)
 #define HPTE_R_PP		ASM_CONST(0x0000000000000003)
+#define HPTE_R_PPP		ASM_CONST(0x8000000000000003)
 #define HPTE_R_N		ASM_CONST(0x0000000000000004)
 #define HPTE_R_G		ASM_CONST(0x0000000000000008)
 #define HPTE_R_M		ASM_CONST(0x0000000000000010)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 488279edb1f0..cd5e7aa8cc34 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
 
 extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
 extern void pte_fragment_free(unsigned long *, int);
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(PGALLOC_GFP);
 #else
 	struct page *page;
-	page = alloc_pages(PGALLOC_GFP, 4);
+	page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4);
 	if (!page)
 		return NULL;
 	return (pgd_t *) page_address(page);
@@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -110,13 +109,17 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long address)
 {
+	/*
+	 * By now all the pud entries should be none entries. So go
+	 * ahead and flush the page walk cache
+	 */
+	flush_tlb_pgtable(tlb, address);
         pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
@@ -127,6 +130,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
                                   unsigned long address)
 {
+	/*
+	 * By now all the pud entries should be none entries. So go
+	 * ahead and flush the page walk cache
+	 */
+	flush_tlb_pgtable(tlb, address);
         return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
 }
 
@@ -151,7 +159,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -198,7 +206,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	tlb_flush_pgtable(tlb, address);
+	/*
+	 * By now all the pud entries should be none entries. So go
+	 * ahead and flush the page walk cache
+	 */
+	flush_tlb_pgtable(tlb, address);
 	pgtable_free_tlb(tlb, table, 0);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 88a5ecaa157b..ab84c89c9e98 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size;
 #define KERN_VIRT_SIZE  __kernel_virt_size
 extern struct page *vmemmap;
 extern unsigned long ioremap_bot;
+extern unsigned long pci_io_base;
 #endif /* __ASSEMBLY__ */
 
 #include <asm/book3s/64/hash.h>
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 937d4e247ac3..df294224e280 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
 
 extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 				 pgprot_t flags, unsigned int psz);
+
+static inline unsigned long radix__get_tree_size(void)
+{
+	unsigned long rts_field;
+	/*
+	 * we support 52 bits, hence 52-31 = 21, 0b10101
+	 * RTS encoding details
+	 * bits 0 - 3 of rts -> bits 6 - 8 unsigned long
+	 * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long
+	 */
+	rts_field = (0x5UL << 5); /* 6 - 8 bits */
+	rts_field |= (0x2UL << 61);
+
+	return rts_field;
+}
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 13ef38828dfe..3fa94fcac628 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 				    unsigned long ap, int nid);
+extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 			      unsigned long ap, int nid);
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 #else
 #define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
 #define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
 #define radix___flush_tlb_page(mm,addr,p,i)	radix___local_flush_tlb_page(mm,addr,p,i)
+#define radix__flush_tlb_pwc(tlb, addr)	radix__local_flush_tlb_pwc(tlb, addr)
 #endif
 
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index d98424ae356c..96e5769b18b0 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
 #define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
 #endif /* CONFIG_SMP */
+/*
+ * flush the page walk cache for the address
+ */
+static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+	/*
+	 * Flush the page table walk cache on freeing a page table. We already
+	 * have marked the upper/higher level page table entry none by now.
+	 * So it is safe to flush PWC here.
+	 */
+	if (!radix_enabled())
+		return;
 
+	radix__flush_tlb_pwc(tlb, address);
+}
 #endif /*  _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h
index 54f591e9572e..c0a69ae92256 100644
--- a/arch/powerpc/include/asm/book3s/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/pgalloc.h
@@ -4,11 +4,6 @@
 #include <linux/mm.h>
 
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
-				     unsigned long address)
-{
-
-}
 
 #ifdef CONFIG_PPC64
 #include <asm/book3s/64/pgalloc.h>
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 0c12a3bfe2ab..897d2e1c8a9b 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -88,7 +87,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -172,7 +171,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	pte_fragment_fre((unsigned long *)pte, 1);
+	pte_fragment_free((unsigned long *)pte, 1);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
@@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 2714a3b81d24..d70101e1e25c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -642,13 +642,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 		if (pe->type & EEH_PE_VF) {
 			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
 		} else {
-			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 			pci_lock_rescan_remove();
 			pci_hp_remove_devices(bus);
 			pci_unlock_rescan_remove();
 		}
 	} else if (frozen_bus) {
-		eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data);
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
 	}
 
 	/*
@@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 		 */
 		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		if (pe->type & EEH_PE_VF)
+		if (pe->type & EEH_PE_VF) {
 			eeh_add_virt_device(edev, NULL);
-		else
+		} else {
+			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 			pci_hp_add_devices(bus);
+		}
 	} else if (frozen_bus && rmv_data->removed) {
 		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
 		ssleep(5);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c9440629128..8bcc1b457115 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
 	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
 
 	mtlr	r10
-BEGIN_MMU_FTR_SECTION
-	b	2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
 	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
+BEGIN_MMU_FTR_SECTION
 	beq-	2f
+FTR_SECTION_ELSE
+	b	2f
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
 
 .machine	push
 .machine	"power4"
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 3759df52bd67..a5ae49a2dcc4 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -47,7 +47,6 @@ static int __init pcibios_init(void)
 
 	printk(KERN_INFO "PCI: Probing PCI hardware\n");
 
-	pci_io_base = ISA_IO_BASE;
 	/* For now, override phys_mem_access_prot. If we need it,g
 	 * later, we may move that initialization to each ppc_md
 	 */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e2f12cbcade9..0b93893424f5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 		current->thread.regs = regs - 1;
 	}
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Clear any transactional state, we're exec()ing. The cause is
+	 * not important as there will never be a recheckpoint so it's not
+	 * user visible.
+	 */
+	if (MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(0);
+#endif
+
 	memset(regs->gpr, 0, sizeof(regs->gpr));
 	regs->ctr = 0;
 	regs->link = 0;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ccd2037c797f..6ee4b72cda42 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -719,7 +719,7 @@ unsigned char ibm_architecture_vec[] = {
 	 * must match by the macro below. Update the definition if
 	 * the structure layout changes.
 	 */
-#define IBM_ARCH_VEC_NRCORES_OFFSET	125
+#define IBM_ARCH_VEC_NRCORES_OFFSET	133
 	W(NR_CPUS),			/* number of cores supported */
 	0,
 	0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 30a03c03fe73..060b140f03c6 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -377,7 +377,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 
 #else
 	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
-		     offsetof(struct thread_fp_state, fpr[32][0]));
+		     offsetof(struct thread_fp_state, fpr[32]));
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				   &target->thread.fp_state, 0, -1);
@@ -405,7 +405,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 	return 0;
 #else
 	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
-		     offsetof(struct thread_fp_state, fpr[32][0]));
+		     offsetof(struct thread_fp_state, fpr[32]));
 
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				  &target->thread.fp_state, 0, -1);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a58670..b7019b559ddb 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
 	std	r3, STK_PARAM(R3)(r1)
 	SAVE_NVGPRS(r1)
 
-	/* We need to setup MSR for VSX register save instructions.  Here we
-	 * also clear the MSR RI since when we do the treclaim, we won't have a
-	 * valid kernel pointer for a while.  We clear RI here as it avoids
-	 * adding another mtmsr closer to the treclaim.  This makes the region
-	 * maked as non-recoverable wider than it needs to be but it saves on
-	 * inserting another mtmsrd later.
-	 */
+	/* We need to setup MSR for VSX register save instructions. */
 	mfmsr	r14
 	mr	r15, r14
 	ori	r15, r15, MSR_FP
-	li	r16, MSR_RI
+	li	r16, 0
 	ori	r16, r16, MSR_EE /* IRQs hard off */
 	andc	r15, r15, r16
 	oris	r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
 1:	tdeqi   r6, 0
 	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
 
-	/* The moment we treclaim, ALL of our GPRs will switch
+	/* Clear MSR RI since we are about to change r1, EE is already off. */
+	li	r4, 0
+	mtmsrd	r4, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 *
+	 * The moment we treclaim, ALL of our GPRs will switch
 	 * to user register state.  (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
 	 */
@@ -197,6 +201,11 @@ dont_backup_fp:
 
 	/* Store the PPR in r11 and reset to decent value */
 	std	r11, GPR11(r1)			/* Temporary stash */
+
+	/* Reset MSR RI so we can take SLB faults again */
+	li	r11, MSR_RI
+	mtmsrd	r11, 1
+
 	mfspr	r11, SPRN_PPR
 	HMT_MEDIUM
 
@@ -397,11 +406,6 @@ restore_gprs:
 	ld	r5, THREAD_TM_DSCR(r3)
 	ld	r6, THREAD_TM_PPR(r3)
 
-	/* Clear the MSR RI since we are about to change R1.  EE is already off
-	 */
-	li	r4, 0
-	mtmsrd	r4, 1
-
 	REST_GPR(0, r7)				/* GPR0 */
 	REST_2GPRS(2, r7)			/* GPR2-3 */
 	REST_GPR(4, r7)				/* GPR4 */
@@ -439,10 +443,33 @@ restore_gprs:
 	ld	r6, _CCR(r7)
 	mtcr    r6
 
-	REST_GPR(1, r7)				/* GPR1 */
-	REST_GPR(5, r7)				/* GPR5-7 */
 	REST_GPR(6, r7)
-	ld	r7, GPR7(r7)
+
+	/*
+	 * Store r1 and r5 on the stack so that we can access them
+	 * after we clear MSR RI.
+	 */
+
+	REST_GPR(5, r7)
+	std	r5, -8(r1)
+	ld	r5, GPR1(r7)
+	std	r5, -16(r1)
+
+	REST_GPR(7, r7)
+
+	/* Clear MSR RI since we are about to change r1. EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 */
+
+	ld	r5, -8(r1)
+	ld	r1, -16(r1)
 
 	/* Commit register state as checkpointed state: */
 	TRECHKPT
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index d873f6507f72..f8a871a72985 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 			DBG_LOW(" -> hit\n");
 			/* Update the HPTE */
 			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
-						~(HPTE_R_PP | HPTE_R_N)) |
-					       (newpp & (HPTE_R_PP | HPTE_R_N |
+						~(HPTE_R_PPP | HPTE_R_N)) |
+					       (newpp & (HPTE_R_PPP | HPTE_R_N |
 							 HPTE_R_C)));
 		}
 		native_unlock_hpte(hptep);
@@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 
 	/* Update the HPTE */
 	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
-			~(HPTE_R_PP | HPTE_R_N)) |
-		(newpp & (HPTE_R_PP | HPTE_R_N)));
+				~(HPTE_R_PPP | HPTE_R_N)) |
+			       (newpp & (HPTE_R_PPP | HPTE_R_N)));
 	/*
 	 * Ensure it is out of the tlb too. Bolted entries base and
 	 * actual page size will be same.
@@ -550,7 +550,11 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 		}
 	}
 	/* This works for all page sizes, and for 256M and 1T segments */
-	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		*ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
+	else
+		*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+
 	shift = mmu_psize_defs[size].shift;
 
 	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b2740c67e172..2971ea18c768 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	/*
 	 * We can't allow hardware to update hpte bits. Hence always
 	 * set 'R' bit and set 'C' if it is a write fault
-	 * Memory coherence is always enabled
 	 */
-	rflags |=  HPTE_R_R | HPTE_R_M;
+	rflags |=  HPTE_R_R;
 
 	if (pteflags & _PAGE_DIRTY)
 		rflags |= HPTE_R_C;
@@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 
 	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
 		rflags |= HPTE_R_I;
-	if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT)
+	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
 		rflags |= (HPTE_R_I | HPTE_R_G);
-	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
-		rflags |= (HPTE_R_I | HPTE_R_W);
+	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+		rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
+	else
+		/*
+		 * Add memory coherence if cache inhibited is not set
+		 */
+		rflags |= HPTE_R_M;
 
 	return rflags;
 }
@@ -918,6 +922,10 @@ void __init hash__early_init_mmu(void)
 	vmemmap = (struct page *)H_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
 
+#ifdef CONFIG_PCI
+	pci_io_base = ISA_IO_BASE;
+#endif
+
 	/* Initialize the MMU Hash table and create the linear mapping
 	 * of memory. Has to be done before SLB initialization as this is
 	 * currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 5aac1a3f86cd..119d18611500 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	cachep = PGT_CACHE(pdshift - pshift);
 #endif
 
-	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+	new = kmem_cache_zalloc(cachep, GFP_KERNEL);
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 227b2a6c4544..196222227e82 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index)
 	/*
 	 * set the process table entry,
 	 */
-	rts_field = 3ull << PPC_BITLSHIFT(2);
+	rts_field = radix__get_tree_size();
 	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
 	return 0;
 }
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c939e6e57a9e..7931e1496f0d 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -160,9 +160,8 @@ redo:
 	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
 	/*
 	 * Fill in the process table.
-	 * we support 52 bits, hence 52-28 = 24, 11000
 	 */
-	rts_field = 3ull << PPC_BITLSHIFT(2);
+	rts_field = radix__get_tree_size();
 	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
 	/*
 	 * Fill in the partition table. We are suppose to use effective address
@@ -176,10 +175,8 @@ redo:
 static void __init radix_init_partition_table(void)
 {
 	unsigned long rts_field;
-	/*
-	 * we support 52 bits, hence 52-28 = 24, 11000
-	 */
-	rts_field = 3ull << PPC_BITLSHIFT(2);
+
+	rts_field = radix__get_tree_size();
 
 	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
 	partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
@@ -331,6 +328,11 @@ void __init radix__early_init_mmu(void)
 	__vmalloc_end = RADIX_VMALLOC_END;
 	vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+	pci_io_base = ISA_IO_BASE;
+#endif
+
 	/*
 	 * For now radix also use the same frag size
 	 */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bf7bf32b54f8..7f922f557936 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	pte_t *pte;
 
 	if (slab_is_available()) {
-		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	} else {
 		pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
 		if (pte)
@@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *ptepage;
 
-	gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+	gfp_t flags = GFP_KERNEL | __GFP_ZERO;
 
 	ptepage = alloc_pages(flags, 0);
 	if (!ptepage)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e009e0604a8a..f5e8d4edb808 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm)
 static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 {
 	void *ret = NULL;
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	if (!page)
 		return NULL;
 	if (!kernel && !pgtable_page_ctor(page)) {
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 0fdaf93a3e09..ab2f60e812e2 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -18,16 +18,20 @@
 
 static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void __tlbiel_pid(unsigned long pid, int set)
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+static inline void __tlbiel_pid(unsigned long pid, int set,
+				unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = PPC_BIT(53); /* IS = 1 */
 	rb |= set << PPC_BITLSHIFT(51);
 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 2;  /* invalidate all the caches */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
@@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set)
 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
  */
-static inline void _tlbiel_pid(unsigned long pid)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
 {
 	int set;
 
 	for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
-		__tlbiel_pid(pid, set);
+		__tlbiel_pid(pid, set, ric);
 	}
 	return;
 }
 
-static inline void _tlbie_pid(unsigned long pid)
+static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = PPC_BIT(53); /* IS = 1 */
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 2;  /* invalidate all the caches */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
@@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid)
 }
 
 static inline void _tlbiel_va(unsigned long va, unsigned long pid,
-			      unsigned long ap)
+			      unsigned long ap, unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = va & ~(PPC_BITMASK(52, 63));
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 0;  /* no cluster flush yet */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
@@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
 }
 
 static inline void _tlbie_va(unsigned long va, unsigned long pid,
-			     unsigned long ap)
+			     unsigned long ap, unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = va & ~(PPC_BITMASK(52, 63));
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 0;  /* no cluster flush yet */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
@@ -117,25 +118,40 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
  */
 void radix__local_flush_tlb_mm(struct mm_struct *mm)
 {
-	unsigned int pid;
+	unsigned long pid;
 
 	preempt_disable();
 	pid = mm->context.id;
 	if (pid != MMU_NO_CONTEXT)
-		_tlbiel_pid(pid);
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
 
+void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+	unsigned long pid;
+	struct mm_struct *mm = tlb->mm;
+
+	preempt_disable();
+
+	pid = mm->context.id;
+	if (pid != MMU_NO_CONTEXT)
+		_tlbiel_pid(pid, RIC_FLUSH_PWC);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+
 void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 			    unsigned long ap, int nid)
 {
-	unsigned int pid;
+	unsigned long pid;
 
 	preempt_disable();
 	pid = mm ? mm->context.id : 0;
 	if (pid != MMU_NO_CONTEXT)
-		_tlbiel_va(vmaddr, pid, ap);
+		_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
 	preempt_enable();
 }
 
@@ -160,7 +176,7 @@ static int mm_is_core_local(struct mm_struct *mm)
 
 void radix__flush_tlb_mm(struct mm_struct *mm)
 {
-	unsigned int pid;
+	unsigned long pid;
 
 	preempt_disable();
 	pid = mm->context.id;
@@ -172,20 +188,46 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
-		_tlbie_pid(pid);
+		_tlbie_pid(pid, RIC_FLUSH_ALL);
 		if (lock_tlbie)
 			raw_spin_unlock(&native_tlbie_lock);
 	} else
-		_tlbiel_pid(pid);
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 no_context:
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__flush_tlb_mm);
 
+void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+	unsigned long pid;
+	struct mm_struct *mm = tlb->mm;
+
+	preempt_disable();
+
+	pid = mm->context.id;
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		goto no_context;
+
+	if (!mm_is_core_local(mm)) {
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+		if (lock_tlbie)
+			raw_spin_lock(&native_tlbie_lock);
+		_tlbie_pid(pid, RIC_FLUSH_PWC);
+		if (lock_tlbie)
+			raw_spin_unlock(&native_tlbie_lock);
+	} else
+		_tlbiel_pid(pid, RIC_FLUSH_PWC);
+no_context:
+	preempt_enable();
+}
+EXPORT_SYMBOL(radix__flush_tlb_pwc);
+
 void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 		       unsigned long ap, int nid)
 {
-	unsigned int pid;
+	unsigned long pid;
 
 	preempt_disable();
 	pid = mm ? mm->context.id : 0;
@@ -196,11 +238,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
-		_tlbie_va(vmaddr, pid, ap);
+		_tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
 		if (lock_tlbie)
 			raw_spin_unlock(&native_tlbie_lock);
 	} else
-		_tlbiel_va(vmaddr, pid, ap);
+		_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
 bail:
 	preempt_enable();
 }
@@ -224,7 +266,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 	if (lock_tlbie)
 		raw_spin_lock(&native_tlbie_lock);
-	_tlbie_pid(0);
+	_tlbie_pid(0, RIC_FLUSH_ALL);
 	if (lock_tlbie)
 		raw_spin_unlock(&native_tlbie_lock);
 }
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index b7dfc1359d01..3e8865b187de 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -927,7 +927,7 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 	dn = pci_device_to_OF_node(dev);
 	pdn = PCI_DN(dn);
 	buid = pdn->phb->buid;
-	cfg_addr = (pdn->busno << 8) | pdn->devfn;
+	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
 
 	ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
 		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
@@ -956,7 +956,7 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 	dn = pci_device_to_OF_node(dev);
 	pdn = PCI_DN(dn);
 	buid = pdn->phb->buid;
-	cfg_addr = (pdn->busno << 8) | pdn->devfn;
+	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
 
 	do {
 		/* extra outputs are LIOBN and dma-addr (hi, lo) */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 5e04f3cbd320..8ae236b0f80b 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc)
 		"	la	%0,0\n"
 		"1:\n"
 		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
+		: "=d" (rc), "=&d" (orig_fpc)
 		: "d" (fpc), "0" (-EINVAL));
 	return rc;
 }
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 37b9017c6a96..ac82e8eb936d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -245,6 +245,7 @@ struct kvm_vcpu_stat {
 	u32 exit_stop_request;
 	u32 exit_validity;
 	u32 exit_instruction;
+	u32 exit_pei;
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
 	u32 halt_poll_invalid;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f20abdb5630a..d14069d4b88d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2064,12 +2064,5 @@ void s390_reset_system(void)
 	S390_lowcore.program_new_psw.addr =
 		(unsigned long) s390_base_pgm_handler;
 
-	/*
-	 * Clear subchannel ID and number to signal new kernel that no CCW or
-	 * SCSI IPL has been done (for kexec and kdump)
-	 */
-	S390_lowcore.subchannel_id = 0;
-	S390_lowcore.subchannel_nr = 0;
-
 	do_reset_calls();
 }
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 59215c518f37..7ec63b1d920d 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
 
 /* Performance monitoring unit for s390x */
 static struct pmu cpumf_pmu = {
+	.task_ctx_nr  = perf_sw_context,
+	.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
 	.pmu_enable   = cpumf_pmu_enable,
 	.pmu_disable  = cpumf_pmu_disable,
 	.event_init   = cpumf_pmu_event_init,
@@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void)
 		goto out;
 	}
 
-	/* The CPU measurement counter facility does not have overflow
-	 * interrupts to do sampling.  Sampling must be provided by
-	 * external means, for example, by timers.
-	 */
-	cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
 	cpumf_pmu.attr_groups = cpumf_cf_event_group();
 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
 	if (rc) {
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 2e6b54e4d3f9..252157181302 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -341,6 +341,8 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
 static int handle_partial_execution(struct kvm_vcpu *vcpu)
 {
+	vcpu->stat.exit_pei++;
+
 	if (vcpu->arch.sie_block->ipa == 0xb254)	/* MVPG */
 		return handle_mvpg_pei(vcpu);
 	if (vcpu->arch.sie_block->ipa >> 8 == 0xae)	/* SIGP */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6d8ec3ac9dd8..43f2a2b80490 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -61,6 +61,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
+	{ "exit_pei", VCPU_STAT(exit_pei) },
 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
@@ -657,7 +658,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 		kvm->arch.model.cpuid = proc->cpuid;
 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
 		unblocked_ibc = sclp.ibc & 0xfff;
-		if (lowest_ibc) {
+		if (lowest_ibc && proc->ibc) {
 			if (proc->ibc > unblocked_ibc)
 				kvm->arch.model.ibc = unblocked_ibc;
 			else if (proc->ibc < lowest_ibc)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index e8b5962ac12a..e2565d2d0c32 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			return table;
 	}
 	/* Allocate a fresh page */
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4324b87f9398..9f0ce0e6eeb4 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 	pgste = pgste_get_lock(ptep);
 	pgstev = pgste_val(pgste);
 	pte = *ptep;
-	if (pte_swap(pte) &&
+	if (!reset && pte_swap(pte) &&
 	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
 	     (pgstev & _PGSTE_GPS_ZERO))) {
 		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h
index 2e067657db98..49b012d78c1a 100644
--- a/arch/score/include/asm/pgalloc.h
+++ b/arch/score/include/asm/pgalloc.h
@@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO,
-					PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (!pte)
 		return NULL;
 	clear_highpage(pte);
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index a33673b3687d..f3f42c84c40f 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	struct page *page;
 	void *pg;
 
-	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 	if (!pg)
 		return NULL;
 	page = virt_to_page(pg);
diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
index 26e03a1f7ca4..a62bd8696779 100644
--- a/arch/sh/mm/pgtable.c
+++ b/arch/sh/mm/pgtable.c
@@ -1,7 +1,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO
 
 static struct kmem_cache *pgd_cachep;
 #if PAGETABLE_LEVELS > 2
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 5e3187185b4a..3529f1378cd8 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache,
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache,
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 14bb0d5ed3c6..aec508e37490 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2704,8 +2704,7 @@ void __flush_tlb_all(void)
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 			    unsigned long address)
 {
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	pte_t *pte = NULL;
 
 	if (page)
@@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 pgtable_t pte_alloc_one(struct mm_struct *mm,
 			unsigned long address)
 {
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	if (!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 4b7cef9e94e0..c1467ac59ce6 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -78,7 +78,7 @@ struct thread_info {
 
 #ifndef __ASSEMBLY__
 
-void arch_release_thread_info(struct thread_info *info);
+void arch_release_thread_stack(unsigned long *stack);
 
 /* How to get the thread information struct from C. */
 register unsigned long stack_pointer __asm__("sp");
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 6b705ccc9cc1..a465d8372edd 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -73,8 +73,9 @@ void arch_cpu_idle(void)
 /*
  * Release a thread_info structure
  */
-void arch_release_thread_info(struct thread_info *info)
+void arch_release_thread_stack(unsigned long *stack)
 {
+	struct thread_info *info = (void *)stack;
 	struct single_step_state *step_state = info->step_state;
 
 	if (step_state) {
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 7bf2491a9c1f..c4d5bf841a7f 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
 			       int order)
 {
-	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
+	gfp_t flags = GFP_KERNEL|__GFP_ZERO;
 	struct page *p;
 	int i;
 
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index b2a2dff50b4e..e7437ec62710 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	pte_t *pte;
 
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
 }
 
@@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
-	pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index 2e02d1356fdf..26775793c204 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
 #define pgd_alloc(mm)			get_pgd_slow(mm)
 #define pgd_free(mm, pgd)		free_pgd_slow(mm, pgd)
 
-#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 
 /*
  * Allocate one PTE table.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0a7b885964ba..d9a94da0c29f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2439,6 +2439,15 @@ config PCI_CNB20LE_QUIRK
 
 source "drivers/pci/Kconfig"
 
+config ISA_BUS
+	bool "ISA-style bus support on modern systems" if EXPERT
+	select ISA_BUS_API
+	help
+	  Enables ISA-style drivers on modern systems. This is necessary to
+	  support PC/104 devices on X86_64 platforms.
+
+	  If unsure, say N.
+
 # x86_64 have no ISA slots, but can have ISA-style DMA.
 config ISA_DMA_API
 	bool "ISA-style DMA support" if (X86_64 && EXPERT)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 700a9c6e6159..be8e688fa0d4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -162,6 +162,9 @@ isoimage: $(obj)/bzImage
 	for i in lib lib64 share end ; do \
 		if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
 			cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+			if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \
+				cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \
+			fi ; \
 			break ; \
 		fi ; \
 		if [ $$i = end ] ; then exit 1 ; fi ; \
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 99c4bab123cd..e30eef4f29a6 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void)
 	int i;
 
 	for (i = 0; i < rapl_pmus->maxpkg; i++)
-		kfree(rapl_pmus->pmus + i);
+		kfree(rapl_pmus->pmus[i]);
 	kfree(rapl_pmus);
 }
 
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b2625867ebd1..874e8bd64d1d 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2868,27 +2868,10 @@ static struct intel_uncore_type bdx_uncore_cbox = {
 	.format_group		= &hswep_uncore_cbox_format_group,
 };
 
-static struct intel_uncore_type bdx_uncore_sbox = {
-	.name			= "sbox",
-	.num_counters		= 4,
-	.num_boxes		= 4,
-	.perf_ctr_bits		= 48,
-	.event_ctl		= HSWEP_S0_MSR_PMON_CTL0,
-	.perf_ctr		= HSWEP_S0_MSR_PMON_CTR0,
-	.event_mask		= HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
-	.box_ctl		= HSWEP_S0_MSR_PMON_BOX_CTL,
-	.msr_offset		= HSWEP_SBOX_MSR_OFFSET,
-	.ops			= &hswep_uncore_sbox_msr_ops,
-	.format_group		= &hswep_uncore_sbox_format_group,
-};
-
-#define BDX_MSR_UNCORE_SBOX	3
-
 static struct intel_uncore_type *bdx_msr_uncores[] = {
 	&bdx_uncore_ubox,
 	&bdx_uncore_cbox,
 	&hswep_uncore_pcu,
-	&bdx_uncore_sbox,
 	NULL,
 };
 
@@ -2897,10 +2880,6 @@ void bdx_uncore_cpu_init(void)
 	if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
 	uncore_msr_uncores = bdx_msr_uncores;
-
-	/* BDX-DE doesn't have SBOX */
-	if (boot_cpu_data.x86_model == 86)
-		uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
 }
 
 static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
new file mode 100644
index 000000000000..6999f7d01a0d
--- /dev/null
+++ b/arch/x86/include/asm/intel-family.h
@@ -0,0 +1,68 @@
+#ifndef _ASM_X86_INTEL_FAMILY_H
+#define _ASM_X86_INTEL_FAMILY_H
+
+/*
+ * "Big Core" Processors (Branded as Core, Xeon, etc...)
+ *
+ * The "_X" parts are generally the EP and EX Xeons, or the
+ * "Extreme" ones, like Broadwell-E.
+ *
+ * Things ending in "2" are usually because we have no better
+ * name for them.  There's no processor called "WESTMERE2".
+ */
+
+#define INTEL_FAM6_CORE_YONAH		0x0E
+#define INTEL_FAM6_CORE2_MEROM		0x0F
+#define INTEL_FAM6_CORE2_MEROM_L	0x16
+#define INTEL_FAM6_CORE2_PENRYN		0x17
+#define INTEL_FAM6_CORE2_DUNNINGTON	0x1D
+
+#define INTEL_FAM6_NEHALEM		0x1E
+#define INTEL_FAM6_NEHALEM_EP		0x1A
+#define INTEL_FAM6_NEHALEM_EX		0x2E
+#define INTEL_FAM6_WESTMERE		0x25
+#define INTEL_FAM6_WESTMERE2		0x1F
+#define INTEL_FAM6_WESTMERE_EP		0x2C
+#define INTEL_FAM6_WESTMERE_EX		0x2F
+
+#define INTEL_FAM6_SANDYBRIDGE		0x2A
+#define INTEL_FAM6_SANDYBRIDGE_X	0x2D
+#define INTEL_FAM6_IVYBRIDGE		0x3A
+#define INTEL_FAM6_IVYBRIDGE_X		0x3E
+
+#define INTEL_FAM6_HASWELL_CORE		0x3C
+#define INTEL_FAM6_HASWELL_X		0x3F
+#define INTEL_FAM6_HASWELL_ULT		0x45
+#define INTEL_FAM6_HASWELL_GT3E		0x46
+
+#define INTEL_FAM6_BROADWELL_CORE	0x3D
+#define INTEL_FAM6_BROADWELL_XEON_D	0x56
+#define INTEL_FAM6_BROADWELL_GT3E	0x47
+#define INTEL_FAM6_BROADWELL_X		0x4F
+
+#define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
+#define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E
+#define INTEL_FAM6_SKYLAKE_X		0x55
+#define INTEL_FAM6_KABYLAKE_MOBILE	0x8E
+#define INTEL_FAM6_KABYLAKE_DESKTOP	0x9E
+
+/* "Small Core" Processors (Atom) */
+
+#define INTEL_FAM6_ATOM_PINEVIEW	0x1C
+#define INTEL_FAM6_ATOM_LINCROFT	0x26
+#define INTEL_FAM6_ATOM_PENWELL		0x27
+#define INTEL_FAM6_ATOM_CLOVERVIEW	0x35
+#define INTEL_FAM6_ATOM_CEDARVIEW	0x36
+#define INTEL_FAM6_ATOM_SILVERMONT1	0x37 /* BayTrail/BYT / Valleyview */
+#define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
+#define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
+#define INTEL_FAM6_ATOM_MERRIFIELD1	0x4A /* Tangier */
+#define INTEL_FAM6_ATOM_MERRIFIELD2	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_GOLDMONT	0x5C
+#define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
+
+/* Xeon Phi */
+
+#define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
+
+#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4421b5da409d..d1d1e5094c28 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t;
 #define RELATIVECALL_OPCODE 0xe8
 #define RELATIVE_ADDR_SIZE 4
 #define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR)					       \
-	(((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
-			      THREAD_SIZE - (unsigned long)(ADDR)))    \
-	 ? (MAX_STACK_SIZE)					       \
-	 : (((unsigned long)current_thread_info()) +		       \
-	    THREAD_SIZE - (unsigned long)(ADDR)))
+#define CUR_STACK_SIZE(ADDR) \
+	(current_top_of_stack() - (unsigned long)(ADDR))
+#define MIN_STACK_SIZE(ADDR)				\
+	(MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ?	\
+	 MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR))
 
 #define flush_insn_slot(p)	do { } while (0)
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0fbe7e70dc1..69e62862b622 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #include <linux/irqbypass.h>
 #include <linux/hyperv.h>
 
+#include <asm/apic.h>
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
@@ -1368,4 +1369,14 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+static inline int kvm_cpu_get_apicid(int mps_cpu)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	return __default_cpu_present_to_apicid(mps_cpu);
+#else
+	WARN_ON_ONCE(1);
+	return BAD_APICID;
+#endif
+}
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 7dc1d8fef7fd..b5fee97813cd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -122,7 +122,7 @@ notrace static inline void native_write_msr(unsigned int msr,
 		     "2:\n"
 		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
 		     : : "c" (msr), "a"(low), "d" (high) : "memory");
-	if (msr_tracepoint_active(__tracepoint_read_msr))
+	if (msr_tracepoint_active(__tracepoint_write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
 
@@ -141,7 +141,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
 		     : "c" (msr), "0" (low), "d" (high),
 		       [fault] "i" (-EIO)
 		     : "memory");
-	if (msr_tracepoint_active(__tracepoint_read_msr))
+	if (msr_tracepoint_active(__tracepoint_write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), err);
 	return err;
 }
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index bf7f8b55b0f9..574c23cf761a 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *page;
-	page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	page = alloc_pages(GFP_KERNEL |  __GFP_ZERO, 0);
 	if (!page)
 		return NULL;
 	if (!pgtable_pmd_page_ctor(page)) {
@@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index fdcc04020636..7c1c89598688 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -69,29 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
-{
-	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
-	return pvclock_scale_delta(delta, src->tsc_to_system_mul,
-				   src->tsc_shift);
-}
-
-static __always_inline
 unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 			       cycle_t *cycles, u8 *flags)
 {
 	unsigned version;
-	cycle_t ret, offset;
-	u8 ret_flags;
+	cycle_t offset;
+	u64 delta;
 
 	version = src->version;
+	/* Make the latest version visible */
+	smp_rmb();
 
-	offset = pvclock_get_nsec_offset(src);
-	ret = src->system_time + offset;
-	ret_flags = src->flags;
-
-	*cycles = ret;
-	*flags = ret_flags;
+	delta = rdtsc_ordered() - src->tsc_timestamp;
+	offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+				   src->tsc_shift);
+	*cycles = src->system_time + offset;
+	*flags = src->flags;
 	return version;
 }
 
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 7c247e7404be..0944218af9e2 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -14,7 +14,7 @@ extern int kstack_depth_to_print;
 struct thread_info;
 struct stacktrace_ops;
 
-typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
+typedef unsigned long (*walk_stack_t)(struct task_struct *task,
 				      unsigned long *stack,
 				      unsigned long bp,
 				      const struct stacktrace_ops *ops,
@@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
 				      int *graph);
 
 extern unsigned long
-print_context_stack(struct thread_info *tinfo,
+print_context_stack(struct task_struct *task,
 		    unsigned long *stack, unsigned long bp,
 		    const struct stacktrace_ops *ops, void *data,
 		    unsigned long *end, int *graph);
 
 extern unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
+print_context_stack_bp(struct task_struct *task,
 		       unsigned long *stack, unsigned long bp,
 		       const struct stacktrace_ops *ops, void *data,
 		       unsigned long *end, int *graph);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 84e33ff5a6d5..446702ed99dc 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2588,8 +2588,8 @@ static struct resource * __init ioapic_setup_resources(void)
 		res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
+		ioapics[i].iomem_res = &res[num];
 		num++;
-		ioapics[i].iomem_res = res;
 	}
 
 	ioapic_resources = res;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c343a54bed39..f5c69d8974e1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -674,14 +674,14 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 	u64 value;
 
 	/* re-enable TopologyExtensions if switched off by BIOS */
-	if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
+	if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) &&
 	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {
 
 		if (msr_set_bit(0xc0011005, 54) > 0) {
 			rdmsrl(0xc0011005, value);
 			if (value & BIT_64(54)) {
 				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-				pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+				pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
 			}
 		}
 	}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2bb25c3fe2e8..ef8017ca5ba9 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -42,16 +42,14 @@ void printk_address(unsigned long address)
 static void
 print_ftrace_graph_addr(unsigned long addr, void *data,
 			const struct stacktrace_ops *ops,
-			struct thread_info *tinfo, int *graph)
+			struct task_struct *task, int *graph)
 {
-	struct task_struct *task;
 	unsigned long ret_addr;
 	int index;
 
 	if (addr != (unsigned long)return_to_handler)
 		return;
 
-	task = tinfo->task;
 	index = task->curr_ret_stack;
 
 	if (!task->ret_stack || index < *graph)
@@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
 static inline void
 print_ftrace_graph_addr(unsigned long addr, void *data,
 			const struct stacktrace_ops *ops,
-			struct thread_info *tinfo, int *graph)
+			struct task_struct *task, int *graph)
 { }
 #endif
 
@@ -79,10 +77,10 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-static inline int valid_stack_ptr(struct thread_info *tinfo,
+static inline int valid_stack_ptr(struct task_struct *task,
 			void *p, unsigned int size, void *end)
 {
-	void *t = tinfo;
+	void *t = task_stack_page(task);
 	if (end) {
 		if (p < end && p >= (end-THREAD_SIZE))
 			return 1;
@@ -93,14 +91,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo,
 }
 
 unsigned long
-print_context_stack(struct thread_info *tinfo,
+print_context_stack(struct task_struct *task,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data,
 		unsigned long *end, int *graph)
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
-	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+	while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
 		unsigned long addr;
 
 		addr = *stack;
@@ -112,7 +110,7 @@ print_context_stack(struct thread_info *tinfo,
 			} else {
 				ops->address(data, addr, 0);
 			}
-			print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+			print_ftrace_graph_addr(addr, data, ops, task, graph);
 		}
 		stack++;
 	}
@@ -121,7 +119,7 @@ print_context_stack(struct thread_info *tinfo,
 EXPORT_SYMBOL_GPL(print_context_stack);
 
 unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
+print_context_stack_bp(struct task_struct *task,
 		       unsigned long *stack, unsigned long bp,
 		       const struct stacktrace_ops *ops, void *data,
 		       unsigned long *end, int *graph)
@@ -129,7 +127,7 @@ print_context_stack_bp(struct thread_info *tinfo,
 	struct stack_frame *frame = (struct stack_frame *)bp;
 	unsigned long *ret_addr = &frame->return_address;
 
-	while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
+	while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
 		unsigned long addr = *ret_addr;
 
 		if (!__kernel_text_address(addr))
@@ -139,7 +137,7 @@ print_context_stack_bp(struct thread_info *tinfo,
 			break;
 		frame = frame->next_frame;
 		ret_addr = &frame->return_address;
-		print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+		print_ftrace_graph_addr(addr, data, ops, task, graph);
 	}
 
 	return (unsigned long)frame;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 464ffd69b92e..fef917e79b9d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		bp = stack_frame(task, regs);
 
 	for (;;) {
-		struct thread_info *context;
 		void *end_stack;
 
 		end_stack = is_hardirq_stack(stack, cpu);
 		if (!end_stack)
 			end_stack = is_softirq_stack(stack, cpu);
 
-		context = task_thread_info(task);
-		bp = ops->walk_stack(context, stack, bp, ops, data,
+		bp = ops->walk_stack(task, stack, bp, ops, data,
 				     end_stack, &graph);
 
 		/* Stop if not on irq stack */
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5f1c6266eb30..d558a8a49016 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	struct thread_info *tinfo;
 	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
 	unsigned long dummy;
 	unsigned used = 0;
@@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	 * current stack address. If the stacks consist of nested
 	 * exceptions
 	 */
-	tinfo = task_thread_info(task);
 	while (!done) {
 		unsigned long *stack_end;
 		enum stack_type stype;
@@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			if (ops->stack(data, id) < 0)
 				break;
 
-			bp = ops->walk_stack(tinfo, stack, bp, ops,
+			bp = ops->walk_stack(task, stack, bp, ops,
 					     data, stack_end, &graph);
 			ops->stack(data, "<EOE>");
 			/*
@@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 
 			if (ops->stack(data, "IRQ") < 0)
 				break;
-			bp = ops->walk_stack(tinfo, stack, bp,
+			bp = ops->walk_stack(task, stack, bp,
 				     ops, data, stack_end, &graph);
 			/*
 			 * We link to the next stack (which would be
@@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	/*
 	 * This handles the process stack:
 	 */
-	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
+	bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4d38416e2a7f..04f89caef9c4 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -57,7 +57,7 @@
 # error "Need more than one PGD for the ESPFIX hack"
 #endif
 
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 
 /* This contains the *bottom* address of the espfix stack */
 DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 38da8f29a9c8..c627bf8d98ad 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -130,11 +130,9 @@ void irq_ctx_init(int cpu)
 
 void do_softirq_own_stack(void)
 {
-	struct thread_info *curstk;
 	struct irq_stack *irqstk;
 	u32 *isp, *prev_esp;
 
-	curstk = current_stack();
 	irqstk = __this_cpu_read(softirq_stack);
 
 	/* build the stack frame on the softirq stack */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 38cf7a741250..7847e5c0e0b5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * normal page fault.
 		 */
 		regs->ip = (unsigned long)cur->addr;
+		/*
+		 * Trap flag (TF) has been set here because this fault
+		 * happened where the single stepping will be done.
+		 * So clear it by resetting the current kprobe:
+		 */
+		regs->flags &= ~X86_EFLAGS_TF;
+
+		/*
+		 * If the TF flag was set before the kprobe hit,
+		 * don't touch it:
+		 */
 		regs->flags |= kcb->kprobe_old_flags;
+
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 99bfc025111d..06c58ce46762 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -61,11 +61,16 @@ void pvclock_resume(void)
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 {
 	unsigned version;
-	cycle_t ret;
 	u8 flags;
 
 	do {
-		version = __pvclock_read_cycles(src, &ret, &flags);
+		version = src->version;
+		/* Make the latest version visible */
+		smp_rmb();
+
+		flags = src->flags;
+		/* Make sure that the version double-check is last. */
+		smp_rmb();
 	} while ((src->version & 1) || version != src->version);
 
 	return flags & valid_flags;
@@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 
 	do {
 		version = __pvclock_read_cycles(src, &ret, &flags);
+		/* Make sure that the version double-check is last. */
+		smp_rmb();
 	} while ((src->version & 1) || version != src->version);
 
 	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d1590486204a..00f03d82e69a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -96,6 +96,12 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
 		local_irq_disable();
 }
 
+/*
+ * In IST context, we explicitly disable preemption.  This serves two
+ * purposes: it makes it much less likely that we would accidentally
+ * schedule in IST context and it will force a warning if we somehow
+ * manage to schedule by accident.
+ */
 void ist_enter(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
@@ -110,13 +116,7 @@ void ist_enter(struct pt_regs *regs)
 		rcu_nmi_enter();
 	}
 
-	/*
-	 * We are atomic because we're on the IST stack; or we're on
-	 * x86_32, in which case we still shouldn't schedule; or we're
-	 * on x86_64 and entered from user mode, in which case we're
-	 * still atomic unless ist_begin_non_atomic is called.
-	 */
-	preempt_count_add(HARDIRQ_OFFSET);
+	preempt_disable();
 
 	/* This code is a bit fragile.  Test it. */
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
@@ -124,7 +124,7 @@ void ist_enter(struct pt_regs *regs)
 
 void ist_exit(struct pt_regs *regs)
 {
-	preempt_count_sub(HARDIRQ_OFFSET);
+	preempt_enable_no_resched();
 
 	if (!user_mode(regs))
 		rcu_nmi_exit();
@@ -155,7 +155,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	BUG_ON((unsigned long)(current_top_of_stack() -
 			       current_stack_pointer()) >= THREAD_SIZE);
 
-	preempt_count_sub(HARDIRQ_OFFSET);
+	preempt_enable_no_resched();
 }
 
 /**
@@ -165,7 +165,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
  */
 void ist_end_non_atomic(void)
 {
-	preempt_count_add(HARDIRQ_OFFSET);
+	preempt_disable();
 }
 
 static nokprobe_inline int
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b283ff63..a397200281c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 
 	/* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
 	if (guest_tsc < tsc_deadline)
-		__delay(tsc_deadline - guest_tsc);
+		__delay(min(tsc_deadline - guest_tsc,
+			nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
 }
 
 static void start_apic_timer(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1163e8173e5a..16ef31b87452 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -238,7 +238,9 @@ module_param(nested, int, S_IRUGO);
 
 /* enable / disable AVIC */
 static int avic;
+#ifdef CONFIG_X86_LOCAL_APIC
 module_param(avic, int, S_IRUGO);
+#endif
 
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -981,11 +983,14 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
-	if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
-		avic = false;
-
-	if (avic)
-		pr_info("AVIC enabled\n");
+	if (avic) {
+		if (!npt_enabled ||
+		    !boot_cpu_has(X86_FEATURE_AVIC) ||
+		    !IS_ENABLED(CONFIG_X86_LOCAL_APIC))
+			avic = false;
+		else
+			pr_info("AVIC enabled\n");
+	}
 
 	return 0;
 
@@ -1324,7 +1329,7 @@ free_avic:
 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
 {
 	u64 entry;
-	int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu);
+	int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!kvm_vcpu_apicv_active(vcpu))
@@ -1349,7 +1354,7 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	u64 entry;
 	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
-	int h_physical_id = __default_cpu_present_to_apicid(cpu);
+	int h_physical_id = kvm_cpu_get_apicid(cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!kvm_vcpu_apicv_active(vcpu))
@@ -4236,7 +4241,7 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 
 	if (avic_vcpu_is_running(vcpu))
 		wrmsrl(SVM_AVIC_DOORBELL,
-		       __default_cpu_present_to_apicid(vcpu->cpu));
+		       kvm_cpu_get_apicid(vcpu->cpu));
 	else
 		kvm_vcpu_wake_up(vcpu);
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fb93010beaa4..64a79f271276 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2072,7 +2072,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	unsigned int dest;
 
 	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP))
+		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+		!kvm_vcpu_apicv_active(vcpu))
 		return;
 
 	do {
@@ -2180,7 +2181,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
 	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP))
+		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+		!kvm_vcpu_apicv_active(vcpu))
 		return;
 
 	/* Set SN when the vCPU is preempted */
@@ -6669,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 
 	/* Checks for #GP/#SS exceptions. */
 	exn = false;
-	if (is_protmode(vcpu)) {
+	if (is_long_mode(vcpu)) {
+		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
+		 * non-canonical form. This is the only check on the memory
+		 * destination for long mode!
+		 */
+		exn = is_noncanonical_address(*ret);
+	} else if (is_protmode(vcpu)) {
 		/* Protected mode: apply checks for segment validity in the
 		 * following order:
 		 * - segment type check (#GP(0) may be thrown)
@@ -6686,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 			 * execute-only code segment
 			 */
 			exn = ((s.type & 0xa) == 8);
-	}
-	if (exn) {
-		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-		return 1;
-	}
-	if (is_long_mode(vcpu)) {
-		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
-		 * non-canonical form. This is an only check for long mode.
-		 */
-		exn = is_noncanonical_address(*ret);
-	} else if (is_protmode(vcpu)) {
+		if (exn) {
+			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+			return 1;
+		}
 		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
 		 */
 		exn = (s.unusable != 0);
@@ -10714,7 +10715,8 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
 	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP))
+		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+		!kvm_vcpu_apicv_active(vcpu))
 		return 0;
 
 	vcpu->pre_pcpu = vcpu->cpu;
@@ -10780,7 +10782,8 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 
 	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP))
+		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
+		!kvm_vcpu_apicv_active(vcpu))
 		return;
 
 	do {
@@ -10833,7 +10836,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 	int idx, ret = -EINVAL;
 
 	if (!kvm_arch_has_assigned_device(kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP))
+		!irq_remapping_cap(IRQ_POSTING_CAP) ||
+		!kvm_vcpu_apicv_active(kvm->vcpus[0]))
 		return 0;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 902d9da12392..7da5dd2057a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 static unsigned long max_tsc_khz;
 
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
-{
-	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
-				   vcpu->arch.virtual_tsc_shift);
-}
-
 static u32 adjust_tsc_khz(u32 khz, s32 ppm)
 {
 	u64 v = (u64)khz * (1000000 + ppm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7ce3634ab5fe..a82ca466b62e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
 #define ARCH_X86_KVM_X86_H
 
 #include <linux/kvm_host.h>
+#include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
 
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
@@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns;
 
 extern struct static_key kvm_no_apic_vcpu;
 
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+{
+	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+				   vcpu->arch.virtual_tsc_shift);
+}
+
 /* Same "calling convention" as do_div:
  * - divide (n << 32) by base
  * - put result in n
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 4eb287e25043..aa0ff4b02a96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,7 +6,7 @@
 #include <asm/fixmap.h>
 #include <asm/mtrr.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
 
 #ifdef CONFIG_HIGHPTE
 #define PGALLOC_USER_GFP __GFP_HIGHMEM
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6e7242be1c87..b226b3f497f1 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void)
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return 0;
 
-	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO;
+	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
 	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
 	if (!efi_pgd)
 		return -ENOMEM;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 478a2de543a5..67433714b791 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 
 	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
 	 * We include the PMD passed in on _both_ boundaries. */
-	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
+	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
 			pmd++, vaddr += PMD_SIZE) {
 		if (pmd_none(*pmd))
 			continue;
@@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
-	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
-		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
-			       pte_val_ma(pte));
-
-	return pte;
-}
-#else /* CONFIG_X86_64 */
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	unsigned long pfn;
-
-	if (xen_feature(XENFEAT_writable_page_tables) ||
-	    xen_feature(XENFEAT_auto_translated_physmap) ||
-	    xen_start_info->mfn_list >= __START_KERNEL_map)
-		return pte;
-
-	/*
-	 * Pages belonging to the initial p2m list mapped outside the default
-	 * address range must be mapped read-only. This region contains the
-	 * page tables for mapping the p2m list, too, and page tables MUST be
-	 * mapped read-only.
-	 */
-	pfn = pte_pfn(pte);
-	if (pfn >= xen_start_info->first_p2m_pfn &&
-	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
-		pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
-
-	return pte;
-}
-#endif /* CONFIG_X86_64 */
-
 /*
  * Init-time set_pte while constructing initial pagetables, which
  * doesn't allow RO page table pages to be remapped RW.
@@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
  * so always write the PTE directly and rely on Xen trapping and
  * emulating any updates as necessary.
  */
-static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
+__visible pte_t xen_make_pte_init(pteval_t pte)
 {
-	if (pte_mfn(pte) != INVALID_P2M_ENTRY)
-		pte = mask_rw_pte(ptep, pte);
-	else
-		pte = __pte_ma(0);
+#ifdef CONFIG_X86_64
+	unsigned long pfn;
+
+	/*
+	 * Pages belonging to the initial p2m list mapped outside the default
+	 * address range must be mapped read-only. This region contains the
+	 * page tables for mapping the p2m list, too, and page tables MUST be
+	 * mapped read-only.
+	 */
+	pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+	if (xen_start_info->mfn_list < __START_KERNEL_map &&
+	    pfn >= xen_start_info->first_p2m_pfn &&
+	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
+		pte &= ~_PAGE_RW;
+#endif
+	pte = pte_pfn_to_mfn(pte);
+	return native_make_pte(pte);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
 
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_X86_32
+	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
+	if (pte_mfn(pte) != INVALID_P2M_ENTRY
+	    && pte_val_ma(*ptep) & _PAGE_PRESENT)
+		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
+			       pte_val_ma(pte));
+#endif
 	native_set_pte(ptep, pte);
 }
 
@@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
+	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
 
 #ifdef CONFIG_X86_64
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
@@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.pte_val = PV_CALLEE_SAVE(xen_pte_val),
 	.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
 
-	.make_pte = PV_CALLEE_SAVE(xen_make_pte),
+	.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
 	.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
 
 #ifdef CONFIG_X86_PAE
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index cab9f766bb06..dd2a49a8aacc 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void)
 	if (unlikely(!slab_is_available()))
 		return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
 
-	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+	return (void *)__get_free_page(GFP_KERNEL);
 }
 
 static void __ref free_p2m_page(void *p)
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index d38eb9237e64..1065bc8bcae5 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	pte_t *ptep;
 	int i;
 
-	ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	ptep = (pte_t *)__get_free_page(GFP_KERNEL);
 	if (!ptep)
 		return NULL;
 	for (i = 0; i < 1024; i++)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 23d7f301a196..9e29dc351695 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -113,6 +113,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		ret = submit_bio_wait(type, bio);
 		if (ret == -EOPNOTSUPP)
 			ret = 0;
+		bio_put(bio);
 	}
 	blk_finish_plug(&plug);
 
@@ -165,8 +166,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		}
 	}
 
-	if (bio)
+	if (bio) {
 		ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio);
+		bio_put(bio);
+	}
 	return ret != -EOPNOTSUPP ? ret : 0;
 }
 EXPORT_SYMBOL(blkdev_issue_write_same);
@@ -206,8 +209,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		}
 	}
 
-	if (bio)
-		return submit_bio_wait(WRITE, bio);
+	if (bio) {
+		ret = submit_bio_wait(WRITE, bio);
+		bio_put(bio);
+		return ret;
+	}
 	return 0;
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29cbc1b5fbdb..f9b9049b1284 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1262,12 +1262,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio, q->bio_split);
 
-	if (!is_flush_fua && !blk_queue_nomerges(q)) {
-		if (blk_attempt_plug_merge(q, bio, &request_count,
-					   &same_queue_rq))
-			return BLK_QC_T_NONE;
-	} else
-		request_count = blk_plug_queued_count(q);
+	if (!is_flush_fua && !blk_queue_nomerges(q) &&
+	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
+		return BLK_QC_T_NONE;
 
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
@@ -1358,9 +1355,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio, q->bio_split);
 
-	if (!is_flush_fua && !blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, NULL))
-		return BLK_QC_T_NONE;
+	if (!is_flush_fua && !blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
+			return BLK_QC_T_NONE;
+	} else
+		request_count = blk_plug_queued_count(q);
 
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 43fe85f20d57..7097a3395b25 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
 };
 
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index a1d177d58254..21932d640a41 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -108,7 +108,9 @@ acpi_ex_add_table(u32 table_index,
 
 	/* Add the table to the namespace */
 
+	acpi_ex_exit_interpreter();
 	status = acpi_ns_load_table(table_index, parent_node);
+	acpi_ex_enter_interpreter();
 	if (ACPI_FAILURE(status)) {
 		acpi_ut_remove_reference(obj_desc);
 		*ddb_handle = NULL;
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index daceb80022b0..3b7fb99362b6 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -306,12 +306,6 @@ acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg)
 acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 {
 	u64 address;
-	u8 access_width;
-	u32 bit_width;
-	u8 bit_offset;
-	u64 value64;
-	u32 new_value32, old_value32;
-	u8 index;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(hw_write);
@@ -323,145 +317,23 @@ acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 		return (status);
 	}
 
-	/* Convert access_width into number of bits based */
-
-	access_width = acpi_hw_get_access_bit_width(reg, 32);
-	bit_width = reg->bit_offset + reg->bit_width;
-	bit_offset = reg->bit_offset;
-
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	index = 0;
-	while (bit_width) {
-		/*
-		 * Use offset style bit reads because "Index * AccessWidth" is
-		 * ensured to be less than 32-bits by acpi_hw_validate_register().
-		 */
-		new_value32 = ACPI_GET_BITS(&value, index * access_width,
-					    ACPI_MASK_BITS_ABOVE_32
-					    (access_width));
-
-		if (bit_offset >= access_width) {
-			bit_offset -= access_width;
-		} else {
-			/*
-			 * Use offset style bit masks because access_width is ensured
-			 * to be less than 32-bits by acpi_hw_validate_register() and
-			 * bit_offset/bit_width is less than access_width here.
-			 */
-			if (bit_offset) {
-				new_value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
-			}
-			if (bit_width < access_width) {
-				new_value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
-			}
-
-			if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-				if (bit_offset || bit_width < access_width) {
-					/*
-					 * Read old values in order not to modify the bits that
-					 * are beyond the register bit_width/bit_offset setting.
-					 */
-					status =
-					    acpi_os_read_memory((acpi_physical_address)
-								address +
-								index *
-								ACPI_DIV_8
-								(access_width),
-								&value64,
-								access_width);
-					old_value32 = (u32)value64;
-
-					/*
-					 * Use offset style bit masks because access_width is
-					 * ensured to be less than 32-bits by
-					 * acpi_hw_validate_register() and bit_offset/bit_width is
-					 * less than access_width here.
-					 */
-					if (bit_offset) {
-						old_value32 &=
-						    ACPI_MASK_BITS_ABOVE
-						    (bit_offset);
-						bit_offset = 0;
-					}
-					if (bit_width < access_width) {
-						old_value32 &=
-						    ACPI_MASK_BITS_BELOW
-						    (bit_width);
-					}
-
-					new_value32 |= old_value32;
-				}
-
-				value64 = (u64)new_value32;
-				status =
-				    acpi_os_write_memory((acpi_physical_address)
-							 address +
-							 index *
-							 ACPI_DIV_8
-							 (access_width),
-							 value64, access_width);
-			} else {	/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
-
-				if (bit_offset || bit_width < access_width) {
-					/*
-					 * Read old values in order not to modify the bits that
-					 * are beyond the register bit_width/bit_offset setting.
-					 */
-					status =
-					    acpi_hw_read_port((acpi_io_address)
-							      address +
-							      index *
-							      ACPI_DIV_8
-							      (access_width),
-							      &old_value32,
-							      access_width);
-
-					/*
-					 * Use offset style bit masks because access_width is
-					 * ensured to be less than 32-bits by
-					 * acpi_hw_validate_register() and bit_offset/bit_width is
-					 * less than access_width here.
-					 */
-					if (bit_offset) {
-						old_value32 &=
-						    ACPI_MASK_BITS_ABOVE
-						    (bit_offset);
-						bit_offset = 0;
-					}
-					if (bit_width < access_width) {
-						old_value32 &=
-						    ACPI_MASK_BITS_BELOW
-						    (bit_width);
-					}
-
-					new_value32 |= old_value32;
-				}
-
-				status = acpi_hw_write_port((acpi_io_address)
-							    address +
-							    index *
-							    ACPI_DIV_8
-							    (access_width),
-							    new_value32,
-							    access_width);
-			}
-		}
-
-		/*
-		 * Index * access_width is ensured to be less than 32-bits by
-		 * acpi_hw_validate_register().
-		 */
-		bit_width -=
-		    bit_width > access_width ? access_width : bit_width;
-		index++;
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, (u64)value,
+					      reg->bit_width);
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, value, reg->bit_width);
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
 			  "Wrote: %8.8X width %2d   to %8.8X%8.8X (%s)\n",
-			  value, access_width, ACPI_FORMAT_UINT64(address),
+			  value, reg->bit_width, ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index f631a47724f0..1783cd7e1446 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -47,6 +47,7 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsparse")
@@ -170,6 +171,8 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 
 	ACPI_FUNCTION_TRACE(ns_parse_table);
 
+	acpi_ex_enter_interpreter();
+
 	/*
 	 * AML Parse, pass 1
 	 *
@@ -185,7 +188,7 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
 					    table_index, start_node);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto error_exit;
 	}
 
 	/*
@@ -201,8 +204,10 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
 					    table_index, start_node);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto error_exit;
 	}
 
+error_exit:
+	acpi_ex_exit_interpreter();
 	return_ACPI_STATUS(status);
 }
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 2215fc847fa9..ac6ddcc080d4 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code));
+	return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
 }
 static DEVICE_ATTR_RO(format);
 
@@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev,
 				continue;
 			if (nfit_dcr->dcr->code == dcr->code)
 				continue;
-			rc = sprintf(buf, "%#x\n",
-					be16_to_cpu(nfit_dcr->dcr->code));
+			rc = sprintf(buf, "0x%04x\n",
+					le16_to_cpu(nfit_dcr->dcr->code));
 			break;
 		}
 		if (rc != ENXIO)
@@ -1131,11 +1131,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 
 	/*
 	 * Until standardization materializes we need to consider up to 3
-	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this uuid.
+	 * different command sets.  Note, that checking for zero functions
+	 * tells us if any commands might be reachable through this uuid.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
-		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 0))
 			break;
 
 	/* limit the supported commands to those that are publicly documented */
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 11cb38348aef..02b9ea1e8d2e 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -53,12 +53,12 @@ enum nfit_uuids {
 };
 
 /*
- * Region format interface codes are stored as an array of bytes in the
- * NFIT DIMM Control Region structure
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
  */
-#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
 
 enum {
 	NFIT_BLK_READ_FLUSH = 1,
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 8fc7323ed3e8..4ed4061813e6 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -839,7 +839,7 @@ void acpi_penalize_isa_irq(int irq, int active)
 {
 	if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
 		acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
-			active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING;
+		  (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
 }
 
 bool acpi_isa_irq_available(int irq)
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 22c09952e177..b4de130f2d57 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -680,9 +680,6 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	u64 mask = 0;
 	union acpi_object *obj;
 
-	if (funcs == 0)
-		return false;
-
 	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
 	if (!obj)
 		return false;
@@ -695,6 +692,9 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 			mask |= (((u64)obj->buffer.pointer[i]) << (i * 8));
 	ACPI_FREE(obj);
 
+	if (funcs == 0)
+		return true;
+
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
 	 * function 0 for the specified UUID and revision.
diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c
index 6e702ab57220..1d31c0c0fc20 100644
--- a/drivers/ata/ahci_seattle.c
+++ b/drivers/ata/ahci_seattle.c
@@ -137,7 +137,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info(
 	u32 val;
 
 	plat_data = devm_kzalloc(dev, sizeof(*plat_data), GFP_KERNEL);
-	if (IS_ERR(plat_data))
+	if (!plat_data)
 		return &ahci_port_info;
 
 	plat_data->sgpio_ctrl = devm_ioremap_resource(dev,
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 61dc7a99e89a..c6f017458958 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	ata_scsi_port_error_handler(host, ap);
 
 	/* finish or retry handled scmd's and clean up */
-	WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+	WARN_ON(!list_empty(&eh_work_q));
 
 	DPRINTK("EXIT\n");
 }
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index bd74ee555278..745489a1c86a 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -986,7 +986,7 @@ static inline void mv_write_cached_reg(void __iomem *addr, u32 *old, u32 new)
 		 * Looks like a lot of fuss, but it avoids an unnecessary
 		 * +1 usec read-after-write delay for unaffected registers.
 		 */
-		laddr = (long)addr & 0xffff;
+		laddr = (unsigned long)addr & 0xffff;
 		if (laddr >= 0x300 && laddr <= 0x33c) {
 			laddr &= 0x000f;
 			if (laddr == 0x4 || laddr == 0xc) {
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 527bbd595e37..5fc81e240c24 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2795,9 +2795,7 @@ static int hrz_probe(struct pci_dev *pci_dev,
 	dev->atm_dev->ci_range.vpi_bits = vpi_bits;
 	dev->atm_dev->ci_range.vci_bits = 10-vpi_bits;
 
-	init_timer(&dev->housekeeping);
-	dev->housekeeping.function = do_housekeeping;
-	dev->housekeeping.data = (unsigned long) dev;
+	setup_timer(&dev->housekeeping, do_housekeeping, (unsigned long) dev);
 	mod_timer(&dev->housekeeping, jiffies);
 
 out:
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 6b2a84e7f2be..2609ba20b396 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_ISA)	+= isa.o
+obj-$(CONFIG_ISA_BUS_API)	+= isa.o
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
diff --git a/drivers/base/isa.c b/drivers/base/isa.c
index 91dba65d7264..cd6ccdcf9df0 100644
--- a/drivers/base/isa.c
+++ b/drivers/base/isa.c
@@ -180,4 +180,4 @@ static int __init isa_bus_init(void)
 	return error;
 }
 
-device_initcall(isa_bus_init);
+postcore_initcall(isa_bus_init);
diff --git a/drivers/base/module.c b/drivers/base/module.c
index db930d3ee312..2a215780eda2 100644
--- a/drivers/base/module.c
+++ b/drivers/base/module.c
@@ -24,10 +24,12 @@ static char *make_driver_name(struct device_driver *drv)
 
 static void module_create_drivers_dir(struct module_kobject *mk)
 {
-	if (!mk || mk->drivers_dir)
-		return;
+	static DEFINE_MUTEX(drivers_dir_mutex);
 
-	mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
+	mutex_lock(&drivers_dir_mutex);
+	if (mk && !mk->drivers_dir)
+		mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
+	mutex_unlock(&drivers_dir_mutex);
 }
 
 void module_add_driver(struct module *mod, struct device_driver *drv)
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 83d6e7ba1a34..8c3434bdb26d 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -211,7 +211,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
 		}
 
 		/* Mark opp-table as multiple CPUs are sharing it now */
-		opp_table->shared_opp = true;
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
 	}
 unlock:
 	mutex_unlock(&opp_table_lock);
@@ -227,7 +227,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
  *
  * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
  *
- * Returns -ENODEV if OPP table isn't already present.
+ * Returns -ENODEV if OPP table isn't already present and -EINVAL if the OPP
+ * table's status is access-unknown.
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -249,9 +250,14 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 		goto unlock;
 	}
 
+	if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	cpumask_clear(cpumask);
 
-	if (opp_table->shared_opp) {
+	if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
 		list_for_each_entry(opp_dev, &opp_table->dev_list, node)
 			cpumask_set_cpu(opp_dev->dev->id, cpumask);
 	} else {
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 94d2010558e3..1dfd3dd92624 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -34,7 +34,10 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 			 * But the OPPs will be considered as shared only if the
 			 * OPP table contains a "opp-shared" property.
 			 */
-			return opp_table->shared_opp ? opp_table : NULL;
+			if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED)
+				return opp_table;
+
+			return NULL;
 		}
 	}
 
@@ -353,7 +356,10 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 	}
 
 	opp_table->np = opp_np;
-	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+	if (of_property_read_bool(opp_np, "opp-shared"))
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
+	else
+		opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE;
 
 	mutex_unlock(&opp_table_lock);
 
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 20f3be22e060..fabd5ca1a083 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -119,6 +119,12 @@ struct opp_device {
 #endif
 };
 
+enum opp_table_access {
+	OPP_TABLE_ACCESS_UNKNOWN = 0,
+	OPP_TABLE_ACCESS_EXCLUSIVE = 1,
+	OPP_TABLE_ACCESS_SHARED = 2,
+};
+
 /**
  * struct opp_table - Device opp structure
  * @node:	table node - contains the devices with OPPs that
@@ -166,7 +172,7 @@ struct opp_table {
 	/* For backward compatibility with v1 bindings */
 	unsigned int voltage_tolerance_v1;
 
-	bool shared_opp;
+	enum opp_table_access shared_opp;
 	struct dev_pm_opp *suspend_opp;
 
 	unsigned int *supported_hw;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d597e432e195..ab19adb07a12 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1750,7 +1750,7 @@ aoecmd_init(void)
 	int ret;
 
 	/* get_zeroed_page returns page with ref count 1 */
-	p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	p = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 	empty_page = virt_to_page(p);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 31e73a7a40f2..6a48ed41963f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -941,7 +941,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
 	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
 	debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
 	debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
-	debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
+	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
 
 	return 0;
 }
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ca13df854639..2e6d1e9c3345 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -874,8 +874,12 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *qd)
 {
 	unsigned long flags;
-	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
+	int qid = hctx->queue_num;
+	struct blkfront_info *info = hctx->queue->queuedata;
+	struct blkfront_ring_info *rinfo = NULL;
 
+	BUG_ON(info->nr_rings <= qid);
+	rinfo = &info->rinfo[qid];
 	blk_mq_start_request(qd->rq);
 	spin_lock_irqsave(&rinfo->ring_lock, flags);
 	if (RING_FULL(&rinfo->ring))
@@ -901,20 +905,9 @@ out_busy:
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			    unsigned int index)
-{
-	struct blkfront_info *info = (struct blkfront_info *)data;
-
-	BUG_ON(info->nr_rings <= index);
-	hctx->driver_data = &info->rinfo[index];
-	return 0;
-}
-
 static struct blk_mq_ops blkfront_mq_ops = {
 	.queue_rq = blkif_queue_rq,
 	.map_queue = blk_mq_map_queue,
-	.init_hctx = blk_mq_init_hctx,
 };
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -950,6 +943,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 		return PTR_ERR(rq);
 	}
 
+	rq->queuedata = info;
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
 	if (info->feature_discard) {
@@ -2149,6 +2143,8 @@ static int blkfront_resume(struct xenbus_device *dev)
 		return err;
 
 	err = talk_to_blkback(dev, info);
+	if (!err)
+		blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
 
 	/*
 	 * We have to wait for the backend to switch to
@@ -2485,10 +2481,23 @@ static void blkback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateConnected:
-		if (dev->state != XenbusStateInitialised) {
+		/*
+		 * talk_to_blkback sets state to XenbusStateInitialised
+		 * and blkfront_connect sets it to XenbusStateConnected
+		 * (if connection went OK).
+		 *
+		 * If the backend (or toolstack) decides to poke at backend
+		 * state (and re-trigger the watch by setting the state repeatedly
+		 * to XenbusStateConnected (4)) we need to deal with this.
+		 * This is allowed as this is used to communicate to the guest
+		 * that the size of disk has changed!
+		 */
+		if ((dev->state != XenbusStateInitialised) &&
+		    (dev->state != XenbusStateConnected)) {
 			if (talk_to_blkback(dev, info))
 				break;
 		}
+
 		blkfront_connect(info);
 		break;
 
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 25894687c168..fadba88745dc 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -123,6 +123,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3472) },
 	{ USB_DEVICE(0x13d3, 0x3474) },
 	{ USB_DEVICE(0x13d3, 0x3487) },
+	{ USB_DEVICE(0x13d3, 0x3490) },
 
 	/* Atheros AR5BBU12 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xE02C) },
@@ -190,6 +191,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU22 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index 7ad8d61c0c61..e6a85f0e6309 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -138,7 +138,7 @@ int btmrvl_process_event(struct btmrvl_private *priv, struct sk_buff *skb)
 			if (event->length > 3 && event->data[3])
 				priv->btmrvl_dev.dev_type = HCI_AMP;
 			else
-				priv->btmrvl_dev.dev_type = HCI_BREDR;
+				priv->btmrvl_dev.dev_type = HCI_PRIMARY;
 
 			BT_DBG("dev_type: %d", priv->btmrvl_dev.dev_type);
 		} else if (priv->btmrvl_dev.sendcmdflag &&
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index f425ddf91a24..b7c3928cf494 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -1071,7 +1071,6 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv,
 {
 	struct btmrvl_sdio_card *card = priv->btmrvl_dev.card;
 	int ret = 0;
-	int buf_block_len;
 	int blksz;
 	int i = 0;
 	u8 *buf = NULL;
@@ -1083,9 +1082,13 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv,
 		return -EINVAL;
 	}
 
+	blksz = DIV_ROUND_UP(nb, SDIO_BLOCK_SIZE) * SDIO_BLOCK_SIZE;
+
 	buf = payload;
-	if ((unsigned long) payload & (BTSDIO_DMA_ALIGN - 1)) {
-		tmpbufsz = ALIGN_SZ(nb, BTSDIO_DMA_ALIGN);
+	if ((unsigned long) payload & (BTSDIO_DMA_ALIGN - 1) ||
+	    nb < blksz) {
+		tmpbufsz = ALIGN_SZ(blksz, BTSDIO_DMA_ALIGN) +
+			   BTSDIO_DMA_ALIGN;
 		tmpbuf = kzalloc(tmpbufsz, GFP_KERNEL);
 		if (!tmpbuf)
 			return -ENOMEM;
@@ -1093,15 +1096,12 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv,
 		memcpy(buf, payload, nb);
 	}
 
-	blksz = SDIO_BLOCK_SIZE;
-	buf_block_len = DIV_ROUND_UP(nb, blksz);
-
 	sdio_claim_host(card->func);
 
 	do {
 		/* Transfer data to card */
 		ret = sdio_writesb(card->func, card->ioport, buf,
-				   buf_block_len * blksz);
+				   blksz);
 		if (ret < 0) {
 			i++;
 			BT_ERR("i=%d writesb failed: %d", i, ret);
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 2b05661e3818..1cb958e199eb 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -311,7 +311,7 @@ static int btsdio_probe(struct sdio_func *func,
 	if (id->class == SDIO_CLASS_BT_AMP)
 		hdev->dev_type = HCI_AMP;
 	else
-		hdev->dev_type = HCI_BREDR;
+		hdev->dev_type = HCI_PRIMARY;
 
 	data->hdev = hdev;
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a3be65e6231a..811f9b97e360 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -237,6 +237,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU12 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
@@ -249,6 +250,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME },
+	{ USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME },
 
 	/* Broadcom BCM2035 */
 	{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -314,6 +316,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
 	{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
 	{ USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW },
+	{ USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL },
 
 	/* Other Intel Bluetooth devices */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01),
@@ -2103,10 +2106,14 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	/* With this Intel bootloader only the hardware variant and device
 	 * revision information are used to select the right firmware.
 	 *
-	 * Currently this bootloader support is limited to hardware variant
-	 * iBT 3.0 (LnP/SfP) which is identified by the value 11 (0x0b).
+	 * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
+	 *
+	 * Currently the supported hardware variants are:
+	 *   11 (0x0b) for iBT3.0 (LnP/SfP)
+	 *   12 (0x0c) for iBT3.5 (WsP)
 	 */
-	snprintf(fwname, sizeof(fwname), "intel/ibt-11-%u.sfi",
+	snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
+		 le16_to_cpu(ver.hw_variant),
 		 le16_to_cpu(params->dev_revid));
 
 	err = request_firmware(&fw, fwname, &hdev->dev);
@@ -2122,7 +2129,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	/* Save the DDC file name for later use to apply once the firmware
 	 * downloading is done.
 	 */
-	snprintf(fwname, sizeof(fwname), "intel/ibt-11-%u.ddc",
+	snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
+		 le16_to_cpu(ver.hw_variant),
 		 le16_to_cpu(params->dev_revid));
 
 	kfree_skb(skb);
@@ -2825,7 +2833,7 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_AMP)
 		hdev->dev_type = HCI_AMP;
 	else
-		hdev->dev_type = HCI_BREDR;
+		hdev->dev_type = HCI_PRIMARY;
 
 	data->hdev = hdev;
 
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index f6f2b01a1fea..ed0a4201b551 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -537,9 +537,7 @@ static int intel_setup(struct hci_uart *hu)
 {
 	static const u8 reset_param[] = { 0x00, 0x01, 0x00, 0x01,
 					  0x00, 0x08, 0x04, 0x00 };
-	static const u8 lpm_param[] = { 0x03, 0x07, 0x01, 0x0b };
 	struct intel_data *intel = hu->priv;
-	struct intel_device *idev = NULL;
 	struct hci_dev *hdev = hu->hdev;
 	struct sk_buff *skb;
 	struct intel_version ver;
@@ -884,35 +882,23 @@ done:
 
 	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
 
-	/* Enable LPM if matching pdev with wakeup enabled */
+	/* Enable LPM if matching pdev with wakeup enabled, set TX active
+	 * until further LPM TX notification.
+	 */
 	mutex_lock(&intel_device_list_lock);
 	list_for_each(p, &intel_device_list) {
 		struct intel_device *dev = list_entry(p, struct intel_device,
 						      list);
 		if (hu->tty->dev->parent == dev->pdev->dev.parent) {
-			if (device_may_wakeup(&dev->pdev->dev))
-				idev = dev;
+			if (device_may_wakeup(&dev->pdev->dev)) {
+				set_bit(STATE_LPM_ENABLED, &intel->flags);
+				set_bit(STATE_TX_ACTIVE, &intel->flags);
+			}
 			break;
 		}
 	}
 	mutex_unlock(&intel_device_list_lock);
 
-	if (!idev)
-		goto no_lpm;
-
-	bt_dev_info(hdev, "Enabling LPM");
-
-	skb = __hci_cmd_sync(hdev, 0xfc8b, sizeof(lpm_param), lpm_param,
-			     HCI_CMD_TIMEOUT);
-	if (IS_ERR(skb)) {
-		bt_dev_err(hdev, "Failed to enable LPM");
-		goto no_lpm;
-	}
-	kfree_skb(skb);
-
-	set_bit(STATE_LPM_ENABLED, &intel->flags);
-
-no_lpm:
 	/* Ignore errors, device can work without DDC parameters */
 	btintel_load_ddc_config(hdev, fwname);
 
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 49b3e1e2d236..dda97398c59a 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -609,7 +609,7 @@ static int hci_uart_register_dev(struct hci_uart *hu)
 	if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags))
 		hdev->dev_type = HCI_AMP;
 	else
-		hdev->dev_type = HCI_BREDR;
+		hdev->dev_type = HCI_PRIMARY;
 
 	if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags))
 		return 0;
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index aba31210c802..3ff229b2e7f3 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -97,10 +97,10 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
 	if (data->hdev)
 		return -EBADFD;
 
-	/* bits 0-1 are dev_type (BR/EDR or AMP) */
+	/* bits 0-1 are dev_type (Primary or AMP) */
 	dev_type = opcode & 0x03;
 
-	if (dev_type != HCI_BREDR && dev_type != HCI_AMP)
+	if (dev_type != HCI_PRIMARY && dev_type != HCI_AMP)
 		return -EINVAL;
 
 	/* bits 2-5 are reserved (must be zero) */
@@ -316,7 +316,7 @@ static void vhci_open_timeout(struct work_struct *work)
 	struct vhci_data *data = container_of(work, struct vhci_data,
 					      open_timeout.work);
 
-	vhci_create_device(data, amp ? HCI_AMP : HCI_BREDR);
+	vhci_create_device(data, amp ? HCI_AMP : HCI_PRIMARY);
 }
 
 static int vhci_open(struct inode *inode, struct file *file)
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 94fb407d8561..44b1bd6baa38 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3820,6 +3820,7 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
 	while (!list_empty(&intf->waiting_rcv_msgs)) {
 		smi_msg = list_entry(intf->waiting_rcv_msgs.next,
 				     struct ipmi_smi_msg, link);
+		list_del(&smi_msg->link);
 		if (!run_to_completion)
 			spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
 					       flags);
@@ -3829,11 +3830,14 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
 		if (rv > 0) {
 			/*
 			 * To preserve message order, quit if we
-			 * can't handle a message.
+			 * can't handle a message.  Add the message
+			 * back at the head, this is safe because this
+			 * tasklet is the only thing that pulls the
+			 * messages.
 			 */
+			list_add(&smi_msg->link, &intf->waiting_rcv_msgs);
 			break;
 		} else {
-			list_del(&smi_msg->link);
 			if (rv == 0)
 				/* Message handled */
 				ipmi_free_smi_msg(smi_msg);
diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c
index efba7d4dbcfc..79bcb2e42060 100644
--- a/drivers/clk/clk-oxnas.c
+++ b/drivers/clk/clk-oxnas.c
@@ -144,9 +144,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	regmap = syscon_node_to_regmap(of_get_parent(np));
-	if (!regmap) {
+	if (IS_ERR(regmap)) {
 		dev_err(&pdev->dev, "failed to have parent regmap\n");
-		return -EINVAL;
+		return PTR_ERR(regmap);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(clk_oxnas_init); i++) {
diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c
index 4bb130cd0062..05b3d73bfefa 100644
--- a/drivers/clk/rockchip/clk-cpu.c
+++ b/drivers/clk/rockchip/clk-cpu.c
@@ -321,9 +321,9 @@ struct clk *rockchip_clk_register_cpuclk(const char *name,
 	}
 
 	cclk = clk_register(NULL, &cpuclk->hw);
-	if (IS_ERR(clk)) {
+	if (IS_ERR(cclk)) {
 		pr_err("%s: could not register cpuclk %s\n", __func__,	name);
-		ret = PTR_ERR(clk);
+		ret = PTR_ERR(cclk);
 		goto free_rate_table;
 	}
 
diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index bc856f21f6b2..077fcdc7908b 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -41,8 +41,6 @@ static unsigned long rockchip_mmc_recalc(struct clk_hw *hw,
 #define ROCKCHIP_MMC_DEGREE_MASK 0x3
 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2
 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET)
-#define ROCKCHIP_MMC_INIT_STATE_RESET 0x1
-#define ROCKCHIP_MMC_INIT_STATE_SHIFT 1
 
 #define PSECS_PER_SEC 1000000000000LL
 
@@ -154,6 +152,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
+	init.flags = 0;
 	init.num_parents = num_parents;
 	init.parent_names = parent_names;
 	init.ops = &rockchip_mmc_clk_ops;
@@ -162,15 +161,6 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 	mmc_clock->reg = reg;
 	mmc_clock->shift = shift;
 
-	/*
-	 * Assert init_state to soft reset the CLKGEN
-	 * for mmc tuning phase and degree
-	 */
-	if (mmc_clock->shift == ROCKCHIP_MMC_INIT_STATE_SHIFT)
-		writel(HIWORD_UPDATE(ROCKCHIP_MMC_INIT_STATE_RESET,
-				     ROCKCHIP_MMC_INIT_STATE_RESET,
-				     mmc_clock->shift), mmc_clock->reg);
-
 	clk = clk_register(NULL, &mmc_clock->hw);
 	if (IS_ERR(clk))
 		kfree(mmc_clock);
diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 291543f52caa..8059a8d3ea36 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -832,9 +832,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
 			RK3399_CLKGATE_CON(13), 1, GFLAGS),
 
 	/* perihp */
-	GATE(0, "cpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
+	GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 0, GFLAGS),
-	GATE(0, "gpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
+	GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 1, GFLAGS),
 	COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS,
@@ -1466,6 +1466,8 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = {
 
 static const char *const rk3399_cru_critical_clocks[] __initconst = {
 	"aclk_cci_pre",
+	"aclk_gic",
+	"aclk_gic_noc",
 	"pclk_perilp0",
 	"pclk_perilp0",
 	"hclk_perilp0",
@@ -1508,6 +1510,7 @@ static void __init rk3399_clk_init(struct device_node *np)
 	ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip clk init failed\n", __func__);
+		iounmap(reg_base);
 		return;
 	}
 
@@ -1553,6 +1556,7 @@ static void __init rk3399_pmu_clk_init(struct device_node *np)
 	ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip pmu clk init failed\n", __func__);
+		iounmap(reg_base);
 		return;
 	}
 
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 15d06fcf0b50..a782ce87715c 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -22,7 +22,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/ktime.h>
 #include <linux/init.h>
@@ -56,11 +55,21 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
 /* proc_event_counts is used as the sequence number of the netlink message */
 static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
 
-static inline void get_seq(__u32 *ts, int *cpu)
+static inline void send_msg(struct cn_msg *msg)
 {
 	preempt_disable();
-	*ts = __this_cpu_inc_return(proc_event_counts) - 1;
-	*cpu = smp_processor_id();
+
+	msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
+	((struct proc_event *)msg->data)->cpu = smp_processor_id();
+
+	/*
+	 * Preemption remains disabled during send to ensure the messages are
+	 * ordered according to their sequence numbers.
+	 *
+	 * If cn_netlink_send() fails, the data is not sent.
+	 */
+	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
+
 	preempt_enable();
 }
 
@@ -77,7 +86,6 @@ void proc_fork_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_FORK;
 	rcu_read_lock();
@@ -92,8 +100,7 @@ void proc_fork_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	/*  If cn_netlink_send() failed, the data is not sent */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_exec_connector(struct task_struct *task)
@@ -108,7 +115,6 @@ void proc_exec_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_EXEC;
 	ev->event_data.exec.process_pid = task->pid;
@@ -118,7 +124,7 @@ void proc_exec_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_id_connector(struct task_struct *task, int which_id)
@@ -150,14 +156,13 @@ void proc_id_connector(struct task_struct *task, int which_id)
 		return;
 	}
 	rcu_read_unlock();
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_sid_connector(struct task_struct *task)
@@ -172,7 +177,6 @@ void proc_sid_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_SID;
 	ev->event_data.sid.process_pid = task->pid;
@@ -182,7 +186,7 @@ void proc_sid_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
@@ -197,7 +201,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_PTRACE;
 	ev->event_data.ptrace.process_pid  = task->pid;
@@ -215,7 +218,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_comm_connector(struct task_struct *task)
@@ -230,7 +233,6 @@ void proc_comm_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_COMM;
 	ev->event_data.comm.process_pid  = task->pid;
@@ -241,7 +243,7 @@ void proc_comm_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_coredump_connector(struct task_struct *task)
@@ -256,7 +258,6 @@ void proc_coredump_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_COREDUMP;
 	ev->event_data.coredump.process_pid = task->pid;
@@ -266,7 +267,7 @@ void proc_coredump_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_exit_connector(struct task_struct *task)
@@ -281,7 +282,6 @@ void proc_exit_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_EXIT;
 	ev->event_data.exit.process_pid = task->pid;
@@ -293,7 +293,7 @@ void proc_exit_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 /*
@@ -325,7 +325,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
 	msg->ack = rcvd_ack + 1;
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 /**
@@ -389,5 +389,4 @@ static int __init cn_proc_init(void)
 	}
 	return 0;
 }
-
-module_init(cn_proc_init);
+device_initcall(cn_proc_init);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 3646b143bbf5..0bb44d5b5df4 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -79,15 +79,16 @@ static const struct of_device_id machines[] __initconst = {
 static int __init cpufreq_dt_platdev_init(void)
 {
 	struct device_node *np = of_find_node_by_path("/");
+	const struct of_device_id *match;
 
 	if (!np)
 		return -ENODEV;
 
-	if (!of_match_node(machines, np))
+	match = of_match_node(machines, np);
+	of_node_put(np);
+	if (!match)
 		return -ENODEV;
 
-	of_node_put(of_root);
-
 	return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
 							       NULL, 0));
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 9009295f5134..5617c7087d77 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2261,6 +2261,10 @@ int cpufreq_update_policy(unsigned int cpu)
 	 * -> ask driver for current freq and notify governors about a change
 	 */
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
+		if (cpufreq_suspended) {
+			ret = -EAGAIN;
+			goto unlock;
+		}
 		new_policy.cur = cpufreq_update_current_freq(policy);
 		if (WARN_ON(!new_policy.cur)) {
 			ret = -EIO;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ee367e9b7d2e..1fa1a32928d7 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -372,26 +372,9 @@ static bool intel_pstate_get_ppc_enable_status(void)
 	return acpi_ppc;
 }
 
-/*
- * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
- * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
- * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
- * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
- * target ratio 0x17. The _PSS control value stores in a format which can be
- * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
- * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
- * This function converts the _PSS control value to intel pstate driver format
- * for comparison and assignment.
- */
-static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
-{
-	return cpu->acpi_perf_data.states[index].control >> 8;
-}
-
 static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	int turbo_pss_ctl;
 	int ret;
 	int i;
 
@@ -441,11 +424,10 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 	 * max frequency, which will cause a reduced performance as
 	 * this driver uses real max turbo frequency as the max
 	 * frequency. So correct this frequency in _PSS table to
-	 * correct max turbo frequency based on the turbo ratio.
+	 * correct max turbo frequency based on the turbo state.
 	 * Also need to convert to MHz as _PSS freq is in MHz.
 	 */
-	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
-	if (turbo_pss_ctl > cpu->pstate.max_pstate)
+	if (!limits->turbo_disabled)
 		cpu->acpi_perf_data.states[0].core_frequency =
 					policy->cpuinfo.max_freq / 1000;
 	cpu->valid_pss_table = true;
@@ -1418,6 +1400,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
+	if (cpu->update_util_set)
+		return;
+
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
@@ -1458,8 +1443,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
-	intel_pstate_clear_update_util_hook(policy->cpu);
-
 	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
 		 policy->cpuinfo.max_freq, policy->max);
 
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 808a320e9d5d..a7ecb9a84c15 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -487,7 +487,7 @@ static int __init pcc_cpufreq_probe(void)
 	doorbell.space_id = reg_resource->space_id;
 	doorbell.bit_width = reg_resource->bit_width;
 	doorbell.bit_offset = reg_resource->bit_offset;
-	doorbell.access_width = 64;
+	doorbell.access_width = 4;
 	doorbell.address = reg_resource->address;
 
 	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 574e87c7f2b8..9acccad26928 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data,
 						&device_data->state);
 				memmove(req_ctx->state.buffer,
 					device_data->state.buffer,
-					HASH_BLOCK_SIZE / sizeof(u32));
+					HASH_BLOCK_SIZE);
 				if (ret) {
 					dev_err(device_data->dev,
 						"%s: hash_resume_state() failed!\n",
@@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data,
 
 			memmove(device_data->state.buffer,
 				req_ctx->state.buffer,
-				HASH_BLOCK_SIZE / sizeof(u32));
+				HASH_BLOCK_SIZE);
 			if (ret) {
 				dev_err(device_data->dev, "%s: hash_save_state() failed!\n",
 					__func__);
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 495577b6d31b..94ad5c0adbcb 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
 	.cra_name = "cbc(aes)",
 	.cra_driver_name = "p8_aes_cbc",
 	.cra_module = THIS_MODULE,
-	.cra_priority = 1000,
+	.cra_priority = 2000,
 	.cra_type = &crypto_blkcipher_type,
 	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 0a3c1b04cf3c..38ed10d761d0 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
 	.cra_name = "ctr(aes)",
 	.cra_driver_name = "p8_aes_ctr",
 	.cra_module = THIS_MODULE,
-	.cra_priority = 1000,
+	.cra_priority = 2000,
 	.cra_type = &crypto_blkcipher_type,
 	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
index 9f4994cabcc7..b18e67d0e065 100644
--- a/drivers/crypto/vmx/ppc-xlate.pl
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -141,7 +141,7 @@ my $vmr = sub {
 
 # Some ABIs specify vrsave, special-purpose register #256, as reserved
 # for system use.
-my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $no_vrsave = ($flavour =~ /linux-ppc64le/);
 my $mtspr = sub {
     my ($f,$idx,$ra) = @_;
     if ($idx == 256 && $no_vrsave) {
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 1d6c803804d5..e92418facc92 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -268,8 +268,11 @@ int update_devfreq(struct devfreq *devfreq)
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
 
 	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
-	if (err)
+	if (err) {
+		freqs.new = cur_freq;
+		devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
 		return err;
+	}
 
 	freqs.new = freq;
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
@@ -552,6 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->profile = profile;
 	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
+	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
@@ -561,23 +565,22 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		mutex_lock(&devfreq->lock);
 	}
 
-	devfreq->trans_table =	devm_kzalloc(dev, sizeof(unsigned int) *
-						devfreq->profile->max_state *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->last_stat_updated = jiffies;
-
 	dev_set_name(&devfreq->dev, "%s", dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
-		put_device(&devfreq->dev);
 		mutex_unlock(&devfreq->lock);
 		goto err_out;
 	}
 
+	devfreq->trans_table =	devm_kzalloc(&devfreq->dev, sizeof(unsigned int) *
+						devfreq->profile->max_state *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->time_in_state = devm_kzalloc(&devfreq->dev, sizeof(unsigned long) *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->last_stat_updated = jiffies;
+
 	srcu_init_notifier_head(&devfreq->transition_notifier_list);
 
 	mutex_unlock(&devfreq->lock);
@@ -603,7 +606,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 err_init:
 	list_del(&devfreq->node);
 	device_unregister(&devfreq->dev);
-	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
@@ -621,7 +623,6 @@ int devfreq_remove_device(struct devfreq *devfreq)
 		return -EINVAL;
 
 	device_unregister(&devfreq->dev);
-	put_device(&devfreq->dev);
 
 	return 0;
 }
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 6b6a5f310486..a5841403bde8 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -220,9 +220,6 @@ static int exynos_nocp_parse_dt(struct platform_device *pdev,
 
 	/* Maps the memory mapped IO to control nocp register */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
 	base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 8e304b1befc5..75bd6621dc5d 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -242,7 +242,7 @@ struct at_xdmac_lld {
 	u32		mbr_dus;	/* Destination Microblock Stride Register */
 };
 
-
+/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
 struct at_xdmac_desc {
 	struct at_xdmac_lld		lld;
 	enum dma_transfer_direction	direction;
@@ -253,7 +253,7 @@ struct at_xdmac_desc {
 	unsigned int			xfer_size;
 	struct list_head		descs_list;
 	struct list_head		xfer_node;
-};
+} __aligned(sizeof(u64));
 
 static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
 {
@@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	u32			cur_nda, check_nda, cur_ubc, mask, value;
 	u8			dwidth = 0;
 	unsigned long		flags;
+	bool			initd;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_COMPLETE)
@@ -1424,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	residue = desc->xfer_size;
 	/*
 	 * Flush FIFO: only relevant when the transfer is source peripheral
-	 * synchronized.
+	 * synchronized. Flush is needed before reading CUBC because data in
+	 * the FIFO are not reported by CUBC. Reporting a residue of the
+	 * transfer length while we have data in FIFO can cause issue.
+	 * Usecase: atmel USART has a timeout which means I have received
+	 * characters but there is no more character received for a while. On
+	 * timeout, it requests the residue. If the data are in the DMA FIFO,
+	 * we will return a residue of the transfer length. It means no data
+	 * received. If an application is waiting for these data, it will hang
+	 * since we won't have another USART timeout without receiving new
+	 * data.
 	 */
 	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
 	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
@@ -1435,34 +1445,43 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	}
 
 	/*
-	 * When processing the residue, we need to read two registers but we
-	 * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
-	 * we stand in the descriptor list and AT_XDMAC_CUBC is used
-	 * to know how many data are remaining for the current descriptor.
-	 * Since the dma channel is not paused to not loose data, between the
-	 * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
-	 * descriptor.
-	 * For that reason, after reading AT_XDMAC_CUBC, we check if we are
-	 * still using the same descriptor by reading a second time
-	 * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
-	 * read again AT_XDMAC_CUBC.
+	 * The easiest way to compute the residue should be to pause the DMA
+	 * but doing this can lead to miss some data as some devices don't
+	 * have FIFO.
+	 * We need to read several registers because:
+	 * - DMA is running therefore a descriptor change is possible while
+	 * reading these registers
+	 * - When the block transfer is done, the value of the CUBC register
+	 * is set to its initial value until the fetch of the next descriptor.
+	 * This value will corrupt the residue calculation so we have to skip
+	 * it.
+	 *
+	 * INITD --------                    ------------
+	 *              |____________________|
+	 *       _______________________  _______________
+	 * NDA       @desc2             \/   @desc3
+	 *       _______________________/\_______________
+	 *       __________  ___________  _______________
+	 * CUBC       0    \/ MAX desc1 \/  MAX desc2
+	 *       __________/\___________/\_______________
+	 *
+	 * Since descriptors are aligned on 64 bits, we can assume that
+	 * the update of NDA and CUBC is atomic.
 	 * Memory barriers are used to ensure the read order of the registers.
-	 * A max number of retries is set because unlikely it can never ends if
-	 * we are transferring a lot of data with small buffers.
+	 * A max number of retries is set because unlikely it could never ends.
 	 */
-	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
-	rmb();
-	cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
 	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
-		rmb();
 		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
-
-		if (likely(cur_nda == check_nda))
-			break;
-
-		cur_nda = check_nda;
+		rmb();
+		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
 		rmb();
 		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+		rmb();
+		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+		rmb();
+
+		if ((check_nda == cur_nda) && initd)
+			break;
 	}
 
 	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
@@ -1471,6 +1490,19 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	}
 
 	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized. Another flush is needed here because CUBC is updated
+	 * when the controller sends the data write command. It can lead to
+	 * report data that are not written in the memory or the device. The
+	 * FIFO flush ensures that data are really written.
+	 */
+	if ((desc->lld.mbr_cfg & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	/*
 	 * Remove size of all microblocks already transferred and the current
 	 * one. Then add the remaining size to transfer of the current
 	 * microblock.
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 25d1dadcddd1..d0446a75990a 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -703,8 +703,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 		goto free_resources;
 	}
 
-	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0,
-				 PAGE_SIZE, DMA_TO_DEVICE);
+	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
+			       (size_t)src & ~PAGE_MASK, PAGE_SIZE,
+			       DMA_TO_DEVICE);
 	unmap->addr[0] = src_dma;
 
 	ret = dma_mapping_error(dma_chan->device->dev, src_dma);
@@ -714,8 +715,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	}
 	unmap->to_cnt = 1;
 
-	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0,
-				  PAGE_SIZE, DMA_FROM_DEVICE);
+	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
+				(size_t)dest & ~PAGE_MASK, PAGE_SIZE,
+				DMA_FROM_DEVICE);
 	unmap->addr[1] = dest_dma;
 
 	ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
index 8b3226dca1d9..caff46c0e214 100644
--- a/drivers/extcon/extcon-palmas.c
+++ b/drivers/extcon/extcon-palmas.c
@@ -360,6 +360,8 @@ static int palmas_usb_probe(struct platform_device *pdev)
 
 	palmas_enable_irq(palmas_usb);
 	/* perform initial detection */
+	if (palmas_usb->enable_gpio_vbus_detection)
+		palmas_vbus_irq_handler(palmas_usb->gpio_vbus_irq, palmas_usb);
 	palmas_gpio_id_detect(&palmas_usb->wq_detectid.work);
 	device_set_wakeup_capable(&pdev->dev, true);
 	return 0;
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index a850cbc48d8d..c49d50e68aee 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -174,6 +174,7 @@ static __init void reserve_regions(void)
 {
 	efi_memory_desc_t *md;
 	u64 paddr, npages, size;
+	int resv;
 
 	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
@@ -190,12 +191,14 @@ static __init void reserve_regions(void)
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
+		resv = is_reserve_region(md);
 		if (efi_enabled(EFI_DBG)) {
 			char buf[64];
 
-			pr_info("  0x%012llx-0x%012llx %s",
+			pr_info("  0x%012llx-0x%012llx %s%s\n",
 				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-				efi_md_typeattr_format(buf, sizeof(buf), md));
+				efi_md_typeattr_format(buf, sizeof(buf), md),
+				resv ? "*" : "");
 		}
 
 		memrange_efi_to_native(&paddr, &npages);
@@ -204,14 +207,9 @@ static __init void reserve_regions(void)
 		if (is_normal_ram(md))
 			early_init_dt_add_memory_arch(paddr, size);
 
-		if (is_reserve_region(md)) {
+		if (resv)
 			memblock_mark_nomap(paddr, size);
-			if (efi_enabled(EFI_DBG))
-				pr_cont("*");
-		}
 
-		if (efi_enabled(EFI_DBG))
-			pr_cont("\n");
 	}
 
 	set_bit(EFI_MEMMAP, &efi.flags);
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 48da857f4774..cebcb405812e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -33,6 +33,7 @@ config ARCH_REQUIRE_GPIOLIB
 
 menuconfig GPIOLIB
 	bool "GPIO Support"
+	select ANON_INODES
 	help
 	  This enables GPIO support through the generic GPIO library.
 	  You only need to enable this, if you also want to enable
@@ -530,7 +531,7 @@ menu "Port-mapped I/O GPIO drivers"
 
 config GPIO_104_DIO_48E
 	tristate "ACCES 104-DIO-48E GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-DIO-48E series (104-DIO-48E,
@@ -540,7 +541,7 @@ config GPIO_104_DIO_48E
 
 config GPIO_104_IDIO_16
 	tristate "ACCES 104-IDIO-16 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16,
@@ -551,7 +552,7 @@ config GPIO_104_IDIO_16
 
 config GPIO_104_IDI_48
 	tristate "ACCES 104-IDI-48 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-IDI-48 family (104-IDI-48A,
@@ -627,7 +628,7 @@ config GPIO_TS5500
 
 config GPIO_WS16C48
 	tristate "WinSystems WS16C48 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the WinSystems WS16C48. The base port
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 1a647c07be67..fcf776971ca9 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -75,7 +75,7 @@ static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
 	const unsigned io_port = offset / 8;
-	const unsigned control_port = io_port / 2;
+	const unsigned int control_port = io_port / 3;
 	const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
 	unsigned long flags;
 	unsigned control;
@@ -115,7 +115,7 @@ static int dio48e_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
 	const unsigned io_port = offset / 8;
-	const unsigned control_port = io_port / 2;
+	const unsigned int control_port = io_port / 3;
 	const unsigned mask = BIT(offset % 8);
 	const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
 	const unsigned out_port = (io_port > 2) ? io_port + 1 : io_port;
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 6c75c83baf5a..2d2763ea1a68 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id)
 	idi48gpio->irq = irq[id];
 
 	spin_lock_init(&idi48gpio->lock);
+	spin_lock_init(&idi48gpio->ack_lock);
 
 	dev_set_drvdata(dev, idi48gpio);
 
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 9aabc48ff5de..953e4b829e32 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -547,11 +547,11 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio)
 	/* disable interrupts and clear status */
 	for (i = 0; i < kona_gpio->num_bank; i++) {
 		/* Unlock the entire bank first */
-		bcm_kona_gpio_write_lock_regs(kona_gpio, i, UNLOCK_CODE);
+		bcm_kona_gpio_write_lock_regs(reg_base, i, UNLOCK_CODE);
 		writel(0xffffffff, reg_base + GPIO_INT_MASK(i));
 		writel(0xffffffff, reg_base + GPIO_INT_STATUS(i));
 		/* Now re-lock the bank */
-		bcm_kona_gpio_write_lock_regs(kona_gpio, i, LOCK_CODE);
+		bcm_kona_gpio_write_lock_regs(reg_base, i, LOCK_CODE);
 	}
 }
 
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index ec891a27952f..661b0e34e067 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -98,7 +98,6 @@ struct tegra_gpio_info {
 	const struct tegra_gpio_soc_config	*soc;
 	struct gpio_chip			gc;
 	struct irq_chip				ic;
-	struct lock_class_key			lock_class;
 	u32					bank_count;
 };
 
@@ -547,6 +546,12 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume)
 };
 
+/*
+ * This lock class tells lockdep that GPIO irqs are in a different category
+ * than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key gpio_lock_class;
+
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
 	const struct tegra_gpio_soc_config *config;
@@ -660,7 +665,7 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
 		bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-		irq_set_lockdep_class(irq, &tgi->lock_class);
+		irq_set_lockdep_class(irq, &gpio_lock_class);
 		irq_set_chip_data(irq, bank);
 		irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
 	}
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index 75c6355b018d..e72794e463aa 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -709,7 +709,13 @@ static int zynq_gpio_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "input clock not found.\n");
 		return PTR_ERR(gpio->clk);
 	}
+	ret = clk_prepare_enable(gpio->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		return ret;
+	}
 
+	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0)
@@ -747,6 +753,7 @@ err_pm_put:
 	pm_runtime_put(&pdev->dev);
 err_pm_dis:
 	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(gpio->clk);
 
 	return ret;
 }
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d22dcc38179d..4aabddb38b59 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/io-mapping.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 24f60d28f0c0..570771ed19e6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -449,7 +449,6 @@ static void gpiodevice_release(struct device *dev)
 {
 	struct gpio_device *gdev = dev_get_drvdata(dev);
 
-	cdev_del(&gdev->chrdev);
 	list_del(&gdev->list);
 	ida_simple_remove(&gpio_ida, gdev->id);
 	kfree(gdev->label);
@@ -482,7 +481,6 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
 
 	/* From this point, the .release() function cleans up gpio_device */
 	gdev->dev.release = gpiodevice_release;
-	get_device(&gdev->dev);
 	pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n",
 		 __func__, gdev->base, gdev->base + gdev->ngpio - 1,
 		 dev_name(&gdev->dev), gdev->chip->label ? : "generic");
@@ -770,6 +768,8 @@ void gpiochip_remove(struct gpio_chip *chip)
 	 * be removed, else it will be dangling until the last user is
 	 * gone.
 	 */
+	cdev_del(&gdev->chrdev);
+	device_del(&gdev->dev);
 	put_device(&gdev->dev);
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -869,7 +869,7 @@ struct gpio_chip *gpiochip_find(void *data,
 
 	spin_lock_irqsave(&gpio_lock, flags);
 	list_for_each_entry(gdev, &gpio_devices, list)
-		if (match(gdev->chip, data))
+		if (gdev->chip && match(gdev->chip, data))
 			break;
 
 	/* No match? */
@@ -1373,8 +1373,12 @@ done:
 #define VALIDATE_DESC(desc) do { \
 	if (!desc) \
 		return 0; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return PTR_ERR(desc); \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return -EINVAL; \
 	} \
 	if ( !desc->gdev->chip ) { \
@@ -1386,8 +1390,12 @@ done:
 #define VALIDATE_DESC_VOID(desc) do { \
 	if (!desc) \
 		return; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return; \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return; \
 	} \
 	if (!desc->gdev->chip) { \
@@ -2056,7 +2064,14 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	struct gpio_chip *chip;
 	int offset;
 
-	VALIDATE_DESC(desc);
+	/*
+	 * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
+	 * requires this function to not return zero on an invalid descriptor
+	 * but rather a negative error number.
+	 */
+	if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip)
+		return -EINVAL;
+
 	chip = desc->gdev->chip;
 	offset = gpio_chip_hwgpio(desc);
 	if (chip->to_irq) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 01c36b8d6222..e055d5be1c3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -799,7 +799,6 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64				cond_exe_gpu_addr;
 	volatile u32	*cond_exe_cpu_addr;
-	int                     vmid;
 };
 
 /*
@@ -937,8 +936,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 		    unsigned vm_id, uint64_t pd_addr,
 		    uint32_t gds_base, uint32_t gds_size,
 		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size,
-		    bool vmid_switch);
+		    uint32_t oa_base, uint32_t oa_size);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
@@ -1822,6 +1820,8 @@ struct amdgpu_asic_funcs {
 	/* MM block clocks */
 	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
 	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+	/* query virtual capabilities */
+	u32 (*get_virtual_caps)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1916,8 +1916,12 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
 
 
 /* GPU virtualization */
+#define AMDGPU_VIRT_CAPS_SRIOV_EN       (1 << 0)
+#define AMDGPU_VIRT_CAPS_IS_VF          (1 << 1)
 struct amdgpu_virtualization {
 	bool supports_sr_iov;
+	bool is_virtual;
+	u32 caps;
 };
 
 /*
@@ -2206,6 +2210,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
+#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
 #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 8943099eb135..cf6f49fc1c75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -909,7 +909,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_argument *argument = NULL;
 	uint32_t i, count;
 	acpi_status status;
-	int result;
+	int result = 0;
 	uint32_t func_no = 0xFFFFFFFF;
 
 	handle = ACPI_HANDLE(&adev->pdev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 964f31404f17..6e920086af46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1385,6 +1385,15 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static bool amdgpu_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -1519,8 +1528,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->virtualization.supports_sr_iov =
 		amdgpu_atombios_has_gpu_virtualization_table(adev);
 
+	/* Check if we are executing in a virtualized environment */
+	adev->virtualization.is_virtual = amdgpu_device_is_virtual();
+	adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev);
+
 	/* Post card if necessary */
-	if (!amdgpu_card_posted(adev)) {
+	if (!amdgpu_card_posted(adev) ||
+	    (adev->virtualization.is_virtual &&
+	     !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7a0b1e50f293..34e35423b78e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -122,7 +122,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
-	int vmid = 0, old_vmid = ring->vmid;
 	struct fence *hwf;
 	uint64_t ctx;
 
@@ -136,11 +135,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (job) {
 		vm = job->vm;
 		ctx = job->ctx;
-		vmid = job->vm_id;
 	} else {
 		vm = NULL;
 		ctx = 0;
-		vmid = 0;
 	}
 
 	if (!ring->ready) {
@@ -166,8 +163,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
 				    job->gds_base, job->gds_size,
 				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size,
-				    (ring->current_ctx == ctx) && (old_vmid != vmid));
+				    job->oa_base, job->oa_size);
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@@ -184,6 +180,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	need_ctx_switch = ring->current_ctx != ctx;
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
+
 		/* drop preamble IBs if we don't have a context switch */
 		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
 			continue;
@@ -191,7 +188,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
-		ring->vmid = vmid;
 	}
 
 	if (ring->funcs->emit_hdp_invalidate)
@@ -202,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
 			amdgpu_vm_reset_id(adev, job->vm_id);
-		ring->vmid = old_vmid;
 		amdgpu_ring_undo(ring);
 		return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 40a23704a981..d851ea15059f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,7 +447,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
+		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
+			adev->gfx.config.max_shader_engines;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 589b36e8c5cf..0e13d80d2a95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type state = 0;
-	long idx;
+	unsigned long idx;
 	int ret;
 
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
-	else {
-		ret = kstrtol(buf, 0, &idx);
+	else if (adev->pp_enabled) {
+		struct pp_states_info data;
 
-		if (ret) {
+		ret = kstrtoul(buf, 0, &idx);
+		if (ret || idx >= ARRAY_SIZE(data.states)) {
 			count = -EINVAL;
 			goto fail;
 		}
 
-		if (adev->pp_enabled) {
-			struct pp_states_info data;
-			amdgpu_dpm_get_pp_num_states(adev, &data);
-			state = data.states[idx];
-			/* only set user selected power states */
-			if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
-				state != POWER_STATE_TYPE_DEFAULT) {
-				amdgpu_dpm_dispatch_task(adev,
-						AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
-				adev->pp_force_state_enabled = true;
-			}
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+		state = data.states[idx];
+		/* only set user selected power states */
+		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+		    state != POWER_STATE_TYPE_DEFAULT) {
+			amdgpu_dpm_dispatch_task(adev,
+					AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+			adev->pp_force_state_enabled = true;
 		}
 	}
 fail:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e19520c4b4b6..d9c88d13f8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1106,6 +1106,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 	if (fences == 0 && handles == 0) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, false);
+			/* just work around for uvd clock remain high even
+			 * when uvd dpm disabled on Polaris10 */
+			if (adev->asic_type == CHIP_POLARIS10)
+				amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 62a4c127620f..9f36ed30ba11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -298,8 +298,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 		    unsigned vm_id, uint64_t pd_addr,
 		    uint32_t gds_base, uint32_t gds_size,
 		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size,
-		    bool vmid_switch)
+		    uint32_t oa_base, uint32_t oa_size)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
@@ -313,7 +312,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch))
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
+		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush &&
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07bc795a4ca9..910431808542 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -962,6 +962,12 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 cik_get_virtual_caps(struct amdgpu_device *adev)
+{
+	/* CIK does not support SR-IOV */
+	return 0;
+}
+
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
 	{mmGRBM_STATUS, false},
 	{mmGB_ADDR_CONFIG, false},
@@ -2007,6 +2013,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.get_xclk = &cik_get_xclk,
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
+	.get_virtual_caps = &cik_get_virtual_caps,
 	/* these should be moved to their own ip modules */
 	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
 	.wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8c6ad1e72f02..fc8ff4d3ccf8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4833,7 +4833,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
 	case 2:
 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 			ring = &adev->gfx.compute_ring[i];
-			if ((ring->me == me_id) & (ring->pipe == pipe_id))
+			if ((ring->me == me_id) && (ring->pipe == pipe_id))
 				amdgpu_fence_process(ring);
 		}
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 9f6f8669edc3..b2ebd4fef6cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -47,6 +47,8 @@
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
+#include "smu/smu_7_1_3_d.h"
+
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_COMPUTE_RINGS 8
 
@@ -297,7 +299,8 @@ static const u32 polaris11_golden_common_all[] =
 static const u32 golden_settings_polaris10_a11[] =
 {
 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -692,6 +695,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 		amdgpu_program_register_sequence(adev,
 						 polaris10_golden_common_all,
 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
+		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 		break;
 	case CHIP_CARRIZO:
 		amdgpu_program_register_sequence(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 2c88d0b66cf3..a65c96029476 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -421,6 +421,20 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 vi_get_virtual_caps(struct amdgpu_device *adev)
+{
+	u32 caps = 0;
+	u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE))
+		caps |= AMDGPU_VIRT_CAPS_SRIOV_EN;
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER))
+		caps |= AMDGPU_VIRT_CAPS_IS_VF;
+
+	return caps;
+}
+
 static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
 	{mmGB_MACROTILE_MODE7, true},
 };
@@ -1118,6 +1132,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.get_xclk = &vi_get_xclk,
 	.set_uvd_clocks = &vi_set_uvd_clocks,
 	.set_vce_clocks = &vi_set_vce_clocks,
+	.get_virtual_caps = &vi_get_virtual_caps,
 	/* these should be moved to their own ip modules */
 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
 	.wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ac005796b71c..7708d90b9da9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -242,13 +242,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	pqm_uninit(&p->pqm);
 
 	/* Iterate over all process device data structure and check
-	 * if we should reset all wavefronts */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+	 * if we should delete debug managers and reset all wavefronts
+	 */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		if ((pdd->dev->dbgmgr) &&
+				(pdd->dev->dbgmgr->pasid == p->pasid))
+			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
+
 		if (pdd->reset_wavefronts) {
 			pr_warn("amdkfd: Resetting all wave fronts\n");
 			dbgdev_wave_reset_wavefronts(pdd->dev, p);
 			pdd->reset_wavefronts = false;
 		}
+	}
 
 	mutex_unlock(&p->mutex);
 
@@ -404,42 +410,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 
 	idx = srcu_read_lock(&kfd_processes_srcu);
 
+	/*
+	 * Look for the process that matches the pasid. If there is no such
+	 * process, we either released it in amdkfd's own notifier, or there
+	 * is a bug. Unfortunately, there is no way to tell...
+	 */
 	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
-		if (p->pasid == pasid)
-			break;
+		if (p->pasid == pasid) {
 
-	srcu_read_unlock(&kfd_processes_srcu, idx);
+			srcu_read_unlock(&kfd_processes_srcu, idx);
 
-	BUG_ON(p->pasid != pasid);
+			pr_debug("Unbinding process %d from IOMMU\n", pasid);
 
-	mutex_lock(&p->mutex);
+			mutex_lock(&p->mutex);
 
-	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
-		kfd_dbgmgr_destroy(dev->dbgmgr);
+			if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+				kfd_dbgmgr_destroy(dev->dbgmgr);
 
-	pqm_uninit(&p->pqm);
+			pqm_uninit(&p->pqm);
 
-	pdd = kfd_get_process_device_data(dev, p);
+			pdd = kfd_get_process_device_data(dev, p);
 
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		return;
-	}
+			if (!pdd) {
+				mutex_unlock(&p->mutex);
+				return;
+			}
 
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+			if (pdd->reset_wavefronts) {
+				dbgdev_wave_reset_wavefronts(pdd->dev, p);
+				pdd->reset_wavefronts = false;
+			}
 
-	/*
-	 * Just mark pdd as unbound, because we still need it to call
-	 * amd_iommu_unbind_pasid() in when the process exits.
-	 * We don't call amd_iommu_unbind_pasid() here
-	 * because the IOMMU called us.
-	 */
-	pdd->bound = false;
+			/*
+			 * Just mark pdd as unbound, because we still need it
+			 * to call amd_iommu_unbind_pasid() in when the
+			 * process exits.
+			 * We don't call amd_iommu_unbind_pasid() here
+			 * because the IOMMU called us.
+			 */
+			pdd->bound = false;
 
-	mutex_unlock(&p->mutex);
+			mutex_unlock(&p->mutex);
+
+			return;
+		}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
 
 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 74909e72a009..884c96f50c3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -666,7 +666,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.simd_count);
 
 	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
-		pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
+		pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
 				dev->node_props.mem_banks_count,
 				dev->mem_bank_count);
 		sysfs_show_32bit_prop(buffer, "mem_banks_count",
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 32f3e345de08..3493da5c8f0e 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -5538,6 +5538,78 @@ typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_5
   ULONG  ulReserved[12];
 }ATOM_ASIC_PROFILING_INFO_V3_5;
 
+/* for Polars10/11 AVFS parameters */
+typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_6
+{
+  ATOM_COMMON_TABLE_HEADER         asHeader;
+  ULONG  ulMaxVddc;
+  ULONG  ulMinVddc;
+  USHORT usLkgEuseIndex;
+  UCHAR  ucLkgEfuseBitLSB;
+  UCHAR  ucLkgEfuseLength;
+  ULONG  ulLkgEncodeLn_MaxDivMin;
+  ULONG  ulLkgEncodeMax;
+  ULONG  ulLkgEncodeMin;
+  EFUSE_LINEAR_FUNC_PARAM sRoFuse;
+  ULONG  ulEvvDefaultVddc;
+  ULONG  ulEvvNoCalcVddc;
+  ULONG  ulSpeed_Model;
+  ULONG  ulSM_A0;
+  ULONG  ulSM_A1;
+  ULONG  ulSM_A2;
+  ULONG  ulSM_A3;
+  ULONG  ulSM_A4;
+  ULONG  ulSM_A5;
+  ULONG  ulSM_A6;
+  ULONG  ulSM_A7;
+  UCHAR  ucSM_A0_sign;
+  UCHAR  ucSM_A1_sign;
+  UCHAR  ucSM_A2_sign;
+  UCHAR  ucSM_A3_sign;
+  UCHAR  ucSM_A4_sign;
+  UCHAR  ucSM_A5_sign;
+  UCHAR  ucSM_A6_sign;
+  UCHAR  ucSM_A7_sign;
+  ULONG  ulMargin_RO_a;
+  ULONG  ulMargin_RO_b;
+  ULONG  ulMargin_RO_c;
+  ULONG  ulMargin_fixed;
+  ULONG  ulMargin_Fmax_mean;
+  ULONG  ulMargin_plat_mean;
+  ULONG  ulMargin_Fmax_sigma;
+  ULONG  ulMargin_plat_sigma;
+  ULONG  ulMargin_DC_sigma;
+  ULONG  ulLoadLineSlop;
+  ULONG  ulaTDClimitPerDPM[8];
+  ULONG  ulaNoCalcVddcPerDPM[8];
+  ULONG  ulAVFS_meanNsigma_Acontant0;
+  ULONG  ulAVFS_meanNsigma_Acontant1;
+  ULONG  ulAVFS_meanNsigma_Acontant2;
+  USHORT usAVFS_meanNsigma_DC_tol_sigma;
+  USHORT usAVFS_meanNsigma_Platform_mean;
+  USHORT usAVFS_meanNsigma_Platform_sigma;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a2;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSON_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_b;
+  USHORT usMaxVoltage_0_25mv;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSON;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSON;
+  USHORT usPSM_Age_ComFactor;
+  UCHAR  ucEnableApplyAVFS_CKS_OFF_Voltage;
+  UCHAR  ucReserved;
+}ATOM_ASIC_PROFILING_INFO_V3_6;
+
 
 typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{
   ULONG  ulMaxSclkFreq;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 586f73276226..92912ab20944 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -633,6 +633,8 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE;
 
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index fa208ada6892..efb77eda7508 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -306,10 +306,14 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 {
 	PHM_FUNC_CHECK(hwmgr);
 
-	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+	if (display_config == NULL)
 		return -EINVAL;
 
 	hwmgr->display_config = *display_config;
+
+	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+		return -EINVAL;
+
 	/* to do pass other display configuration in furture */
 
 	if (hwmgr->hwmgr_func->store_cc6_data)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
index 347fef127ce9..2930a3355948 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
@@ -39,6 +39,7 @@ struct phm_ppt_v1_clock_voltage_dependency_record {
 	uint8_t phases;
 	uint8_t cks_enable;
 	uint8_t cks_voffset;
+	uint32_t sclk_offset;
 };
 
 typedef struct phm_ppt_v1_clock_voltage_dependency_record phm_ppt_v1_clock_voltage_dependency_record;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index aa6be033f21b..ec2a7ada346a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -98,6 +98,7 @@
 #define PCIE_BUS_CLK                10000
 #define TCLK                        (PCIE_BUS_CLK / 10)
 
+#define CEILING_UCHAR(double) ((double-(uint8_t)(double)) > 0 ? (uint8_t)(double+1) : (uint8_t)(double))
 
 static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] =
 { {600, 1050, 3, 0}, {600, 1050, 6, 1} };
@@ -999,7 +1000,7 @@ static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
 				vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
 						(dep_table->entries[i].vddc -
 								(uint16_t)data->vddc_vddci_delta));
-				*voltage |= (vddci * VOLTAGE_SCALE) <<	VDDCI_SHIFT;
+				*voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 			}
 
 			if (POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control)
@@ -1296,7 +1297,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	}
 
 	mem_level->MclkFrequency = clock;
-	mem_level->StutterEnable = 0;
 	mem_level->EnabledForThrottle = 1;
 	mem_level->EnabledForActivity = 0;
 	mem_level->UpHyst = 0;
@@ -1304,7 +1304,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	mem_level->VoltageDownHyst = 0;
 	mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target;
 	mem_level->StutterEnable = false;
-
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	data->display_timing.num_existing_displays = info.display_count;
@@ -1363,7 +1362,7 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
 	 * a higher state by default such that we are not effected by
 	 * up threshold or and MCLK DPM latency.
 	 */
-	levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target;
+	levels[0].ActivityLevel = 0x1f;
 	CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel);
 
 	data->smc_state_table.MemoryDpmLevelCount =
@@ -1424,22 +1423,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 
 	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
 
-	if (!data->sclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0,
-		 * already converted to SMC_UL */
-		sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_sclk,
-				table->ACPILevel.SclkFrequency,
-				&table->ACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDC voltage value "
-				"in Clock Dependency Table", );
-	} else {
-		sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
-		table->ACPILevel.MinVoltage =
-				data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0,
+	 * already converted to SMC_UL */
+	sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_sclk,
+			sclk_frequency,
+			&table->ACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDC voltage value "
+			"in Clock Dependency Table",
+			);
+
 
 	result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency,  &(table->ACPILevel.SclkSetting));
 	PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result);
@@ -1464,24 +1460,18 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac);
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate);
 
-	if (!data->mclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
-		table->MemoryACPILevel.MclkFrequency =
-				data->dpm_table.mclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_mclk,
-				table->MemoryACPILevel.MclkFrequency,
-				&table->MemoryACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDCI voltage value "
-				"in Clock Dependency Table",
-				);
-	} else {
-		table->MemoryACPILevel.MclkFrequency =
-				data->vbios_boot_state.mclk_bootup_value;
-		table->MemoryACPILevel.MinVoltage =
-				data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
+	table->MemoryACPILevel.MclkFrequency =
+			data->dpm_table.mclk_table.dpm_levels[0].value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_mclk,
+			table->MemoryACPILevel.MclkFrequency,
+			&table->MemoryACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDCI voltage value "
+			"in Clock Dependency Table",
+			);
 
 	us_mvdd = 0;
 	if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
@@ -1526,6 +1516,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->VceLevelCount = (uint8_t)(mm_table->count);
 	table->VceBootLevel = 0;
@@ -1535,9 +1526,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 		table->VceLevel[count].MinVoltage = 0;
 		table->VceLevel[count].MinVoltage |=
 				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+
 		table->VceLevel[count].MinVoltage |=
-				((mm_table->entries[count].vddc - data->vddc_vddci_delta) *
-						VOLTAGE_SCALE) << VDDCI_SHIFT;
+				(vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/*retrieve divider value for VBIOS */
@@ -1566,6 +1566,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->SamuBootLevel = 0;
 	table->SamuLevelCount = (uint8_t)(mm_table->count);
@@ -1576,8 +1577,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
 		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1660,6 +1669,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->UvdLevelCount = (uint8_t)(mm_table->count);
 	table->UvdBootLevel = 0;
@@ -1670,8 +1680,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
 		table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1692,8 +1710,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
-
 	}
+
 	return result;
 }
 
@@ -1761,12 +1779,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr)
 
 static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 {
-	uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks,
-			volt_with_cks, value;
-	uint16_t clock_freq_u16;
+	uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2,
-			volt_offset = 0;
+	uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
@@ -1778,50 +1793,46 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	 * if the part is SS or FF. if RO >= 1660MHz, part is FF.
 	 */
 	efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (146 * 4));
-	efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (148 * 4));
+			ixSMU_EFUSE_0 + (67 * 4));
 	efuse &= 0xFF000000;
 	efuse = efuse >> 24;
-	efuse2 &= 0xF;
-
-	if (efuse2 == 1)
-		ro = (2300 - 1350) * efuse / 255 + 1350;
-	else
-		ro = (2500 - 1000) * efuse / 255 + 1000;
 
-	if (ro >= 1660)
-		type = 0;
-	else
-		type = 1;
+	if (hwmgr->chip_id == CHIP_POLARIS10) {
+		min = 1000;
+		max = 2300;
+	} else {
+		min = 1100;
+		max = 2100;
+	}
 
-	/* Populate Stretch amount */
-	data->smc_state_table.ClockStretcherAmount = stretch_amount;
+	ro = efuse * (max -min)/255 + min;
 
-	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
+	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset
+	 * there is a little difference in calculating
+	 * volt_with_cks with windows */
 	for (i = 0; i < sclk_table->count; i++) {
 		data->smc_state_table.Sclk_CKS_masterEn0_7 |=
 				sclk_table->entries[i].cks_enable << i;
-		volt_without_cks = (uint32_t)((14041 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 /
-			(4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000)));
-		volt_with_cks = (uint32_t)((13946 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 /
-			(3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000)));
+		if (hwmgr->chip_id == CHIP_POLARIS10) {
+			volt_without_cks = (uint32_t)((2753594000 + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \
+						(2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
+			volt_with_cks = (uint32_t)((279720200 + sclk_table->entries[i].clk * 3232 - (ro - 65) * 100000000) / \
+					(252248000 - sclk_table->entries[i].clk/100 * 115764));
+		} else {
+			volt_without_cks = (uint32_t)((2416794800 + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \
+						(2625416 - (sclk_table->entries[i].clk/100) * 12586807/10000));
+			volt_with_cks = (uint32_t)((2999656000 + sclk_table->entries[i].clk * 392803/100 - (ro - 44) * 1000000) / \
+					(3422454 - sclk_table->entries[i].clk/100 * 18886376/10000));
+		}
+
 		if (volt_without_cks >= volt_with_cks)
-			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
-					sclk_table->entries[i].cks_voffset) * 100 / 625) + 1);
+			volt_offset = (uint8_t)CEILING_UCHAR((volt_without_cks - volt_with_cks +
+					sclk_table->entries[i].cks_voffset) * 100 / 625);
+
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
 
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			STRETCH_ENABLE, 0x0);
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x1);
-	/* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x0);
-
+	data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6;
 	/* Populate CKS Lookup Table */
 	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
 		stretch_amount2 = 0;
@@ -1835,69 +1846,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 				return -EINVAL);
 	}
 
-	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL);
-	value &= 0xFFC2FF87;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][0];
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][1];
-	clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table.
-			GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100);
-	if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16
-	&& polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) {
-		/* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16;
-		/* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18;
-		/* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */
-		value |= (polaris10_clock_stretch_amount_conversion
-				[polaris10_clock_stretcher_lookup_table[stretch_amount2][3]]
-				 [stretch_amount]) << 3;
-	}
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq);
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq);
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |=
-			(polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7;
-
-	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL, value);
-
-	/* Populate DDT Lookup Table */
-	for (i = 0; i < 4; i++) {
-		/* Assign the minimum and maximum VID stored
-		 * in the last row of Clock Stretcher Voltage Table.
-		 */
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2];
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3];
-		/* Loop through each SCLK and check the frequency
-		 * to see if it lies within the frequency for clock stretcher.
-		 */
-		for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) {
-			cks_setting = 0;
-			clock_freq = PP_SMC_TO_HOST_UL(
-					data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency);
-			/* Check the allowed frequency against the sclk level[j].
-			 *  Sclk's endianness has already been converted,
-			 *  and it's in 10Khz unit,
-			 *  as opposed to Data table, which is in Mhz unit.
-			 */
-			if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) {
-				cks_setting |= 0x2;
-				if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100)
-					cks_setting |= 0x1;
-			}
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting
-							|= cks_setting << (j * 2);
-		}
-		CONVERT_FROM_HOST_TO_SMC_US(
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting);
-	}
-
 	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
 	value &= 0xFFFFFFFE;
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
@@ -1956,6 +1904,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+
+int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	SMU74_Discrete_DpmTable  *table = &(data->smc_state_table);
+	int result = 0;
+	struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+	AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+	AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+	uint32_t tmp, i;
+	struct pp_smumgr *smumgr = hwmgr->smumgr;
+	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+
+
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
+		return result;
+
+	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+
+	if (0 == result) {
+		table->BTCGB_VDROOP_TABLE[0].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+		table->BTCGB_VDROOP_TABLE[0].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+		table->BTCGB_VDROOP_TABLE[0].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+		table->BTCGB_VDROOP_TABLE[1].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+		table->BTCGB_VDROOP_TABLE[1].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+		table->BTCGB_VDROOP_TABLE[1].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+		table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+		table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+		table->AVFSGB_VDROOP_TABLE[0].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+		table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[0].m2_shift  = 12;
+		table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+		table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+		table->AVFSGB_VDROOP_TABLE[1].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+		table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[1].m2_shift  = 12;
+		table->MaxVoltage                = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+		AVFS_meanNsigma.Aconstant[0]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+		AVFS_meanNsigma.Aconstant[1]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+		AVFS_meanNsigma.Aconstant[2]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+		AVFS_meanNsigma.DC_tol_sigma      = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+		AVFS_meanNsigma.Platform_mean     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+		AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+		AVFS_meanNsigma.Platform_sigma     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
+		for (i = 0; i < NUM_VFT_COLUMNS; i++) {
+			AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+			AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100);
+		}
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma),
+				&tmp, data->sram_end);
+
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_meanNsigma,
+					sizeof(AVFS_meanNsigma_t),
+					data->sram_end);
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable),
+				&tmp, data->sram_end);
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_SclkOffset,
+					sizeof(AVFS_Sclk_Offset_t),
+					data->sram_end);
+
+		data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+		data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false;
+	}
+	return result;
+}
+
+
 /**
 * Initializes the SMC table and uploads it
 *
@@ -2056,6 +2088,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr)
 				"Failed to populate Clock Stretcher Data Table!",
 				return result);
 	}
+
+	result = polaris10_populate_avfs_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;);
+
 	table->CurrSclkPllRange = 0xff;
 	table->GraphicsVoltageChangeEnable  = 1;
 	table->GraphicsThermThrottleEnable  = 1;
@@ -2252,6 +2288,9 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ offsetof(SMU74_SoftRegisters, HandshakeDisables);
 
 	/* enable SCLK dpm */
 	if (!data->sclk_dpm_key_disabled)
@@ -2262,6 +2301,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 
 	/* enable MCLK dpm */
 	if (0 == data->mclk_dpm_key_disabled) {
+/* Disable UVD - SMU handshake for MCLK. */
+		soft_register_value = cgs_read_ind_register(hwmgr->device,
+					CGS_IND_REG__SMC, handshake_disables_offset);
+		soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE;
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				handshake_disables_offset, soft_register_value);
 
 		PP_ASSERT_WITH_CODE(
 				(0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -2269,7 +2314,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 				"Failed to enable MCLK DPM during DPM Start Function!",
 				return -1);
 
-
 		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5);
@@ -2471,6 +2515,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable VR hot GPIO interrupt!", result = tmp_result);
 
+	smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay);
+
 	tmp_result = polaris10_enable_sclk_control(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable SCLK control!", result = tmp_result);
@@ -2606,6 +2652,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_FanSpeedInTableIsRPM);
+
 	if (hwmgr->chip_id == CHIP_POLARIS11)
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SPLLShutdownSupport);
@@ -2896,6 +2943,31 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr)
+{
+	struct phm_ppt_v1_information *table_info =
+		       (struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table =
+			table_info->vdd_dep_on_mclk;
+	struct phm_ppt_v1_voltage_lookup_table *lookup_table =
+			table_info->vddc_lookup_table;
+	uint32_t i;
+
+	if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) {
+		if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000)
+			return 0;
+
+		for (i = 0; i < lookup_table->count; i++) {
+			if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) {
+				dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i;
+				return 0;
+			}
+		}
+	}
+	return 0;
+}
+
+
 int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -2938,6 +3010,11 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 
+	data->enable_tdc_limit_feature = true;
+	data->enable_pkg_pwr_tracking_feature = true;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+	data->mclk_stutter_mode_threshold = 40000;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
@@ -2962,8 +3039,13 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 			data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
 	}
 
+	if (table_info->cac_dtp_table->usClockStretchAmount != 0)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_ClockStretcher);
+
 	polaris10_set_features_platform_caps(hwmgr);
 
+	polaris10_patch_voltage_workaround(hwmgr);
 	polaris10_init_dpm_defaults(hwmgr);
 
 	/* Get leakage voltage based on leakage ID. */
@@ -3520,10 +3602,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	ATOM_Tonga_State *state_entry = (ATOM_Tonga_State *)state;
 	ATOM_Tonga_POWERPLAYTABLE *powerplay_table =
 			(ATOM_Tonga_POWERPLAYTABLE *)pp_table;
-	ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-			(ATOM_Tonga_SCLK_Dependency_Table *)
+	PPTable_Generic_SubTable_Header *sclk_dep_table =
+			(PPTable_Generic_SubTable_Header *)
 			(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
+
 	ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 			(ATOM_Tonga_MCLK_Dependency_Table *)
 			(((unsigned long)powerplay_table) +
@@ -3575,7 +3658,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	/* Performance levels are arranged from low to high. */
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexLow].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexLow].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexLow].ulSclk;
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenLow);
@@ -3586,8 +3673,14 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 			[polaris10_power_state->performance_level_count++]);
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexHigh].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexHigh].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexHigh].ulSclk;
+
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenHigh);
 	performance_level->pcie_lane = get_pcie_lane_support(data->pcie_lane_cap,
@@ -3645,7 +3738,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 		switch (state->classification.ui_label) {
 		case PP_StateUILabel_Performance:
 			data->use_pcie_performance_levels = true;
-
 			for (i = 0; i < ps->performance_level_count; i++) {
 				if (data->pcie_gen_performance.max <
 						ps->performance_levels[i].pcie_gen)
@@ -3661,7 +3753,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.max =
 							ps->performance_levels[i].pcie_lane;
-
 				if (data->pcie_lane_performance.min >
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.min =
@@ -4187,12 +4278,9 @@ int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint32_t mm_boot_level_offset, mm_boot_level_value;
-	struct phm_ppt_v1_information *table_info =
-			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
 	if (!bgate) {
-		data->smc_state_table.SamuBootLevel =
-				(uint8_t) (table_info->mm_dep_table->count - 1);
+		data->smc_state_table.SamuBootLevel = 0;
 		mm_boot_level_offset = data->dpm_table_start +
 				offsetof(SMU74_Discrete_DpmTable, SamuBootLevel);
 		mm_boot_level_offset /= 4;
@@ -4327,6 +4415,15 @@ static int polaris10_notify_link_speed_change_after_state_change(
 	return 0;
 }
 
+static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+		(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ?  0 : -EINVAL;
+}
+
 static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
 {
 	int tmp_result, result = 0;
@@ -4375,6 +4472,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i
 			"Failed to program memory timing parameters!",
 			result = tmp_result);
 
+	tmp_result = polaris10_notify_smc_display(hwmgr);
+	PP_ASSERT_WITH_CODE((0 == tmp_result),
+			"Failed to notify smc display settings!",
+			result = tmp_result);
+
 	tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to unfreeze SCLK MCLK DPM!",
@@ -4409,6 +4511,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_
 			PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm);
 }
 
+
 int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
 {
 	PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay;
@@ -4428,8 +4531,6 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm
 
 	if (num_active_displays > 1)  /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */
 		polaris10_notify_smc_display_change(hwmgr, false);
-	else
-		polaris10_notify_smc_display_change(hwmgr, true);
 
 	return 0;
 }
@@ -4470,6 +4571,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 	frame_time_in_us = 1000000 / refresh_rate;
 
 	pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+	data->frame_time_x2 = frame_time_in_us * 2 / 100;
+
 	display_gap2 = pre_vbi_time_in_us * (ref_clock / 100);
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2);
@@ -4478,8 +4581,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us));
 
-	polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0);
-
 	return 0;
 }
 
@@ -4591,7 +4692,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr)
 		return 0;
 	}
 
-	data->need_long_memory_training = true;
+	data->need_long_memory_training = false;
 
 /*
  *	PPMCME_FirmwareDescriptorEntry *pfd = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
index beedf35cbfa6..afc3434822d1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
@@ -312,6 +312,10 @@ struct polaris10_hwmgr {
 
 	/* soft pptable for re-uploading into smu */
 	void *soft_pp_table;
+
+	uint32_t                              avfs_vdroop_override_setting;
+	bool                                  apply_avfs_cks_off_voltage;
+	uint32_t                              frame_time_x2;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
index aba167f7d167..b206632d4650 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
@@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr,
 	int ret;
 	struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr);
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 
-	if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS)
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
 		return 0;
 
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+			PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting);
+
 	ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ?
 			0 : -1;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
index 58742e0d1492..a3c38bbd1e94 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
@@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index)
 	return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false;
 }
 
+bool acpi_atcs_notify_pcie_device_ready(void *device)
+{
+	int32_t temp_buffer = 1;
+
+	return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS,
+				ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION,
+						&temp_buffer,
+						NULL,
+						0,
+						sizeof(temp_buffer),
+						0);
+}
+
+
 int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 {
 	struct atcs_pref_req_input atcs_input;
@@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 	int result;
 	struct cgs_system_info info = {0};
 
-	if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST))
+	if( 0 != acpi_atcs_notify_pcie_device_ready(device))
 		return -EINVAL;
 
 	info.size = sizeof(struct cgs_system_info);
@@ -77,7 +91,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 						ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST,
 						&atcs_input,
 						&atcs_output,
-						0,
+						1,
 						sizeof(atcs_input),
 						sizeof(atcs_output));
 		if (result != 0)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index da9f5f1b6dc2..bf4e18fd3872 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -1302,3 +1302,46 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
 
 	return 0;
 }
+
+int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param)
+{
+	ATOM_ASIC_PROFILING_INFO_V3_6 *profile = NULL;
+
+	if (param == NULL)
+		return -EINVAL;
+
+	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
+			cgs_atom_get_data_table(hwmgr->device,
+					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+					NULL, NULL, NULL);
+	if (!profile)
+		return -1;
+
+	param->ulAVFS_meanNsigma_Acontant0 = profile->ulAVFS_meanNsigma_Acontant0;
+	param->ulAVFS_meanNsigma_Acontant1 = profile->ulAVFS_meanNsigma_Acontant1;
+	param->ulAVFS_meanNsigma_Acontant2 = profile->ulAVFS_meanNsigma_Acontant2;
+	param->usAVFS_meanNsigma_DC_tol_sigma = profile->usAVFS_meanNsigma_DC_tol_sigma;
+	param->usAVFS_meanNsigma_Platform_mean = profile->usAVFS_meanNsigma_Platform_mean;
+	param->usAVFS_meanNsigma_Platform_sigma = profile->usAVFS_meanNsigma_Platform_sigma;
+	param->ulGB_VDROOP_TABLE_CKSOFF_a0 = profile->ulGB_VDROOP_TABLE_CKSOFF_a0;
+	param->ulGB_VDROOP_TABLE_CKSOFF_a1 = profile->ulGB_VDROOP_TABLE_CKSOFF_a1;
+	param->ulGB_VDROOP_TABLE_CKSOFF_a2 = profile->ulGB_VDROOP_TABLE_CKSOFF_a2;
+	param->ulGB_VDROOP_TABLE_CKSON_a0 = profile->ulGB_VDROOP_TABLE_CKSON_a0;
+	param->ulGB_VDROOP_TABLE_CKSON_a1 = profile->ulGB_VDROOP_TABLE_CKSON_a1;
+	param->ulGB_VDROOP_TABLE_CKSON_a2 = profile->ulGB_VDROOP_TABLE_CKSON_a2;
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	param->usAVFSGB_FUSE_TABLE_CKSON_m2 = profile->usAVFSGB_FUSE_TABLE_CKSON_m2;
+	param->ulAVFSGB_FUSE_TABLE_CKSON_b = profile->ulAVFSGB_FUSE_TABLE_CKSON_b;
+	param->usMaxVoltage_0_25mv = profile->usMaxVoltage_0_25mv;
+	param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF;
+	param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON;
+	param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF;
+	param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON;
+	param->usPSM_Age_ComFactor = profile->usPSM_Age_ComFactor;
+	param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index d24ebb566905..248c5db5f380 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -250,6 +250,35 @@ struct pp_atomctrl_gpio_pin_assignment {
 };
 typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment;
 
+struct pp_atom_ctrl__avfs_parameters {
+	uint32_t  ulAVFS_meanNsigma_Acontant0;
+	uint32_t  ulAVFS_meanNsigma_Acontant1;
+	uint32_t  ulAVFS_meanNsigma_Acontant2;
+	uint16_t usAVFS_meanNsigma_DC_tol_sigma;
+	uint16_t usAVFS_meanNsigma_Platform_mean;
+	uint16_t usAVFS_meanNsigma_Platform_sigma;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a2;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSON_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_b;
+	uint16_t  usMaxVoltage_0_25mv;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSON;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSON;
+	uint16_t usPSM_Age_ComFactor;
+	uint8_t  ucEnableApplyAVFS_CKS_OFF_Voltage;
+	uint8_t  ucReserved;
+};
+
 extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment);
 extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
 extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr);
@@ -278,5 +307,8 @@ extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clo
 extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
 				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
 extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table);
+
+extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index d27e8c40602a..233eb7f36c1d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -4489,6 +4489,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 				VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
index 1b44f4e9b8f5..f127198aafc4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
@@ -197,6 +197,22 @@ typedef struct _ATOM_Tonga_SCLK_Dependency_Table {
 	ATOM_Tonga_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
 } ATOM_Tonga_SCLK_Dependency_Table;
 
+typedef struct _ATOM_Polaris_SCLK_Dependency_Record {
+	UCHAR  ucVddInd;											/* Base voltage */
+	USHORT usVddcOffset;										/* Offset relative to base voltage */
+	ULONG ulSclk;
+	USHORT usEdcCurrent;
+	UCHAR  ucReliabilityTemperature;
+	UCHAR  ucCKSVOffsetandDisable;			/* Bits 0~6: Voltage offset for CKS, Bit 7: Disable/enable for the SCLK level. */
+	ULONG  ulSclkOffset;
+} ATOM_Polaris_SCLK_Dependency_Record;
+
+typedef struct _ATOM_Polaris_SCLK_Dependency_Table {
+	UCHAR ucRevId;
+	UCHAR ucNumEntries;							/* Number of entries. */
+	ATOM_Polaris_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
+} ATOM_Polaris_SCLK_Dependency_Table;
+
 typedef struct _ATOM_Tonga_PCIE_Record {
 	UCHAR ucPCIEGenSpeed;
 	UCHAR usPCIELaneWidth;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 296ec7ef6d45..671fdb4d615a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
@@ -408,41 +408,78 @@ static int get_mclk_voltage_dependency_table(
 static int get_sclk_voltage_dependency_table(
 		struct pp_hwmgr *hwmgr,
 		phm_ppt_v1_clock_voltage_dependency_table **pp_tonga_sclk_dep_table,
-		const ATOM_Tonga_SCLK_Dependency_Table * sclk_dep_table
+		const PPTable_Generic_SubTable_Header *sclk_dep_table
 		)
 {
 	uint32_t table_size, i;
 	phm_ppt_v1_clock_voltage_dependency_table *sclk_table;
 
-	PP_ASSERT_WITH_CODE((0 != sclk_dep_table->ucNumEntries),
-		"Invalid PowerPlay Table!", return -1);
+	if (sclk_dep_table->ucRevId < 1) {
+		const ATOM_Tonga_SCLK_Dependency_Table *tonga_table =
+			    (ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table;
 
-	table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
-		* sclk_dep_table->ucNumEntries;
+		PP_ASSERT_WITH_CODE((0 != tonga_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
 
-	sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* tonga_table->ucNumEntries;
 
-	if (NULL == sclk_table)
-		return -ENOMEM;
+		sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
+			kzalloc(table_size, GFP_KERNEL);
 
-	memset(sclk_table, 0x00, table_size);
-
-	sclk_table->count = (uint32_t)sclk_dep_table->ucNumEntries;
-
-	for (i = 0; i < sclk_dep_table->ucNumEntries; i++) {
-		sclk_table->entries[i].vddInd =
-			sclk_dep_table->entries[i].ucVddInd;
-		sclk_table->entries[i].vdd_offset =
-			sclk_dep_table->entries[i].usVddcOffset;
-		sclk_table->entries[i].clk =
-			sclk_dep_table->entries[i].ulSclk;
-		sclk_table->entries[i].cks_enable =
-			(((sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
-		sclk_table->entries[i].cks_voffset =
-			(sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
-	}
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)tonga_table->ucNumEntries;
+
+		for (i = 0; i < tonga_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				tonga_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				tonga_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				tonga_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((tonga_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(tonga_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+		}
+	} else {
+		const ATOM_Polaris_SCLK_Dependency_Table *polaris_table =
+			    (ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table;
 
+		PP_ASSERT_WITH_CODE((0 != polaris_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
+
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* polaris_table->ucNumEntries;
+
+		sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
+			kzalloc(table_size, GFP_KERNEL);
+
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)polaris_table->ucNumEntries;
+
+		for (i = 0; i < polaris_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				polaris_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				polaris_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				polaris_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((polaris_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(polaris_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+			sclk_table->entries[i].sclk_offset = polaris_table->entries[i].ulSclkOffset;
+		}
+	}
 	*pp_tonga_sclk_dep_table = sclk_table;
 
 	return 0;
@@ -708,8 +745,8 @@ static int init_clock_voltage_dependency(
 	const ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 		(const ATOM_Tonga_MCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usMclkDependencyTableOffset));
-	const ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-		(const ATOM_Tonga_SCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
+	const PPTable_Generic_SubTable_Header *sclk_dep_table =
+		(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
 	const ATOM_Tonga_Hard_Limit_Table *pHardLimits =
 		(const ATOM_Tonga_Hard_Limit_Table *)(((unsigned long) powerplay_table) +
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 28f571449495..77e8e33d5870 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -411,6 +411,8 @@ struct phm_cac_tdp_table {
 	uint8_t  ucVr_I2C_Line;
 	uint8_t  ucPlx_I2C_address;
 	uint8_t  ucPlx_I2C_Line;
+	uint32_t usBoostPowerLimit;
+	uint8_t  ucCKS_LDO_REFSEL;
 };
 
 struct phm_ppm_table {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
index 0c6a413eaa5b..b8f4b73c322e 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
@@ -27,6 +27,7 @@
 
 #pragma pack(push, 1)
 
+#define PPSMC_MSG_SetGBDroopSettings          ((uint16_t) 0x305)
 
 #define PPSMC_SWSTATE_FLAG_DC                           0x01
 #define PPSMC_SWSTATE_FLAG_UVD                          0x02
@@ -391,6 +392,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_SetGpuPllDfsForSclk         ((uint16_t) 0x300)
 #define PPSMC_MSG_Didt_Block_Function		  ((uint16_t) 0x301)
 
+#define PPSMC_MSG_SetVBITimeout               ((uint16_t) 0x306)
+
 #define PPSMC_MSG_SecureSRBMWrite             ((uint16_t) 0x600)
 #define PPSMC_MSG_SecureSRBMRead              ((uint16_t) 0x601)
 #define PPSMC_MSG_SetAddress                  ((uint16_t) 0x800)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
index 3bd5e69b9045..3df5de2cdab0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
@@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device,
 extern int acpi_pcie_perf_request(void *device,
 						uint8_t perf_req,
 						bool advertise);
+extern bool acpi_atcs_notify_pcie_device_ready(void *device);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
index 1a12d85b8e97..fd10a9fa843d 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
@@ -34,6 +34,30 @@
 #define SMU__NUM_LCLK_DPM_LEVELS 8
 #define SMU__NUM_PCIE_DPM_LEVELS 8
 
+#define EXP_M1  35
+#define EXP_M2  92821
+#define EXP_B   66629747
+
+#define EXP_M1_1  365
+#define EXP_M2_1  658700
+#define EXP_B_1   305506134
+
+#define EXP_M1_2  189
+#define EXP_M2_2  379692
+#define EXP_B_2   194609469
+
+#define EXP_M1_3  99
+#define EXP_M2_3  217915
+#define EXP_B_3   122255994
+
+#define EXP_M1_4  51
+#define EXP_M2_4  122643
+#define EXP_B_4   74893384
+
+#define EXP_M1_5  423
+#define EXP_M2_5  1103326
+#define EXP_B_5   728122621
+
 enum SID_OPTION {
 	SID_OPTION_HI,
 	SID_OPTION_LO,
@@ -548,20 +572,20 @@ struct SMU74_Firmware_Header {
 	uint32_t CacConfigTable;
 	uint32_t CacStatusTable;
 
-
 	uint32_t mcRegisterTable;
 
-
 	uint32_t mcArbDramTimingTable;
 
-
-
-
 	uint32_t PmFuseTable;
 	uint32_t Globals;
 	uint32_t ClockStretcherTable;
 	uint32_t VftTable;
-	uint32_t Reserved[21];
+	uint32_t Reserved1;
+	uint32_t AvfsTable;
+	uint32_t AvfsCksOffGbvTable;
+	uint32_t AvfsMeanNSigma;
+	uint32_t AvfsSclkOffsetTable;
+	uint32_t Reserved[16];
 	uint32_t Signature;
 };
 
@@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */
 struct SMU_ClockStretcherDataTableEntry {
 	uint8_t minVID;
 	uint8_t maxVID;
-
-
 	uint16_t setting;
 };
 typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
@@ -769,6 +791,43 @@ struct VFT_TABLE_t {
 typedef struct VFT_TABLE_t VFT_TABLE_t;
 
 
+/* Total margin, root mean square of Fmax + DC + Platform */
+struct AVFS_Margin_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+	int32_t a0;
+	int32_t a1;
+	int32_t a2;
+	uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct AVFS_CksOff_Gbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_meanNsigma_t {
+	uint32_t Aconstant[3];
+	uint16_t DC_tol_sigma;
+	uint16_t Platform_mean;
+	uint16_t Platform_sigma;
+	uint16_t PSM_Age_CompFactor;
+	uint8_t  Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+	uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
 #endif
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
index 0dfe82336dc7..899d6d8108c2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
@@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo {
 
 typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo;
 
+struct SMU_QuadraticCoeffs {
+	int32_t m1;
+	uint32_t b;
+
+	int16_t m2;
+	uint8_t m1_shift;
+	uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
 struct SMU74_Discrete_DpmTable {
 
 	SMU74_PIDController                  GraphicsPIDController;
@@ -258,7 +268,15 @@ struct SMU74_Discrete_DpmTable {
 	uint8_t                             ThermOutPolarity;
 	uint8_t                             ThermOutMode;
 	uint8_t                             BootPhases;
-	uint32_t                            Reserved[4];
+
+	uint8_t                             VRHotLevel;
+	uint8_t                             LdoRefSel;
+	uint8_t                             Reserved1[2];
+	uint16_t                            FanStartTemperature;
+	uint16_t                            FanStopTemperature;
+	uint16_t                            MaxVoltage;
+	uint16_t                            Reserved2;
+	uint32_t                            Reserved[1];
 
 	SMU74_Discrete_GraphicsLevel        GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS];
 	SMU74_Discrete_MemoryLevel          MemoryACPILevel;
@@ -347,6 +365,8 @@ struct SMU74_Discrete_DpmTable {
 
 	uint32_t                            CurrSclkPllRange;
 	sclkFcwRange_t                      SclkFcwRangeTable[NUM_SCLK_RANGE];
+	GB_VDROOP_TABLE_t                   BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+	SMU_QuadraticCoeffs                 AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
 };
 
 typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable;
@@ -550,16 +570,6 @@ struct SMU7_AcpiScoreboard {
 
 typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
 
-struct SMU_QuadraticCoeffs {
-	int32_t m1;
-	uint32_t b;
-
-	int16_t m2;
-	uint8_t m1_shift;
-	uint8_t m2_shift;
-};
-typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
-
 struct SMU74_Discrete_PmFuses {
 	uint8_t BapmVddCVidHiSidd[8];
 	uint8_t BapmVddCVidLoSidd[8];
@@ -821,6 +831,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
 #define DB_PCC_SHIFT 26 
 #define DB_EDC_SHIFT 27
 
+#define BTCGB0_Vdroop_Enable_MASK  0x1
+#define BTCGB1_Vdroop_Enable_MASK  0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT  0
+#define BTCGB1_Vdroop_Enable_SHIFT  1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+
 #pragma pack(pop)
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 043b6ac09d5f..5dba7c509710 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,19 +52,18 @@
 static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = {
 	/*  Min      pcie   DeepSleep Activity  CgSpll      CgSpll    CcPwr  CcPwr  Sclk         Enabled      Enabled                       Voltage    Power */
 	/* Voltage, DpmLevel, DivId,  Level,  FuncCntl3,  FuncCntl4,  DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */
-	{ 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } }
+	{ 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } },
+	{ 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } },
+	{ 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } },
+	{ 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }
 };
 
 static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 =
-	{0x50140000, 0x50140000, 0x00320000, 0x00, 0x00,
-	 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00};
+	{0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00};
 
 /**
 * Set the address for reading/writing the SMC SRAM space.
@@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	&& (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C)));
 }
 
+static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr)
+{
+	uint32_t efuse;
+
+	efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4));
+	efuse &= 0x00000001;
+	if (efuse)
+		return true;
+
+	return false;
+}
+
 /**
 * Send a message to the SMC, and wait for its response.
 *
@@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 */
 int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
+	int ret;
+
 	if (!polaris10_is_smc_ram_running(smumgr))
 		return -1;
 
+
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Previous Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
 
+	if (ret != 1)
+		printk("\n failed to send pre message %x ret is %d \n",  msg, ret);
 
 	cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg);
 
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
+
+	if (ret != 1)
+		printk("\n failed to send message %x ret is %d \n",  msg, ret);
 
 	return 0;
 }
@@ -953,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr)
 		(cgs_handle_t)smu_data->smu_buffer.handle);
 		return -1;);
 
+	if (polaris10_is_hw_avfs_present(smumgr))
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT;
+	else
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 39802c0539b6..3d34fc4ca826 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -266,9 +266,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_check_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
@@ -276,9 +277,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_attach_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index aef3ca8a81fa..016c191221f3 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
 
 		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
 					     factor_reg);
+	} else {
+		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
 	}
 }
 
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c204ef32df16..9bb99e274d23 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1296,14 +1296,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
  */
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
+	unsigned crtc_mask = 0;
+	struct drm_crtc *crtc;
 	int ret;
+	bool global = false;
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->acquire_ctx != state->acquire_ctx)
+			continue;
+
+		crtc_mask |= drm_crtc_mask(crtc);
+		crtc->acquire_ctx = NULL;
+	}
+
+	if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
+		global = true;
+
+		dev->mode_config.acquire_ctx = NULL;
+	}
 
 retry:
 	drm_modeset_backoff(state->acquire_ctx);
 
-	ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
+	ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx);
 	if (ret)
 		goto retry;
+
+	drm_for_each_crtc(crtc, dev)
+		if (drm_crtc_mask(crtc) & crtc_mask)
+			crtc->acquire_ctx = state->acquire_ctx;
+
+	if (global)
+		dev->mode_config.acquire_ctx = state->acquire_ctx;
 }
 EXPORT_SYMBOL(drm_atomic_legacy_backoff);
 
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index a6e42433ef0e..26feb2f8453f 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -528,11 +528,11 @@ drm_crtc_helper_disable(struct drm_crtc *crtc)
 int drm_crtc_helper_set_config(struct drm_mode_set *set)
 {
 	struct drm_device *dev;
-	struct drm_crtc *new_crtc;
-	struct drm_encoder *save_encoders, *new_encoder, *encoder;
+	struct drm_crtc **save_encoder_crtcs, *new_crtc;
+	struct drm_encoder **save_connector_encoders, *new_encoder, *encoder;
 	bool mode_changed = false; /* if true do a full mode set */
 	bool fb_changed = false; /* if true and !mode_changed just do a flip */
-	struct drm_connector *save_connectors, *connector;
+	struct drm_connector *connector;
 	int count = 0, ro, fail = 0;
 	const struct drm_crtc_helper_funcs *crtc_funcs;
 	struct drm_mode_set save_set;
@@ -574,15 +574,15 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 * Allocate space for the backup of all (non-pointer) encoder and
 	 * connector data.
 	 */
-	save_encoders = kzalloc(dev->mode_config.num_encoder *
-				sizeof(struct drm_encoder), GFP_KERNEL);
-	if (!save_encoders)
+	save_encoder_crtcs = kzalloc(dev->mode_config.num_encoder *
+				sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!save_encoder_crtcs)
 		return -ENOMEM;
 
-	save_connectors = kzalloc(dev->mode_config.num_connector *
-				sizeof(struct drm_connector), GFP_KERNEL);
-	if (!save_connectors) {
-		kfree(save_encoders);
+	save_connector_encoders = kzalloc(dev->mode_config.num_connector *
+				sizeof(struct drm_encoder *), GFP_KERNEL);
+	if (!save_connector_encoders) {
+		kfree(save_encoder_crtcs);
 		return -ENOMEM;
 	}
 
@@ -593,12 +593,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		save_encoders[count++] = *encoder;
+		save_encoder_crtcs[count++] = encoder->crtc;
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		save_connectors[count++] = *connector;
+		save_connector_encoders[count++] = connector->encoder;
 	}
 
 	save_set.crtc = set->crtc;
@@ -631,8 +631,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		mode_changed = true;
 	}
 
-	/* take a reference on all connectors in set */
+	/* take a reference on all unbound connectors in set, reuse the
+	 * already taken reference for bound connectors
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_reference(set->connectors[ro]);
 	}
 
@@ -754,30 +758,28 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/* after fail drop reference on all connectors in save set */
-	count = 0;
-	drm_for_each_connector(connector, dev) {
-		drm_connector_unreference(&save_connectors[count++]);
-	}
-
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return 0;
 
 fail:
 	/* Restore all previous data. */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		*encoder = save_encoders[count++];
+		encoder->crtc = save_encoder_crtcs[count++];
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		*connector = save_connectors[count++];
+		connector->encoder = save_connector_encoders[count++];
 	}
 
-	/* after fail drop reference on all connectors in set */
+	/* after fail drop reference on all unbound connectors in set, let
+	 * bound connectors keep their reference
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_unreference(set->connectors[ro]);
 	}
 
@@ -787,8 +789,8 @@ fail:
 				      save_set.y, save_set.fb))
 		DRM_ERROR("failed to restore config after modeset failure\n");
 
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return ret;
 }
 EXPORT_SYMBOL(drm_crtc_helper_set_config);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a13edf5de2d6..6537908050d7 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2927,11 +2927,9 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
 		drm_dp_port_teardown_pdt(port, port->pdt);
 
 		if (!port->input && port->vcpi.vcpi > 0) {
-			if (mgr->mst_state) {
-				drm_dp_mst_reset_vcpi_slots(mgr, port);
-				drm_dp_update_payload_part1(mgr);
-				drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-			}
+			drm_dp_mst_reset_vcpi_slots(mgr, port);
+			drm_dp_update_payload_part1(mgr);
+			drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
 		}
 
 		kref_put(&port->kref, drm_dp_free_mst_port);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 522cfd447892..16353ee81651 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -225,6 +225,7 @@ struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu)
 
 	etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING;
 	etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops;
+	etnaviv_domain->domain.pgsize_bitmap = SZ_4K;
 	etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START;
 	etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1;
 
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f6223f907c15..7f9901b7777b 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -31,7 +31,6 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_iommu.h"
 
 /*
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 468498e3fec1..4c1fb3f8b5a6 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -34,7 +34,7 @@
 
 struct exynos_dp_device {
 	struct drm_encoder         encoder;
-	struct drm_connector       connector;
+	struct drm_connector       *connector;
 	struct drm_bridge          *ptn_bridge;
 	struct drm_device          *drm_dev;
 	struct device              *dev;
@@ -70,7 +70,7 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data)
 static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data)
 {
 	struct exynos_dp_device *dp = to_dp(plat_data);
-	struct drm_connector *connector = &dp->connector;
+	struct drm_connector *connector = dp->connector;
 	struct drm_display_mode *mode;
 	int num_modes = 0;
 
@@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data,
 	int ret;
 
 	drm_connector_register(connector);
+	dp->connector = connector;
 
 	/* Pre-empt DP connector creation if there's a bridge */
 	if (dp->ptn_bridge) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
index 011211e4167d..edbd98ff293e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_core.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
-#include "exynos_drm_fbdev.h"
 
 static LIST_HEAD(exynos_drm_subdrv_list);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 3efe1aa89416..d47216488985 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -30,7 +30,6 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
@@ -120,7 +119,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
 	.timing_base = 0x0,
 	.has_clksel = 1,
 	.has_limited_fmt = 1,
-	.has_hw_trigger = 1,
 };
 
 static struct fimd_driver_data exynos3_fimd_driver_data = {
@@ -171,14 +169,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
 	.lcdblk_vt_shift = 24,
 	.lcdblk_bypass_shift = 15,
 	.lcdblk_mic_bypass_shift = 11,
-	.trg_type = I80_HW_TRG,
 	.has_shadowcon = 1,
 	.has_vidoutcon = 1,
 	.has_vtsel = 1,
 	.has_mic_bypass = 1,
 	.has_dp_clk = 1,
-	.has_hw_trigger = 1,
-	.has_trigger_per_te = 1,
 };
 
 struct fimd_context {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 493552368295..8564c3da0d22 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -48,13 +48,13 @@
 
 /* registers for base address */
 #define G2D_SRC_BASE_ADDR		0x0304
-#define G2D_SRC_STRIDE_REG		0x0308
+#define G2D_SRC_STRIDE			0x0308
 #define G2D_SRC_COLOR_MODE		0x030C
 #define G2D_SRC_LEFT_TOP		0x0310
 #define G2D_SRC_RIGHT_BOTTOM		0x0314
 #define G2D_SRC_PLANE2_BASE_ADDR	0x0318
 #define G2D_DST_BASE_ADDR		0x0404
-#define G2D_DST_STRIDE_REG		0x0408
+#define G2D_DST_STRIDE			0x0408
 #define G2D_DST_COLOR_MODE		0x040C
 #define G2D_DST_LEFT_TOP		0x0410
 #define G2D_DST_RIGHT_BOTTOM		0x0414
@@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 
 	switch (reg_offset) {
 	case G2D_SRC_BASE_ADDR:
-	case G2D_SRC_STRIDE_REG:
+	case G2D_SRC_STRIDE:
 	case G2D_SRC_COLOR_MODE:
 	case G2D_SRC_LEFT_TOP:
 	case G2D_SRC_RIGHT_BOTTOM:
@@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 		reg_type = REG_TYPE_SRC_PLANE2;
 		break;
 	case G2D_DST_BASE_ADDR:
-	case G2D_DST_STRIDE_REG:
+	case G2D_DST_STRIDE:
 	case G2D_DST_COLOR_MODE:
 	case G2D_DST_LEFT_TOP:
 	case G2D_DST_RIGHT_BOTTOM:
@@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev,
 			} else
 				buf_info->types[reg_type] = BUF_TYPE_GEM;
 			break;
-		case G2D_SRC_STRIDE_REG:
-		case G2D_DST_STRIDE_REG:
+		case G2D_SRC_STRIDE:
+		case G2D_DST_STRIDE:
 			if (for_addr)
 				goto err;
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c
index 55f1d37c666a..77f12c00abf9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c
@@ -242,7 +242,7 @@ exynos_drm_plane_check_size(const struct exynos_drm_plane_config *config,
 	    state->v_ratio == (1 << 15))
 		height_ok = true;
 
-	if (width_ok & height_ok)
+	if (width_ok && height_ok)
 		return 0;
 
 	DRM_DEBUG_KMS("scaling mode is not supported");
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 32690332d441..103546834b60 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2365,16 +2365,16 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
 		task = get_pid_task(file->pid, PIDTYPE_PID);
 		if (!task) {
 			ret = -ESRCH;
-			goto out_put;
+			goto out_unlock;
 		}
 		seq_printf(m, "\nproc: %s\n", task->comm);
 		put_task_struct(task);
 		idr_for_each(&file_priv->context_idr, per_file_ctx,
 			     (void *)(unsigned long)m);
 	}
+out_unlock:
 	mutex_unlock(&dev->filelist_mutex);
 
-out_put:
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5faacc6e548d..7c334e902266 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3481,6 +3481,7 @@ int intel_bios_init(struct drm_i915_private *dev_priv);
 bool intel_bios_is_valid_vbt(const void *buf, size_t size);
 bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
 bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
+bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port);
 bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
 bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port);
 bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port);
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index b235b6e88ead..b9022fa053d6 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -139,6 +139,11 @@ fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode,
 	else
 		panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC;
 
+	panel_fixed_mode->width_mm = (dvo_timing->himage_hi << 8) |
+		dvo_timing->himage_lo;
+	panel_fixed_mode->height_mm = (dvo_timing->vimage_hi << 8) |
+		dvo_timing->vimage_lo;
+
 	/* Some VBTs have bogus h/vtotal values */
 	if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal)
 		panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1;
@@ -1187,7 +1192,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
 	}
 	if (bdb->version < 106) {
 		expected_size = 22;
-	} else if (bdb->version < 109) {
+	} else if (bdb->version < 111) {
 		expected_size = 27;
 	} else if (bdb->version < 195) {
 		BUILD_BUG_ON(sizeof(struct old_child_dev_config) != 33);
@@ -1546,6 +1551,45 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin)
 }
 
 /**
+ * intel_bios_is_port_present - is the specified digital port present
+ * @dev_priv:	i915 device instance
+ * @port:	port to check
+ *
+ * Return true if the device in %port is present.
+ */
+bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port)
+{
+	static const struct {
+		u16 dp, hdmi;
+	} port_mapping[] = {
+		[PORT_B] = { DVO_PORT_DPB, DVO_PORT_HDMIB, },
+		[PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, },
+		[PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, },
+		[PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
+	};
+	int i;
+
+	/* FIXME maybe deal with port A as well? */
+	if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping))
+		return false;
+
+	if (!dev_priv->vbt.child_dev_num)
+		return false;
+
+	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
+		const union child_device_config *p_child =
+			&dev_priv->vbt.child_dev[i];
+		if ((p_child->common.dvo_port == port_mapping[port].dp ||
+		     p_child->common.dvo_port == port_mapping[port].hdmi) &&
+		    (p_child->common.device_type & (DEVICE_TYPE_TMDS_DVI_SIGNALING |
+						    DEVICE_TYPE_DISPLAYPORT_OUTPUT)))
+			return true;
+	}
+
+	return false;
+}
+
+/**
  * intel_bios_is_port_edp - is the device in given port eDP
  * @dev_priv:	i915 device instance
  * @port:	port to check
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2113f401f0ba..04452cf3eae8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8275,12 +8275,14 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_encoder *encoder;
+	int i;
 	u32 val, final;
 	bool has_lvds = false;
 	bool has_cpu_edp = false;
 	bool has_panel = false;
 	bool has_ck505 = false;
 	bool can_ssc = false;
+	bool using_ssc_source = false;
 
 	/* We need to take the global config into account */
 	for_each_intel_encoder(dev, encoder) {
@@ -8307,8 +8309,22 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 		can_ssc = true;
 	}
 
-	DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d\n",
-		      has_panel, has_lvds, has_ck505);
+	/* Check if any DPLLs are using the SSC source */
+	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+		u32 temp = I915_READ(PCH_DPLL(i));
+
+		if (!(temp & DPLL_VCO_ENABLE))
+			continue;
+
+		if ((temp & PLL_REF_INPUT_MASK) ==
+		    PLLB_REF_INPUT_SPREADSPECTRUMIN) {
+			using_ssc_source = true;
+			break;
+		}
+	}
+
+	DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d using_ssc_source %d\n",
+		      has_panel, has_lvds, has_ck505, using_ssc_source);
 
 	/* Ironlake: try to setup display ref clock before DPLL
 	 * enabling. This is only under driver's control after
@@ -8345,9 +8361,9 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 				final |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
 		} else
 			final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
-	} else {
-		final |= DREF_SSC_SOURCE_DISABLE;
-		final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+	} else if (using_ssc_source) {
+		final |= DREF_SSC_SOURCE_ENABLE;
+		final |= DREF_SSC1_ENABLE;
 	}
 
 	if (final == val)
@@ -8393,7 +8409,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 		POSTING_READ(PCH_DREF_CONTROL);
 		udelay(200);
 	} else {
-		DRM_DEBUG_KMS("Disabling SSC entirely\n");
+		DRM_DEBUG_KMS("Disabling CPU source output\n");
 
 		val &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
 
@@ -8404,16 +8420,20 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 		POSTING_READ(PCH_DREF_CONTROL);
 		udelay(200);
 
-		/* Turn off the SSC source */
-		val &= ~DREF_SSC_SOURCE_MASK;
-		val |= DREF_SSC_SOURCE_DISABLE;
+		if (!using_ssc_source) {
+			DRM_DEBUG_KMS("Disabling SSC source\n");
 
-		/* Turn off SSC1 */
-		val &= ~DREF_SSC1_ENABLE;
+			/* Turn off the SSC source */
+			val &= ~DREF_SSC_SOURCE_MASK;
+			val |= DREF_SSC_SOURCE_DISABLE;
 
-		I915_WRITE(PCH_DREF_CONTROL, val);
-		POSTING_READ(PCH_DREF_CONTROL);
-		udelay(200);
+			/* Turn off SSC1 */
+			val &= ~DREF_SSC1_ENABLE;
+
+			I915_WRITE(PCH_DREF_CONTROL, val);
+			POSTING_READ(PCH_DREF_CONTROL);
+			udelay(200);
+		}
 	}
 
 	BUG_ON(val != final);
@@ -8427,16 +8447,16 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv)
 	tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
 	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
-			       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
+	if (wait_for_us(I915_READ(SOUTH_CHICKEN2) &
+			FDI_MPHY_IOSFSB_RESET_STATUS, 100))
 		DRM_ERROR("FDI mPHY reset assert timeout\n");
 
 	tmp = I915_READ(SOUTH_CHICKEN2);
 	tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
 	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
-				FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
+	if (wait_for_us((I915_READ(SOUTH_CHICKEN2) &
+			 FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
 		DRM_ERROR("FDI mPHY reset de-assert timeout\n");
 }
 
@@ -9420,8 +9440,8 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
 		val |= LCPLL_CD_SOURCE_FCLK;
 		I915_WRITE(LCPLL_CTL, val);
 
-		if (wait_for_atomic_us(I915_READ(LCPLL_CTL) &
-				       LCPLL_CD_SOURCE_FCLK_DONE, 1))
+		if (wait_for_us(I915_READ(LCPLL_CTL) &
+				LCPLL_CD_SOURCE_FCLK_DONE, 1))
 			DRM_ERROR("Switching to FCLK failed\n");
 
 		val = I915_READ(LCPLL_CTL);
@@ -9494,8 +9514,8 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
 		val &= ~LCPLL_CD_SOURCE_FCLK;
 		I915_WRITE(LCPLL_CTL, val);
 
-		if (wait_for_atomic_us((I915_READ(LCPLL_CTL) &
-					LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
+		if (wait_for_us((I915_READ(LCPLL_CTL) &
+				 LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
 			DRM_ERROR("Switching back to LCPLL failed\n");
 	}
 
@@ -14554,6 +14574,8 @@ static void intel_setup_outputs(struct drm_device *dev)
 		if (I915_READ(PCH_DP_D) & DP_DETECTED)
 			intel_dp_init(dev, PCH_DP_D, PORT_D);
 	} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
+		bool has_edp, has_port;
+
 		/*
 		 * The DP_DETECTED bit is the latched state of the DDC
 		 * SDA pin at boot. However since eDP doesn't require DDC
@@ -14562,27 +14584,37 @@ static void intel_setup_outputs(struct drm_device *dev)
 		 * Thus we can't rely on the DP_DETECTED bit alone to detect
 		 * eDP ports. Consult the VBT as well as DP_DETECTED to
 		 * detect eDP ports.
+		 *
+		 * Sadly the straps seem to be missing sometimes even for HDMI
+		 * ports (eg. on Voyo V3 - CHT x7-Z8700), so check both strap
+		 * and VBT for the presence of the port. Additionally we can't
+		 * trust the port type the VBT declares as we've seen at least
+		 * HDMI ports that the VBT claim are DP or eDP.
 		 */
-		if (I915_READ(VLV_HDMIB) & SDVO_DETECTED &&
-		    !intel_dp_is_edp(dev, PORT_B))
+		has_edp = intel_dp_is_edp(dev, PORT_B);
+		has_port = intel_bios_is_port_present(dev_priv, PORT_B);
+		if (I915_READ(VLV_DP_B) & DP_DETECTED || has_port)
+			has_edp &= intel_dp_init(dev, VLV_DP_B, PORT_B);
+		if ((I915_READ(VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp)
 			intel_hdmi_init(dev, VLV_HDMIB, PORT_B);
-		if (I915_READ(VLV_DP_B) & DP_DETECTED ||
-		    intel_dp_is_edp(dev, PORT_B))
-			intel_dp_init(dev, VLV_DP_B, PORT_B);
 
-		if (I915_READ(VLV_HDMIC) & SDVO_DETECTED &&
-		    !intel_dp_is_edp(dev, PORT_C))
+		has_edp = intel_dp_is_edp(dev, PORT_C);
+		has_port = intel_bios_is_port_present(dev_priv, PORT_C);
+		if (I915_READ(VLV_DP_C) & DP_DETECTED || has_port)
+			has_edp &= intel_dp_init(dev, VLV_DP_C, PORT_C);
+		if ((I915_READ(VLV_HDMIC) & SDVO_DETECTED || has_port) && !has_edp)
 			intel_hdmi_init(dev, VLV_HDMIC, PORT_C);
-		if (I915_READ(VLV_DP_C) & DP_DETECTED ||
-		    intel_dp_is_edp(dev, PORT_C))
-			intel_dp_init(dev, VLV_DP_C, PORT_C);
 
 		if (IS_CHERRYVIEW(dev)) {
-			/* eDP not supported on port D, so don't check VBT */
-			if (I915_READ(CHV_HDMID) & SDVO_DETECTED)
-				intel_hdmi_init(dev, CHV_HDMID, PORT_D);
-			if (I915_READ(CHV_DP_D) & DP_DETECTED)
+			/*
+			 * eDP not supported on port D,
+			 * so no need to worry about it
+			 */
+			has_port = intel_bios_is_port_present(dev_priv, PORT_D);
+			if (I915_READ(CHV_DP_D) & DP_DETECTED || has_port)
 				intel_dp_init(dev, CHV_DP_D, PORT_D);
+			if (I915_READ(CHV_HDMID) & SDVO_DETECTED || has_port)
+				intel_hdmi_init(dev, CHV_HDMID, PORT_D);
 		}
 
 		intel_dsi_init(dev);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index f192f58708c2..40745e38d438 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -663,7 +663,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq)
 		done = wait_event_timeout(dev_priv->gmbus_wait_queue, C,
 					  msecs_to_jiffies_timeout(10));
 	else
-		done = wait_for_atomic(C, 10) == 0;
+		done = wait_for(C, 10) == 0;
 	if (!done)
 		DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n",
 			  has_aux_irq);
@@ -4899,13 +4899,15 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp)
 
 void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
-	struct intel_dp *intel_dp;
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+	if (!HAS_DDI(dev_priv))
+		intel_dp->DP = I915_READ(intel_dp->output_reg);
 
 	if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP)
 		return;
 
-	intel_dp = enc_to_intel_dp(encoder);
-
 	pps_lock(intel_dp);
 
 	/*
@@ -4977,9 +4979,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	intel_display_power_get(dev_priv, power_domain);
 
 	if (long_hpd) {
-		/* indicate that we need to restart link training */
-		intel_dp->train_set_valid = false;
-
 		intel_dp_long_pulse(intel_dp->attached_connector);
 		if (intel_dp->is_mst)
 			ret = IRQ_HANDLED;
@@ -5725,8 +5724,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	if (!fixed_mode && dev_priv->vbt.lfp_lvds_vbt_mode) {
 		fixed_mode = drm_mode_duplicate(dev,
 					dev_priv->vbt.lfp_lvds_vbt_mode);
-		if (fixed_mode)
+		if (fixed_mode) {
 			fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
+			connector->display_info.width_mm = fixed_mode->width_mm;
+			connector->display_info.height_mm = fixed_mode->height_mm;
+		}
 	}
 	mutex_unlock(&dev->mode_config.mutex);
 
@@ -5923,9 +5925,9 @@ fail:
 	return false;
 }
 
-void
-intel_dp_init(struct drm_device *dev,
-	      i915_reg_t output_reg, enum port port)
+bool intel_dp_init(struct drm_device *dev,
+		   i915_reg_t output_reg,
+		   enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_digital_port *intel_dig_port;
@@ -5935,7 +5937,7 @@ intel_dp_init(struct drm_device *dev,
 
 	intel_dig_port = kzalloc(sizeof(*intel_dig_port), GFP_KERNEL);
 	if (!intel_dig_port)
-		return;
+		return false;
 
 	intel_connector = intel_connector_alloc();
 	if (!intel_connector)
@@ -5992,7 +5994,7 @@ intel_dp_init(struct drm_device *dev,
 	if (!intel_dp_init_connector(intel_dig_port, intel_connector))
 		goto err_init_connector;
 
-	return;
+	return true;
 
 err_init_connector:
 	drm_encoder_cleanup(encoder);
@@ -6000,8 +6002,7 @@ err_encoder_init:
 	kfree(intel_connector);
 err_connector_alloc:
 	kfree(intel_dig_port);
-
-	return;
+	return false;
 }
 
 void intel_dp_mst_suspend(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index 0b8eefc2acc5..60fb39cd220b 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -85,8 +85,7 @@ static bool
 intel_dp_reset_link_train(struct intel_dp *intel_dp,
 			uint8_t dp_train_pat)
 {
-	if (!intel_dp->train_set_valid)
-		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+	memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
 	intel_dp_set_signal_levels(intel_dp);
 	return intel_dp_set_link_train(intel_dp, dp_train_pat);
 }
@@ -161,23 +160,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
 			break;
 		}
 
-		/*
-		 * if we used previously trained voltage and pre-emphasis values
-		 * and we don't get clock recovery, reset link training values
-		 */
-		if (intel_dp->train_set_valid) {
-			DRM_DEBUG_KMS("clock recovery not ok, reset");
-			/* clear the flag as we are not reusing train set */
-			intel_dp->train_set_valid = false;
-			if (!intel_dp_reset_link_train(intel_dp,
-						       DP_TRAINING_PATTERN_1 |
-						       DP_LINK_SCRAMBLING_DISABLE)) {
-				DRM_ERROR("failed to enable link training\n");
-				return;
-			}
-			continue;
-		}
-
 		/* Check to see if we've tried the max voltage */
 		for (i = 0; i < intel_dp->lane_count; i++)
 			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
@@ -284,7 +266,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 		/* Make sure clock is still ok */
 		if (!drm_dp_clock_recovery_ok(link_status,
 					      intel_dp->lane_count)) {
-			intel_dp->train_set_valid = false;
 			intel_dp_link_training_clock_recovery(intel_dp);
 			intel_dp_set_link_train(intel_dp,
 						training_pattern |
@@ -301,7 +282,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 
 		/* Try 5 times, then try clock recovery if that fails */
 		if (tries > 5) {
-			intel_dp->train_set_valid = false;
 			intel_dp_link_training_clock_recovery(intel_dp);
 			intel_dp_set_link_train(intel_dp,
 						training_pattern |
@@ -322,10 +302,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 
 	intel_dp_set_idle_link_train(intel_dp);
 
-	if (channel_eq) {
-		intel_dp->train_set_valid = true;
+	if (channel_eq)
 		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
-	}
 }
 
 void intel_dp_stop_link_train(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 3ac705936b04..58f60b27837e 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -366,6 +366,9 @@ ibx_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state,
 					     DPLL_ID_PCH_PLL_B);
 	}
 
+	if (!pll)
+		return NULL;
+
 	/* reference the pll */
 	intel_reference_shared_dpll(pll, crtc_state);
 
@@ -1374,8 +1377,8 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv,
 	I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp);
 	POSTING_READ(BXT_PORT_PLL_ENABLE(port));
 
-	if (wait_for_atomic_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) &
-			PORT_PLL_LOCK), 200))
+	if (wait_for_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK),
+			200))
 		DRM_ERROR("PLL %d not locked\n", port);
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a28b4aac1e02..f7f0f01814f6 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -863,8 +863,6 @@ struct intel_dp {
 	/* This is called before a link training is starterd */
 	void (*prepare_link_retrain)(struct intel_dp *intel_dp);
 
-	bool train_set_valid;
-
 	/* Displayport compliance testing */
 	unsigned long compliance_test_type;
 	unsigned long compliance_test_data;
@@ -1284,7 +1282,7 @@ void intel_csr_ucode_suspend(struct drm_i915_private *);
 void intel_csr_ucode_resume(struct drm_i915_private *);
 
 /* intel_dp.c */
-void intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port);
+bool intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port);
 bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 			     struct intel_connector *intel_connector);
 void intel_dp_set_link_params(struct intel_dp *intel_dp,
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 366ad6c67ce4..4756ef639648 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -1545,6 +1545,9 @@ void intel_dsi_init(struct drm_device *dev)
 		goto err;
 	}
 
+	connector->display_info.width_mm = fixed_mode->width_mm;
+	connector->display_info.height_mm = fixed_mode->height_mm;
+
 	intel_panel_init(&intel_connector->panel, fixed_mode, NULL);
 
 	intel_dsi_add_properties(intel_connector);
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index d5a7cfec589b..647127f3aaff 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -824,8 +824,7 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
-	bool enable_by_default = IS_HASWELL(dev_priv) ||
-				 IS_BROADWELL(dev_priv);
+	bool enable_by_default = IS_BROADWELL(dev_priv);
 
 	if (intel_vgpu_active(dev_priv->dev)) {
 		fbc->no_fbc_reason = "VGPU is active";
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 2c3bd9c2573e..a8844702d11b 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -2142,6 +2142,9 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 	enum port port = intel_dig_port->port;
 	uint8_t alternate_ddc_pin;
 
+	DRM_DEBUG_KMS("Adding HDMI connector on port %c\n",
+		      port_name(port));
+
 	if (WARN(intel_dig_port->max_lanes < 4,
 		 "Not enough lanes (%d) for HDMI on port %c\n",
 		 intel_dig_port->max_lanes, port_name(port)))
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index bc53c0dd34d0..96281e628d2a 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -1082,6 +1082,8 @@ void intel_lvds_init(struct drm_device *dev)
 		fixed_mode = drm_mode_duplicate(dev, dev_priv->vbt.lfp_lvds_vbt_mode);
 		if (fixed_mode) {
 			fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
+			connector->display_info.width_mm = fixed_mode->width_mm;
+			connector->display_info.height_mm = fixed_mode->height_mm;
 			goto out;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h
index c15051de8023..44fb0b35eed3 100644
--- a/drivers/gpu/drm/i915/intel_vbt_defs.h
+++ b/drivers/gpu/drm/i915/intel_vbt_defs.h
@@ -403,9 +403,10 @@ struct lvds_dvo_timing {
 	u8 vsync_off:4;
 	u8 rsvd0:6;
 	u8 hsync_off_hi:2;
-	u8 h_image;
-	u8 v_image;
-	u8 max_hv;
+	u8 himage_lo;
+	u8 vimage_lo;
+	u8 vimage_hi:4;
+	u8 himage_hi:4;
 	u8 h_border;
 	u8 v_border;
 	u8 rsvd1:3;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 300ea03be8f0..d1f248fd3506 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -552,7 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
-	fbcon->helper.fbdev->pixmap.buf_align = 4;
+	if (fbcon->helper.fbdev)
+		fbcon->helper.fbdev->pixmap.buf_align = 4;
 	return 0;
 
 fini:
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 18fab3973ce5..62ad0300cfa5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,7 +1614,7 @@ nvkm_device_pci_func = {
 	.fini = nvkm_device_pci_fini,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
-	.cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64),
+	.cpu_coherent = !IS_ENABLED(CONFIG_ARM),
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index 323c79abe468..41bd5d0f7692 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -276,6 +276,8 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		struct pwr_rail_t *r = &stbl.rail[i];
 		struct nvkm_iccsense_rail *rail;
 		struct nvkm_iccsense_sensor *sensor;
+		int (*read)(struct nvkm_iccsense *,
+			    struct nvkm_iccsense_rail *);
 
 		if (!r->mode || r->resistor_mohm == 0)
 			continue;
@@ -284,31 +286,31 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		if (!sensor)
 			continue;
 
-		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
-		if (!rail)
-			return -ENOMEM;
-
 		switch (sensor->type) {
 		case NVBIOS_EXTDEV_INA209:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina209_read;
+			read = nvkm_iccsense_ina209_read;
 			break;
 		case NVBIOS_EXTDEV_INA219:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina219_read;
+			read = nvkm_iccsense_ina219_read;
 			break;
 		case NVBIOS_EXTDEV_INA3221:
 			if (r->rail >= 3)
 				continue;
-			rail->read = nvkm_iccsense_ina3221_read;
+			read = nvkm_iccsense_ina3221_read;
 			break;
 		default:
 			continue;
 		}
 
+		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
+		if (!rail)
+			return -ENOMEM;
 		sensor->rail_mask |= 1 << r->rail;
+		rail->read = read;
 		rail->sensor = sensor;
 		rail->idx = r->rail;
 		rail->mohm = r->resistor_mohm;
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 2e216e2ea78c..259cd6e6d71c 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -589,7 +589,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev) || ASIC_IS_DCE8(rdev))
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		/* use frac fb div on RS780/RS880 */
-		if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		if (((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		    && !radeon_crtc->ss_enabled)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
@@ -626,7 +627,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 			if (radeon_crtc->ss.refdiv) {
 				radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
 				radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-				if (ASIC_IS_AVIVO(rdev))
+				if (rdev->family >= CHIP_RV770)
 					radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e721e6b2766e..21c44b2293bc 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -630,6 +630,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 /*
  * GPU helpers function.
  */
+
+/**
+ * radeon_device_is_virtual - check if we are running is a virtual environment
+ *
+ * Check if the asic has been passed through to a VM (all asics).
+ * Used at driver startup.
+ * Returns true if virtual or false if not.
+ */
+static bool radeon_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * radeon_card_posted - check if the hw has already been initialized
  *
@@ -643,6 +660,10 @@ bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
 
+	/* for pass through, always force asic_init */
+	if (radeon_device_is_virtual())
+		return false;
+
 	/* required for EFI mode on macbook2,1 which uses an r5xx asic */
 	if (efi_enabled(EFI_BOOT) &&
 	    (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
@@ -1631,7 +1652,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
 	radeon_agp_suspend(rdev);
 
 	pci_save_state(dev->pdev);
-	if (freeze && rdev->family >= CHIP_R600) {
+	if (freeze && rdev->family >= CHIP_CEDAR) {
 		rdev->asic->asic_reset(rdev, true);
 		pci_restore_state(dev->pdev);
 	} else if (suspend) {
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 99510e64e91a..a4b357db8856 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -1,6 +1,6 @@
 config DRM_SUN4I
 	tristate "DRM Support for Allwinner A10 Display Engine"
-	depends on DRM && ARM
+	depends on DRM && ARM && COMMON_CLK
 	depends on ARCH_SUNXI || COMPILE_TEST
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index f7a15c1a93bf..3ab560450a82 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	/* Get the physical address of the buffer in memory */
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-	DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr);
+	DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr);
 
 	/* Compute the start of the displayed memory */
 	bpp = drm_format_plane_cpp(fb->pixel_format, 0);
@@ -198,7 +198,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	paddr += (state->src_x >> 16) * bpp;
 	paddr += (state->src_y >> 16) * fb->pitches[0];
 
-	DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr);
+	DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr);
 
 	/* Write the 32 lower bits of the address (in bits) */
 	lo_paddr = paddr << 3;
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 3ff668cb463c..5b3463197c48 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw,
 static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *parent_rate)
 {
-	return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate);
+	unsigned long best_parent = 0;
+	u8 best_div = 1;
+	int i;
+
+	for (i = 6; i < 127; i++) {
+		unsigned long ideal = rate * i;
+		unsigned long rounded;
+
+		rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
+					    ideal);
+
+		if (rounded == ideal) {
+			best_parent = rounded;
+			best_div = i;
+			goto out;
+		}
+
+		if ((rounded < ideal) && (rounded > best_parent)) {
+			best_parent = rounded;
+			best_div = i;
+		}
+	}
+
+out:
+	*parent_rate = best_parent;
+
+	return best_parent / best_div;
 }
 
 static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate,
 			       unsigned long parent_rate)
 {
 	struct sun4i_dclk *dclk = hw_to_dclk(hw);
-	int div = DIV_ROUND_CLOSEST(parent_rate, rate);
+	u8 div = parent_rate / rate;
 
 	return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG,
 				  GENMASK(6, 0), div);
@@ -127,10 +153,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	const char *clk_name, *parent_name;
 	struct clk_init_data init;
 	struct sun4i_dclk *dclk;
+	int ret;
 
 	parent_name = __clk_get_name(tcon->sclk0);
-	of_property_read_string_index(dev->of_node, "clock-output-names", 0,
-				      &clk_name);
+	ret = of_property_read_string_index(dev->of_node,
+					    "clock-output-names", 0,
+					    &clk_name);
+	if (ret)
+		return ret;
 
 	dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL);
 	if (!dclk)
@@ -140,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	init.ops = &sun4i_dclk_ops;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_PARENT;
 
 	dclk->regmap = tcon->regs;
 	dclk->hw.init = &init;
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 76e922bb60e5..257d2b4f3645 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -24,34 +24,6 @@
 #include "sun4i_layer.h"
 #include "sun4i_tcon.h"
 
-static int sun4i_drv_connector_plug_all(struct drm_device *drm)
-{
-	struct drm_connector *connector, *failed;
-	int ret;
-
-	mutex_lock(&drm->mode_config.mutex);
-	list_for_each_entry(connector, &drm->mode_config.connector_list, head) {
-		ret = drm_connector_register(connector);
-		if (ret) {
-			failed = connector;
-			goto err;
-		}
-	}
-	mutex_unlock(&drm->mode_config.mutex);
-	return 0;
-
-err:
-	list_for_each_entry(connector, &drm->mode_config.connector_list, head) {
-		if (failed == connector)
-			break;
-
-		drm_connector_unregister(connector);
-	}
-	mutex_unlock(&drm->mode_config.mutex);
-
-	return ret;
-}
-
 static int sun4i_drv_enable_vblank(struct drm_device *drm, unsigned int pipe)
 {
 	struct sun4i_drv *drv = drm->dev_private;
@@ -125,6 +97,22 @@ static struct drm_driver sun4i_drv_driver = {
 	.disable_vblank		= sun4i_drv_disable_vblank,
 };
 
+static void sun4i_remove_framebuffers(void)
+{
+	struct apertures_struct *ap;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return;
+
+	/* The framebuffer can be located anywhere in RAM */
+	ap->ranges[0].base = 0;
+	ap->ranges[0].size = ~0;
+
+	remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false);
+	kfree(ap);
+}
+
 static int sun4i_drv_bind(struct device *dev)
 {
 	struct drm_device *drm;
@@ -172,6 +160,9 @@ static int sun4i_drv_bind(struct device *dev)
 	}
 	drm->irq_enabled = true;
 
+	/* Remove early framebuffers (ie. simplefb) */
+	sun4i_remove_framebuffers();
+
 	/* Create our framebuffer */
 	drv->fbdev = sun4i_framebuffer_init(drm);
 	if (IS_ERR(drv->fbdev)) {
@@ -187,7 +178,7 @@ static int sun4i_drv_bind(struct device *dev)
 	if (ret)
 		goto free_drm;
 
-	ret = sun4i_drv_connector_plug_all(drm);
+	ret = drm_connector_register_all(drm);
 	if (ret)
 		goto unregister_drm;
 
@@ -204,6 +195,7 @@ static void sun4i_drv_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 
+	drm_connector_unregister_all(drm);
 	drm_dev_unregister(drm);
 	drm_kms_helper_poll_fini(drm);
 	sun4i_framebuffer_free(drm);
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index ab6494818050..aaffe9e64ffb 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector)
 static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
+	struct sun4i_drv *drv = rgb->drv;
+	struct sun4i_tcon *tcon = drv->tcon;
 	u32 hsync = mode->hsync_end - mode->hsync_start;
 	u32 vsync = mode->vsync_end - mode->vsync_start;
+	unsigned long rate = mode->clock * 1000;
+	long rounded_rate;
 
 	DRM_DEBUG_DRIVER("Validating modes...\n");
 
@@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 
 	DRM_DEBUG_DRIVER("Vertical parameters OK\n");
 
+	rounded_rate = clk_round_rate(tcon->dclk, rate);
+	if (rounded_rate < rate)
+		return MODE_CLOCK_LOW;
+
+	if (rounded_rate > rate)
+		return MODE_CLOCK_HIGH;
+
+	DRM_DEBUG_DRIVER("Clock rate OK\n");
+
 	return MODE_OK;
 }
 
@@ -203,7 +217,7 @@ int sun4i_rgb_init(struct drm_device *drm)
 	int ret;
 
 	/* If we don't have a panel, there's no point in going on */
-	if (!tcon->panel)
+	if (IS_ERR(tcon->panel))
 		return -ENODEV;
 
 	rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 9f19b0e08560..652385f09735 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
 
 	remote = of_graph_get_remote_port_parent(end_node);
 	if (!remote) {
-		DRM_DEBUG_DRIVER("Enable to parse remote node\n");
+		DRM_DEBUG_DRIVER("Unable to parse remote node\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return of_drm_find_panel(remote);
+	return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
 }
 
 static int sun4i_tcon_bind(struct device *dev, struct device *master,
@@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return 0;
 	}
 
-	return sun4i_rgb_init(drm);
+	ret = sun4i_rgb_init(drm);
+	if (ret < 0)
+		goto err_free_clocks;
+
+	return 0;
 
 err_free_clocks:
 	sun4i_tcon_free_clocks(tcon);
@@ -522,12 +526,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev)
 	 * Defer the probe.
 	 */
 	panel = sun4i_tcon_find_panel(node);
-	if (IS_ERR(panel)) {
-		/*
-		 * If we don't have a panel endpoint, just go on
-		 */
-		if (PTR_ERR(panel) != -ENODEV)
-			return -EPROBE_DEFER;
+
+	/*
+	 * If we don't have a panel endpoint, just go on
+	 */
+	if (PTR_ERR(panel) == -EPROBE_DEFER) {
+		DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n");
+		return -EPROBE_DEFER;
 	}
 
 	return component_add(&pdev->dev, &sun4i_tcon_ops);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 6de283c8fa3e..f0374f9b56ca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/frame.h>
 #include <asm/hypervisor.h>
 #include "drmP.h"
 #include "vmwgfx_msg.h"
@@ -194,7 +195,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg)
 
 	return -EINVAL;
 }
-
+STACK_FRAME_NON_STANDARD(vmw_send_msg);
 
 
 /**
@@ -304,6 +305,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
 
 	return 0;
 }
+STACK_FRAME_NON_STANDARD(vmw_recv_msg);
 
 
 /**
diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
index aad8c162a825..0cd4f7216239 100644
--- a/drivers/hid/hid-elo.c
+++ b/drivers/hid/hid-elo.c
@@ -261,7 +261,7 @@ static void elo_remove(struct hid_device *hdev)
 	struct elo_priv *priv = hid_get_drvdata(hdev);
 
 	hid_hw_stop(hdev);
-	flush_workqueue(wq);
+	cancel_delayed_work_sync(&priv->work);
 	kfree(priv);
 }
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index c741f5e50a66..fb6f1f447279 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -61,6 +61,7 @@ MODULE_LICENSE("GPL");
 #define MT_QUIRK_ALWAYS_VALID		(1 << 4)
 #define MT_QUIRK_VALID_IS_INRANGE	(1 << 5)
 #define MT_QUIRK_VALID_IS_CONFIDENCE	(1 << 6)
+#define MT_QUIRK_CONFIDENCE		(1 << 7)
 #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE	(1 << 8)
 #define MT_QUIRK_NO_AREA		(1 << 9)
 #define MT_QUIRK_IGNORE_DUPLICATES	(1 << 10)
@@ -78,6 +79,7 @@ struct mt_slot {
 	__s32 contactid;	/* the device ContactID assigned to this slot */
 	bool touch_state;	/* is the touch valid? */
 	bool inrange_state;	/* is the finger in proximity of the sensor? */
+	bool confidence_state;  /* is the touch made by a finger? */
 };
 
 struct mt_class {
@@ -503,10 +505,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			return 1;
 		case HID_DG_CONFIDENCE:
 			if (cls->name == MT_CLS_WIN_8 &&
-				field->application == HID_DG_TOUCHPAD) {
-				cls->quirks &= ~MT_QUIRK_ALWAYS_VALID;
-				cls->quirks |= MT_QUIRK_VALID_IS_CONFIDENCE;
-			}
+				field->application == HID_DG_TOUCHPAD)
+				cls->quirks |= MT_QUIRK_CONFIDENCE;
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_TIPSWITCH:
@@ -619,6 +619,7 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 		return;
 
 	if (td->curvalid || (td->mtclass.quirks & MT_QUIRK_ALWAYS_VALID)) {
+		int active;
 		int slotnum = mt_compute_slot(td, input);
 		struct mt_slot *s = &td->curdata;
 		struct input_mt *mt = input->mt;
@@ -633,10 +634,14 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 				return;
 		}
 
+		if (!(td->mtclass.quirks & MT_QUIRK_CONFIDENCE))
+			s->confidence_state = 1;
+		active = (s->touch_state || s->inrange_state) &&
+							s->confidence_state;
+
 		input_mt_slot(input, slotnum);
-		input_mt_report_slot_state(input, MT_TOOL_FINGER,
-			s->touch_state || s->inrange_state);
-		if (s->touch_state || s->inrange_state) {
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, active);
+		if (active) {
 			/* this finger is in proximity of the sensor */
 			int wide = (s->w > s->h);
 			/* divided by two to match visual scale of touch */
@@ -701,6 +706,8 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field,
 			td->curdata.touch_state = value;
 			break;
 		case HID_DG_CONFIDENCE:
+			if (quirks & MT_QUIRK_CONFIDENCE)
+				td->curdata.confidence_state = value;
 			if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE)
 				td->curvalid = value;
 			break;
@@ -1401,6 +1408,11 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_NOVATEK,
 			USB_DEVICE_ID_NOVATEK_PCT) },
 
+	/* Ntrig Panel */
+	{ .driver_data = MT_CLS_NSMU,
+		HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+			USB_VENDOR_ID_NTRIG, 0x1b05) },
+
 	/* PixArt optical touch screen */
 	{ .driver_data = MT_CLS_INRANGE_CONTACTNUMBER,
 		MT_USB_DEVICE(USB_VENDOR_ID_PIXART,
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 2f1ddca6f2e0..700145b15088 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 					goto inval;
 			} else if (uref->usage_index >= field->report_count)
 				goto inval;
-
-			else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
-				 (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
-				  uref->usage_index + uref_multi->num_values > field->report_count))
-				goto inval;
 		}
 
+		if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
+		    (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+		     uref->usage_index + uref_multi->num_values > field->report_count))
+			goto inval;
+
 		switch (cmd) {
 		case HIDIOCGUSAGE:
 			uref->value = field->value[uref->usage_index];
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c43318d3416e..2ac87d553e22 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -35,6 +35,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 
 #include <linux/i8k.h>
 
@@ -66,11 +67,13 @@
 
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
+static char bios_machineid[16];
 static struct device *i8k_hwmon_dev;
 static u32 i8k_hwmon_flags;
 static uint i8k_fan_mult = I8K_FAN_MULT;
 static uint i8k_pwm_mult;
 static uint i8k_fan_max = I8K_FAN_HIGH;
+static bool disallow_fan_type_call;
 
 #define I8K_HWMON_HAVE_TEMP1	(1 << 0)
 #define I8K_HWMON_HAVE_TEMP2	(1 << 1)
@@ -94,13 +97,13 @@ module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
 #if IS_ENABLED(CONFIG_I8K)
-static bool restricted;
+static bool restricted = true;
 module_param(restricted, bool, 0);
-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
+MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
 
 static bool power_status;
 module_param(power_status, bool, 0600);
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
+MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
 #endif
 
 static uint fan_mult;
@@ -235,14 +238,28 @@ static int i8k_get_fan_speed(int fan)
 /*
  * Read the fan type.
  */
-static int i8k_get_fan_type(int fan)
+static int _i8k_get_fan_type(int fan)
 {
 	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
 
+	if (disallow_fan_type_call)
+		return -EINVAL;
+
 	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
+static int i8k_get_fan_type(int fan)
+{
+	/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
+	static int types[2] = { INT_MIN, INT_MIN };
+
+	if (types[fan] == INT_MIN)
+		types[fan] = _i8k_get_fan_type(fan);
+
+	return types[fan];
+}
+
 /*
  * Read the fan nominal rpm for specific fan speed.
  */
@@ -387,14 +404,20 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case I8K_BIOS_VERSION:
+		if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) ||
+		    !isdigit(bios_version[2]))
+			return -EINVAL;
+
 		val = (bios_version[0] << 16) |
 				(bios_version[1] << 8) | bios_version[2];
 		break;
 
 	case I8K_MACHINE_ID:
-		memset(buff, 0, 16);
-		strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-			sizeof(buff));
+		if (restricted && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		memset(buff, 0, sizeof(buff));
+		strlcpy(buff, bios_machineid, sizeof(buff));
 		break;
 
 	case I8K_FN_STATUS:
@@ -511,7 +534,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
 		   I8K_PROC_FMT,
 		   bios_version,
-		   i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		   (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
 		   cpu_temp,
 		   left_fan, right_fan, left_speed, right_speed,
 		   ac_power, fn_key);
@@ -718,6 +741,9 @@ static struct attribute *i8k_attrs[] = {
 static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
 			      int index)
 {
+	if (disallow_fan_type_call &&
+	    (index == 9 || index == 12))
+		return 0;
 	if (index >= 0 && index <= 1 &&
 	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
 		return 0;
@@ -767,13 +793,17 @@ static int __init i8k_init_hwmon(void)
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
 
-	/* First fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(0);
+	/* First fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(0);
+	if (err < 0)
+		err = i8k_get_fan_type(0);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
 
-	/* Second fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(1);
+	/* Second fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(1);
+	if (err < 0)
+		err = i8k_get_fan_type(1);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
 
@@ -929,12 +959,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
-static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+/*
+ * On some machines once I8K_SMM_GET_FAN_TYPE is issued then CPU fan speed
+ * randomly going up and down due to bug in Dell SMM or BIOS. Here is blacklist
+ * of affected Dell machines for which we disallow I8K_SMM_GET_FAN_TYPE call.
+ * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121
+ */
+static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = {
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8000
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8000",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -942,16 +974,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
 		},
 	},
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8100
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8100",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"),
 		},
 	},
+	{
+		.ident = "Dell Inspiron 580",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
+		},
+	},
 	{ }
 };
 
@@ -966,8 +1001,7 @@ static int __init i8k_probe(void)
 	/*
 	 * Get DMI information
 	 */
-	if (!dmi_check_system(i8k_dmi_table) ||
-	    dmi_check_system(i8k_blacklist_dmi_table)) {
+	if (!dmi_check_system(i8k_dmi_table)) {
 		if (!ignore_dmi && !force)
 			return -ENODEV;
 
@@ -978,8 +1012,13 @@ static int __init i8k_probe(void)
 			i8k_get_dmi_data(DMI_BIOS_VERSION));
 	}
 
+	if (dmi_check_system(i8k_blacklist_fan_type_dmi_table))
+		disallow_fan_type_call = true;
+
 	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
 		sizeof(bios_version));
+	strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		sizeof(bios_machineid));
 
 	/*
 	 * Get SMM Dell signature
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index eb97a9241d17..15aa49d082c4 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -172,9 +172,9 @@ static void do_read_registers_on_cu(void *_data)
  */
 static int read_registers(struct fam15h_power_data *data)
 {
-	int this_cpu, ret, cpu;
 	int core, this_core;
 	cpumask_var_t mask;
+	int ret, cpu;
 
 	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
 	if (!ret)
@@ -183,7 +183,6 @@ static int read_registers(struct fam15h_power_data *data)
 	memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
 
 	get_online_cpus();
-	this_cpu = smp_processor_id();
 
 	/*
 	 * Choose the first online core of each compute unit, and then
@@ -205,12 +204,9 @@ static int read_registers(struct fam15h_power_data *data)
 		cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask);
 	}
 
-	if (cpumask_test_cpu(this_cpu, mask))
-		do_read_registers_on_cu(data);
+	on_each_cpu_mask(mask, do_read_registers_on_cu, data, true);
 
-	smp_call_function_many(mask, do_read_registers_on_cu, data, true);
 	put_online_cpus();
-
 	free_cpumask_var(mask);
 
 	return 0;
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index c9ff08dbe10c..e30a5939dc0d 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -375,7 +375,7 @@ struct lm90_data {
 	int kind;
 	u32 flags;
 
-	int update_interval;	/* in milliseconds */
+	unsigned int update_interval; /* in milliseconds */
 
 	u8 config_orig;		/* Original configuration register value */
 	u8 convrate_orig;	/* Original conversion rate register value */
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 847d1b5f2c13..688be9e060fc 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -300,13 +300,10 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
 		/*
 		 * The trace run will continue with the same allocated trace
-		 * buffer. As such zero-out the buffer so that we don't end
-		 * up with stale data.
-		 *
-		 * Since the tracer is still enabled drvdata::buf
-		 * can't be NULL.
+		 * buffer. The trace buffer is cleared in tmc_etr_enable_hw(),
+		 * so we don't have to explicitly clear it. Also, since the
+		 * tracer is still enabled drvdata::buf can't be NULL.
 		 */
-		memset(drvdata->buf, 0, drvdata->size);
 		tmc_etr_enable_hw(drvdata);
 	} else {
 		/*
@@ -315,7 +312,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 		 */
 		vaddr = drvdata->vaddr;
 		paddr = drvdata->paddr;
-		drvdata->buf = NULL;
+		drvdata->buf = drvdata->vaddr = NULL;
 	}
 
 	drvdata->reading = false;
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 5443d03a1eec..d08d1ab9bba5 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -385,7 +385,6 @@ static int _coresight_build_path(struct coresight_device *csdev,
 	int i;
 	bool found = false;
 	struct coresight_node *node;
-	struct coresight_connection *conn;
 
 	/* An activated sink has been found.  Enqueue the element */
 	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
@@ -394,8 +393,9 @@ static int _coresight_build_path(struct coresight_device *csdev,
 
 	/* Not a sink - recursively explore each port found on this element */
 	for (i = 0; i < csdev->nr_outport; i++) {
-		conn = &csdev->conns[i];
-		if (_coresight_build_path(conn->child_dev, path) == 0) {
+		struct coresight_device *child_dev = csdev->conns[i].child_dev;
+
+		if (child_dev && _coresight_build_path(child_dev, path) == 0) {
 			found = true;
 			break;
 		}
@@ -425,6 +425,7 @@ out:
 struct list_head *coresight_build_path(struct coresight_device *csdev)
 {
 	struct list_head *path;
+	int rc;
 
 	path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
 	if (!path)
@@ -432,9 +433,10 @@ struct list_head *coresight_build_path(struct coresight_device *csdev)
 
 	INIT_LIST_HEAD(path);
 
-	if (_coresight_build_path(csdev, path)) {
+	rc = _coresight_build_path(csdev, path);
+	if (rc) {
 		kfree(path);
-		path = NULL;
+		return ERR_PTR(rc);
 	}
 
 	return path;
@@ -507,8 +509,9 @@ int coresight_enable(struct coresight_device *csdev)
 		goto out;
 
 	path = coresight_build_path(csdev);
-	if (!path) {
+	if (IS_ERR(path)) {
 		pr_err("building path(s) failed\n");
+		ret = PTR_ERR(path);
 		goto out;
 	}
 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 64b1208bca5e..4a60ad214747 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -245,6 +245,13 @@ struct i801_priv {
 	struct platform_device *mux_pdev;
 #endif
 	struct platform_device *tco_pdev;
+
+	/*
+	 * If set to true the host controller registers are reserved for
+	 * ACPI AML use. Protected by acpi_lock.
+	 */
+	bool acpi_reserved;
+	struct mutex acpi_lock;
 };
 
 #define FEATURE_SMBUS_PEC	(1 << 0)
@@ -718,6 +725,12 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 	int ret = 0, xact = 0;
 	struct i801_priv *priv = i2c_get_adapdata(adap);
 
+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved) {
+		mutex_unlock(&priv->acpi_lock);
+		return -EBUSY;
+	}
+
 	pm_runtime_get_sync(&priv->pci_dev->dev);
 
 	hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
@@ -820,6 +833,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 out:
 	pm_runtime_mark_last_busy(&priv->pci_dev->dev);
 	pm_runtime_put_autosuspend(&priv->pci_dev->dev);
+	mutex_unlock(&priv->acpi_lock);
 	return ret;
 }
 
@@ -1257,6 +1271,83 @@ static void i801_add_tco(struct i801_priv *priv)
 	priv->tco_pdev = pdev;
 }
 
+#ifdef CONFIG_ACPI
+static acpi_status
+i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
+		     u64 *value, void *handler_context, void *region_context)
+{
+	struct i801_priv *priv = handler_context;
+	struct pci_dev *pdev = priv->pci_dev;
+	acpi_status status;
+
+	/*
+	 * Once BIOS AML code touches the OpRegion we warn and inhibit any
+	 * further access from the driver itself. This device is now owned
+	 * by the system firmware.
+	 */
+	mutex_lock(&priv->acpi_lock);
+
+	if (!priv->acpi_reserved) {
+		priv->acpi_reserved = true;
+
+		dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
+		dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
+
+		/*
+		 * BIOS is accessing the host controller so prevent it from
+		 * suspending automatically from now on.
+		 */
+		pm_runtime_get_sync(&pdev->dev);
+	}
+
+	if ((function & ACPI_IO_MASK) == ACPI_READ)
+		status = acpi_os_read_port(address, (u32 *)value, bits);
+	else
+		status = acpi_os_write_port(address, (u32)*value, bits);
+
+	mutex_unlock(&priv->acpi_lock);
+
+	return status;
+}
+
+static int i801_acpi_probe(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+	acpi_status status;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (adev) {
+		status = acpi_install_address_space_handler(adev->handle,
+				ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
+				NULL, priv);
+		if (ACPI_SUCCESS(status))
+			return 0;
+	}
+
+	return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
+}
+
+static void i801_acpi_remove(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (!adev)
+		return;
+
+	acpi_remove_address_space_handler(adev->handle,
+		ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
+
+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved)
+		pm_runtime_put(&priv->pci_dev->dev);
+	mutex_unlock(&priv->acpi_lock);
+}
+#else
+static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
+static inline void i801_acpi_remove(struct i801_priv *priv) { }
+#endif
+
 static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	unsigned char temp;
@@ -1274,6 +1365,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	priv->adapter.dev.parent = &dev->dev;
 	ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
 	priv->adapter.retries = 3;
+	mutex_init(&priv->acpi_lock);
 
 	priv->pci_dev = dev;
 	switch (dev->device) {
@@ -1336,10 +1428,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENODEV;
 	}
 
-	err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
-	if (err) {
+	if (i801_acpi_probe(priv))
 		return -ENODEV;
-	}
 
 	err = pcim_iomap_regions(dev, 1 << SMBBAR,
 				 dev_driver_string(&dev->dev));
@@ -1348,6 +1438,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			"Failed to request SMBus region 0x%lx-0x%Lx\n",
 			priv->smba,
 			(unsigned long long)pci_resource_end(dev, SMBBAR));
+		i801_acpi_remove(priv);
 		return err;
 	}
 
@@ -1412,6 +1503,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	err = i2c_add_adapter(&priv->adapter);
 	if (err) {
 		dev_err(&dev->dev, "Failed to add SMBus adapter\n");
+		i801_acpi_remove(priv);
 		return err;
 	}
 
@@ -1438,6 +1530,7 @@ static void i801_remove(struct pci_dev *dev)
 
 	i801_del_mux(priv);
 	i2c_del_adapter(&priv->adapter);
+	i801_acpi_remove(priv);
 	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
 
 	platform_device_unregister(priv->tco_pdev);
diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c
index aa5f01efd826..30ae35146723 100644
--- a/drivers/i2c/busses/i2c-octeon.c
+++ b/drivers/i2c/busses/i2c-octeon.c
@@ -934,8 +934,15 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 		return result;
 
 	for (i = 0; i < length; i++) {
-		/* for the last byte TWSI_CTL_AAK must not be set */
-		if (i + 1 == length)
+		/*
+		 * For the last byte to receive TWSI_CTL_AAK must not be set.
+		 *
+		 * A special case is I2C_M_RECV_LEN where we don't know the
+		 * additional length yet. If recv_len is set we assume we're
+		 * not reading the final byte and therefore need to set
+		 * TWSI_CTL_AAK.
+		 */
+		if ((i + 1 == length) && !(recv_len && i == 0))
 			final_read = true;
 
 		/* clear iflg to allow next event */
@@ -950,12 +957,8 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 
 		data[i] = octeon_i2c_data_read(i2c);
 		if (recv_len && i == 0) {
-			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(i2c->dev,
-					"%s: read len > I2C_SMBUS_BLOCK_MAX %d\n",
-					__func__, data[i]);
+			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1)
 				return -EPROTO;
-			}
 			length += data[i];
 		}
 
diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 6773cadf7c9f..26e7c5187a58 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -260,6 +260,7 @@ static struct platform_driver i2c_mux_reg_driver = {
 	.remove	= i2c_mux_reg_remove,
 	.driver	= {
 		.name	= "i2c-mux-reg",
+		.of_match_table = of_match_ptr(i2c_mux_reg_of_match),
 	},
 };
 
diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
index 923f56598d4b..3a9f106787d2 100644
--- a/drivers/iio/accel/kxsd9.c
+++ b/drivers/iio/accel/kxsd9.c
@@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro)
 
 	mutex_lock(&st->buf_lock);
 	ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
-	if (ret)
+	if (ret < 0)
 		goto error_ret;
 	st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C);
 	st->tx[1] = (ret & ~KXSD9_FS_MASK) | i;
@@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev,
 		break;
 	case IIO_CHAN_INFO_SCALE:
 		ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
-		if (ret)
+		if (ret < 0)
 			goto error_ret;
 		*val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
 		ret = IIO_VAL_INT_PLUS_MICRO;
diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c
index a1e642ee13d6..7fddc137e91e 100644
--- a/drivers/iio/accel/st_accel_buffer.c
+++ b/drivers/iio/accel/st_accel_buffer.c
@@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_accel_buffer_setup_ops = {
 
 int st_accel_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_accel_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index dc73f2d85e6d..4d95bfc4786c 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -741,6 +741,7 @@ static const struct iio_info accel_info = {
 static const struct iio_trigger_ops st_accel_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_ACCEL_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_ACCEL_TRIGGER_OPS (&st_accel_trigger_ops)
 #else
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 21e19b60e2b9..2123f0ac2e2a 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -396,8 +396,8 @@ static int ad7266_probe(struct spi_device *spi)
 
 	st = iio_priv(indio_dev);
 
-	st->reg = devm_regulator_get(&spi->dev, "vref");
-	if (!IS_ERR_OR_NULL(st->reg)) {
+	st->reg = devm_regulator_get_optional(&spi->dev, "vref");
+	if (!IS_ERR(st->reg)) {
 		ret = regulator_enable(st->reg);
 		if (ret)
 			return ret;
@@ -408,6 +408,9 @@ static int ad7266_probe(struct spi_device *spi)
 
 		st->vref_mv = ret / 1000;
 	} else {
+		/* Any other error indicates that the regulator does exist */
+		if (PTR_ERR(st->reg) != -ENODEV)
+			return PTR_ERR(st->reg);
 		/* Use internal reference */
 		st->vref_mv = 2500;
 	}
diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c
index c55898543a47..f1693dbebb8a 100644
--- a/drivers/iio/common/st_sensors/st_sensors_buffer.c
+++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c
@@ -57,31 +57,20 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	s64 timestamp;
 
-	/* If we have a status register, check if this IRQ came from us */
-	if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) {
-		u8 status;
-
-		len = sdata->tf->read_byte(&sdata->tb, sdata->dev,
-			   sdata->sensor_settings->drdy_irq.addr_stat_drdy,
-			   &status);
-		if (len < 0)
-			dev_err(sdata->dev, "could not read channel status\n");
-
-		/*
-		 * If this was not caused by any channels on this sensor,
-		 * return IRQ_NONE
-		 */
-		if (!(status & (u8)indio_dev->active_scan_mask[0]))
-			return IRQ_NONE;
-	}
+	/* If we do timetamping here, do it before reading the values */
+	if (sdata->hw_irq_trigger)
+		timestamp = sdata->hw_timestamp;
+	else
+		timestamp = iio_get_time_ns();
 
 	len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data);
 	if (len < 0)
 		goto st_sensors_get_buffer_element_error;
 
 	iio_push_to_buffers_with_timestamp(indio_dev, sdata->buffer_data,
-		pf->timestamp);
+					   timestamp);
 
 st_sensors_get_buffer_element_error:
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index dffe00692169..9e59c90f6a8d 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -363,6 +363,11 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
 	if (err < 0)
 		return err;
 
+	/* Disable DRDY, this might be still be enabled after reboot. */
+	err = st_sensors_set_dataready_irq(indio_dev, false);
+	if (err < 0)
+		return err;
+
 	if (sdata->current_fullscale) {
 		err = st_sensors_set_fullscale(indio_dev,
 						sdata->current_fullscale->num);
@@ -424,6 +429,9 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable)
 	else
 		drdy_mask = sdata->sensor_settings->drdy_irq.mask_int2;
 
+	/* Flag to the poll function that the hardware trigger is in use */
+	sdata->hw_irq_trigger = enable;
+
 	/* Enable/Disable the interrupt generator for data ready. */
 	err = st_sensors_write_data_with_mask(indio_dev,
 					sdata->sensor_settings->drdy_irq.addr,
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index da72279fcf99..296e4ff19ae8 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -17,6 +17,73 @@
 #include <linux/iio/common/st_sensors.h>
 #include "st_sensors_core.h"
 
+/**
+ * st_sensors_irq_handler() - top half of the IRQ-based triggers
+ * @irq: irq number
+ * @p: private handler data
+ */
+irqreturn_t st_sensors_irq_handler(int irq, void *p)
+{
+	struct iio_trigger *trig = p;
+	struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	/* Get the time stamp as close in time as possible */
+	sdata->hw_timestamp = iio_get_time_ns();
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * st_sensors_irq_thread() - bottom half of the IRQ-based triggers
+ * @irq: irq number
+ * @p: private handler data
+ */
+irqreturn_t st_sensors_irq_thread(int irq, void *p)
+{
+	struct iio_trigger *trig = p;
+	struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	int ret;
+
+	/*
+	 * If this trigger is backed by a hardware interrupt and we have a
+	 * status register, check if this IRQ came from us
+	 */
+	if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) {
+		u8 status;
+
+		ret = sdata->tf->read_byte(&sdata->tb, sdata->dev,
+			   sdata->sensor_settings->drdy_irq.addr_stat_drdy,
+			   &status);
+		if (ret < 0) {
+			dev_err(sdata->dev, "could not read channel status\n");
+			goto out_poll;
+		}
+		/*
+		 * the lower bits of .active_scan_mask[0] is directly mapped
+		 * to the channels on the sensor: either bit 0 for
+		 * one-dimensional sensors, or e.g. x,y,z for accelerometers,
+		 * gyroscopes or magnetometers. No sensor use more than 3
+		 * channels, so cut the other status bits here.
+		 */
+		status &= 0x07;
+
+		/*
+		 * If this was not caused by any channels on this sensor,
+		 * return IRQ_NONE
+		 */
+		if (!indio_dev->active_scan_mask)
+			return IRQ_NONE;
+		if (!(status & (u8)indio_dev->active_scan_mask[0]))
+			return IRQ_NONE;
+	}
+
+out_poll:
+	/* It's our IRQ: proceed to handle the register polling */
+	iio_trigger_poll_chained(p);
+	return IRQ_HANDLED;
+}
+
 int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops)
 {
@@ -30,6 +97,10 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		return -ENOMEM;
 	}
 
+	iio_trigger_set_drvdata(sdata->trig, indio_dev);
+	sdata->trig->ops = trigger_ops;
+	sdata->trig->dev.parent = sdata->dev;
+
 	irq = sdata->get_irq_data_ready(indio_dev);
 	irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq));
 	/*
@@ -77,9 +148,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	    sdata->sensor_settings->drdy_irq.addr_stat_drdy)
 		irq_trig |= IRQF_SHARED;
 
-	err = request_threaded_irq(irq,
-			iio_trigger_generic_data_rdy_poll,
-			NULL,
+	/* Let's create an interrupt thread masking the hard IRQ here */
+	irq_trig |= IRQF_ONESHOT;
+
+	err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
+			st_sensors_irq_handler,
+			st_sensors_irq_thread,
 			irq_trig,
 			sdata->trig->name,
 			sdata->trig);
@@ -88,10 +162,6 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		goto iio_trigger_free;
 	}
 
-	iio_trigger_set_drvdata(sdata->trig, indio_dev);
-	sdata->trig->ops = trigger_ops;
-	sdata->trig->dev.parent = sdata->dev;
-
 	err = iio_trigger_register(sdata->trig);
 	if (err < 0) {
 		dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
@@ -119,6 +189,18 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
 }
 EXPORT_SYMBOL(st_sensors_deallocate_trigger);
 
+int st_sensors_validate_device(struct iio_trigger *trig,
+			       struct iio_dev *indio_dev)
+{
+	struct iio_dev *indio = iio_trigger_get_drvdata(trig);
+
+	if (indio != indio_dev)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(st_sensors_validate_device);
+
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics ST-sensors trigger");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index e63b957c985f..f7c71da42f15 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -247,7 +247,7 @@ config MCP4922
 
 config STX104
 	tristate "Apex Embedded Systems STX104 DAC driver"
-	depends on X86 && ISA
+	depends on X86 && ISA_BUS_API
 	help
 	  Say yes here to build support for the 2-channel DAC on the Apex
 	  Embedded Systems STX104 integrated analog PC/104 card. The base port
diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c
index 948f600e7059..69bde5909854 100644
--- a/drivers/iio/dac/ad5592r-base.c
+++ b/drivers/iio/dac/ad5592r-base.c
@@ -525,7 +525,7 @@ static int ad5592r_alloc_channels(struct ad5592r_state *st)
 
 	device_for_each_child_node(st->dev, child) {
 		ret = fwnode_property_read_u32(child, "reg", &reg);
-		if (ret || reg > ARRAY_SIZE(st->channel_modes))
+		if (ret || reg >= ARRAY_SIZE(st->channel_modes))
 			continue;
 
 		ret = fwnode_property_read_u32(child, "adi,mode", &tmp);
diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c
index d67b17b6a7aa..a5377044e42f 100644
--- a/drivers/iio/gyro/st_gyro_buffer.c
+++ b/drivers/iio/gyro/st_gyro_buffer.c
@@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_gyro_buffer_setup_ops = {
 
 int st_gyro_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_gyro_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 52a3c87c375c..a8012955a1f6 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -409,6 +409,7 @@ static const struct iio_info gyro_info = {
 static const struct iio_trigger_ops st_gyro_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_GYRO_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_GYRO_TRIGGER_OPS (&st_gyro_trigger_ops)
 #else
diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c
index 3be6d209a159..11535911a5c6 100644
--- a/drivers/iio/humidity/am2315.c
+++ b/drivers/iio/humidity/am2315.c
@@ -165,10 +165,8 @@ static irqreturn_t am2315_trigger_handler(int irq, void *p)
 	struct am2315_sensor_data sensor_data;
 
 	ret = am2315_read_data(data, &sensor_data);
-	if (ret < 0) {
-		mutex_unlock(&data->lock);
+	if (ret < 0)
 		goto err;
-	}
 
 	mutex_lock(&data->lock);
 	if (*(indio_dev->active_scan_mask) == AM2315_ALL_CHANNEL_MASK) {
diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index fa4767613173..a03832a5fc95 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -55,7 +55,7 @@ static const struct {
 	},
 	{ /* IIO_HUMIDITYRELATIVE channel */
 		.shift = 8,
-		.mask = 2,
+		.mask = 3,
 	},
 };
 
@@ -164,14 +164,14 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
 		dev_err(&client->dev, "cannot read high byte measurement");
 		return ret;
 	}
-	val = ret << 6;
+	val = ret << 8;
 
 	ret = i2c_smbus_read_byte(client);
 	if (ret < 0) {
 		dev_err(&client->dev, "cannot read low byte measurement");
 		return ret;
 	}
-	val |= ret >> 2;
+	val |= ret;
 
 	return val;
 }
@@ -211,18 +211,18 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev,
 		return IIO_VAL_INT_PLUS_MICRO;
 	case IIO_CHAN_INFO_SCALE:
 		if (chan->type == IIO_TEMP) {
-			*val = 165;
-			*val2 = 65536 >> 2;
+			*val = 165000;
+			*val2 = 65536;
 			return IIO_VAL_FRACTIONAL;
 		} else {
-			*val = 0;
-			*val2 = 10000;
-			return IIO_VAL_INT_PLUS_MICRO;
+			*val = 100;
+			*val2 = 65536;
+			return IIO_VAL_FRACTIONAL;
 		}
 		break;
 	case IIO_CHAN_INFO_OFFSET:
-		*val = -3971;
-		*val2 = 879096;
+		*val = -15887;
+		*val2 = 515151;
 		return IIO_VAL_INT_PLUS_MICRO;
 	default:
 		return -EINVAL;
diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c
index 0bf92b06d7d8..b8a290ec984e 100644
--- a/drivers/iio/imu/bmi160/bmi160_core.c
+++ b/drivers/iio/imu/bmi160/bmi160_core.c
@@ -209,11 +209,11 @@ static const struct  bmi160_scale_item bmi160_scale_table[] = {
 };
 
 static const struct bmi160_odr bmi160_accel_odr[] = {
-	{0x01, 0, 78125},
-	{0x02, 1, 5625},
-	{0x03, 3, 125},
-	{0x04, 6, 25},
-	{0x05, 12, 5},
+	{0x01, 0, 781250},
+	{0x02, 1, 562500},
+	{0x03, 3, 125000},
+	{0x04, 6, 250000},
+	{0x05, 12, 500000},
 	{0x06, 25, 0},
 	{0x07, 50, 0},
 	{0x08, 100, 0},
@@ -229,7 +229,7 @@ static const struct bmi160_odr bmi160_gyro_odr[] = {
 	{0x08, 100, 0},
 	{0x09, 200, 0},
 	{0x0A, 400, 0},
-	{0x0B, 8000, 0},
+	{0x0B, 800, 0},
 	{0x0C, 1600, 0},
 	{0x0D, 3200, 0},
 };
@@ -364,8 +364,8 @@ int bmi160_set_odr(struct bmi160_data *data, enum bmi160_sensor_type t,
 
 	return regmap_update_bits(data->regmap,
 				  bmi160_regs[t].config,
-				  bmi160_odr_table[t].tbl[i].bits,
-				  bmi160_regs[t].config_odr_mask);
+				  bmi160_regs[t].config_odr_mask,
+				  bmi160_odr_table[t].tbl[i].bits);
 }
 
 static int bmi160_get_odr(struct bmi160_data *data, enum bmi160_sensor_type t,
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
index f62b8bd9ad7e..dd6fc6d21f9d 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
@@ -56,6 +56,7 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev,
 	int i;
 	acpi_status status;
 	union acpi_object *cpm;
+	int ret;
 
 	status = acpi_evaluate_object(adev->handle, "CNF0", NULL, &buffer);
 	if (ACPI_FAILURE(status))
@@ -82,10 +83,10 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev,
 			}
 		}
 	}
-
+	ret = cpm->package.count;
 	kfree(buffer.pointer);
 
-	return cpm->package.count;
+	return ret;
 }
 
 static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data)
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index ae2806aafb72..0c52dfe64977 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -210,22 +210,35 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig,
 
 	/* Prevent the module from being removed whilst attached to a trigger */
 	__module_get(pf->indio_dev->info->driver_module);
+
+	/* Get irq number */
 	pf->irq = iio_trigger_get_irq(trig);
+	if (pf->irq < 0)
+		goto out_put_module;
+
+	/* Request irq */
 	ret = request_threaded_irq(pf->irq, pf->h, pf->thread,
 				   pf->type, pf->name,
 				   pf);
-	if (ret < 0) {
-		module_put(pf->indio_dev->info->driver_module);
-		return ret;
-	}
+	if (ret < 0)
+		goto out_put_irq;
 
+	/* Enable trigger in driver */
 	if (trig->ops && trig->ops->set_trigger_state && notinuse) {
 		ret = trig->ops->set_trigger_state(trig, true);
 		if (ret < 0)
-			module_put(pf->indio_dev->info->driver_module);
+			goto out_free_irq;
 	}
 
 	return ret;
+
+out_free_irq:
+	free_irq(pf->irq, pf);
+out_put_irq:
+	iio_trigger_put_irq(trig, pf->irq);
+out_put_module:
+	module_put(pf->indio_dev->info->driver_module);
+	return ret;
 }
 
 static int iio_trigger_detach_poll_func(struct iio_trigger *trig,
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index b4dbb3912977..651d57b8abbf 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -1011,6 +1011,7 @@ static int apds9960_probe(struct i2c_client *client,
 
 	iio_device_attach_buffer(indio_dev, buffer);
 
+	indio_dev->dev.parent = &client->dev;
 	indio_dev->info = &apds9960_info;
 	indio_dev->name = APDS9960_DRV_NAME;
 	indio_dev->channels = apds9960_channels;
diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c
index 72b364e4aa72..b54dcba05a82 100644
--- a/drivers/iio/light/bh1780.c
+++ b/drivers/iio/light/bh1780.c
@@ -84,7 +84,7 @@ static int bh1780_debugfs_reg_access(struct iio_dev *indio_dev,
 	int ret;
 
 	if (!readval)
-		bh1780_write(bh1780, (u8)reg, (u8)writeval);
+		return bh1780_write(bh1780, (u8)reg, (u8)writeval);
 
 	ret = bh1780_read(bh1780, (u8)reg);
 	if (ret < 0)
@@ -187,7 +187,7 @@ static int bh1780_probe(struct i2c_client *client,
 
 	indio_dev->dev.parent = &client->dev;
 	indio_dev->info = &bh1780_info;
-	indio_dev->name = id->name;
+	indio_dev->name = "bh1780";
 	indio_dev->channels = bh1780_channels;
 	indio_dev->num_channels = ARRAY_SIZE(bh1780_channels);
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -226,7 +226,8 @@ static int bh1780_remove(struct i2c_client *client)
 static int bh1780_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct bh1780_data *bh1780 = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct bh1780_data *bh1780 = iio_priv(indio_dev);
 	int ret;
 
 	ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_POFF);
@@ -241,7 +242,8 @@ static int bh1780_runtime_suspend(struct device *dev)
 static int bh1780_runtime_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct bh1780_data *bh1780 = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct bh1780_data *bh1780 = iio_priv(indio_dev);
 	int ret;
 
 	ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_PON);
diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c
index e01e58a9bd14..f17cb2ea18f5 100644
--- a/drivers/iio/light/max44000.c
+++ b/drivers/iio/light/max44000.c
@@ -147,7 +147,6 @@ static const struct iio_chan_spec max44000_channels[] = {
 	{
 		.type = IIO_PROXIMITY,
 		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.scan_index = MAX44000_SCAN_INDEX_PRX,
 		.scan_type = {
 			.sign		= 'u',
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index ecd3bd0a9769..0a9e8fadfa9d 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
 
 int st_magn_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_magn_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 62036d2a9956..8250fc322c56 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -572,6 +572,7 @@ static const struct iio_info magn_info = {
 static const struct iio_trigger_ops st_magn_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_MAGN_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_MAGN_TRIGGER_OPS (&st_magn_trigger_ops)
 #else
diff --git a/drivers/iio/pressure/bmp280.c b/drivers/iio/pressure/bmp280.c
index 2f1498e12bb2..724452d61846 100644
--- a/drivers/iio/pressure/bmp280.c
+++ b/drivers/iio/pressure/bmp280.c
@@ -879,8 +879,8 @@ static int bmp280_probe(struct i2c_client *client,
 	if (ret < 0)
 		return ret;
 	if (chip_id != id->driver_data) {
-		dev_err(&client->dev, "bad chip id.  expected %x got %x\n",
-			BMP280_CHIP_ID, chip_id);
+		dev_err(&client->dev, "bad chip id.  expected %lx got %x\n",
+			id->driver_data, chip_id);
 		return -EINVAL;
 	}
 
diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c
index 2ff53f222352..99468d0a64e7 100644
--- a/drivers/iio/pressure/st_pressure_buffer.c
+++ b/drivers/iio/pressure/st_pressure_buffer.c
@@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_press_buffer_setup_ops = {
 
 int st_press_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_press_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 9e9b72a8f18f..92a118c3c4ac 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -28,15 +28,21 @@
 #include <linux/iio/common/st_sensors.h>
 #include "st_pressure.h"
 
+#define MCELSIUS_PER_CELSIUS			1000
+
+/* Default pressure sensitivity */
 #define ST_PRESS_LSB_PER_MBAR			4096UL
 #define ST_PRESS_KPASCAL_NANO_SCALE		(100000000UL / \
 						 ST_PRESS_LSB_PER_MBAR)
+
+/* Default temperature sensitivity */
 #define ST_PRESS_LSB_PER_CELSIUS		480UL
-#define ST_PRESS_CELSIUS_NANO_SCALE		(1000000000UL / \
-						 ST_PRESS_LSB_PER_CELSIUS)
+#define ST_PRESS_MILLI_CELSIUS_OFFSET		42500UL
+
 #define ST_PRESS_NUMBER_DATA_CHANNELS		1
 
 /* FULLSCALE */
+#define ST_PRESS_FS_AVL_1100MB			1100
 #define ST_PRESS_FS_AVL_1260MB			1260
 
 #define ST_PRESS_1_OUT_XL_ADDR			0x28
@@ -54,9 +60,6 @@
 #define ST_PRESS_LPS331AP_PW_MASK		0x80
 #define ST_PRESS_LPS331AP_FS_ADDR		0x23
 #define ST_PRESS_LPS331AP_FS_MASK		0x30
-#define ST_PRESS_LPS331AP_FS_AVL_1260_VAL	0x00
-#define ST_PRESS_LPS331AP_FS_AVL_1260_GAIN	ST_PRESS_KPASCAL_NANO_SCALE
-#define ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN	ST_PRESS_CELSIUS_NANO_SCALE
 #define ST_PRESS_LPS331AP_BDU_ADDR		0x20
 #define ST_PRESS_LPS331AP_BDU_MASK		0x04
 #define ST_PRESS_LPS331AP_DRDY_IRQ_ADDR		0x22
@@ -67,9 +70,14 @@
 #define ST_PRESS_LPS331AP_OD_IRQ_ADDR		0x22
 #define ST_PRESS_LPS331AP_OD_IRQ_MASK		0x40
 #define ST_PRESS_LPS331AP_MULTIREAD_BIT		true
-#define ST_PRESS_LPS331AP_TEMP_OFFSET		42500
 
 /* CUSTOM VALUES FOR LPS001WP SENSOR */
+
+/* LPS001WP pressure resolution */
+#define ST_PRESS_LPS001WP_LSB_PER_MBAR		16UL
+/* LPS001WP temperature resolution */
+#define ST_PRESS_LPS001WP_LSB_PER_CELSIUS	64UL
+
 #define ST_PRESS_LPS001WP_WAI_EXP		0xba
 #define ST_PRESS_LPS001WP_ODR_ADDR		0x20
 #define ST_PRESS_LPS001WP_ODR_MASK		0x30
@@ -78,6 +86,8 @@
 #define ST_PRESS_LPS001WP_ODR_AVL_13HZ_VAL	0x03
 #define ST_PRESS_LPS001WP_PW_ADDR		0x20
 #define ST_PRESS_LPS001WP_PW_MASK		0x40
+#define ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN \
+	(100000000UL / ST_PRESS_LPS001WP_LSB_PER_MBAR)
 #define ST_PRESS_LPS001WP_BDU_ADDR		0x20
 #define ST_PRESS_LPS001WP_BDU_MASK		0x04
 #define ST_PRESS_LPS001WP_MULTIREAD_BIT		true
@@ -94,11 +104,6 @@
 #define ST_PRESS_LPS25H_ODR_AVL_25HZ_VAL	0x04
 #define ST_PRESS_LPS25H_PW_ADDR			0x20
 #define ST_PRESS_LPS25H_PW_MASK			0x80
-#define ST_PRESS_LPS25H_FS_ADDR			0x00
-#define ST_PRESS_LPS25H_FS_MASK			0x00
-#define ST_PRESS_LPS25H_FS_AVL_1260_VAL		0x00
-#define ST_PRESS_LPS25H_FS_AVL_1260_GAIN	ST_PRESS_KPASCAL_NANO_SCALE
-#define ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN	ST_PRESS_CELSIUS_NANO_SCALE
 #define ST_PRESS_LPS25H_BDU_ADDR		0x20
 #define ST_PRESS_LPS25H_BDU_MASK		0x04
 #define ST_PRESS_LPS25H_DRDY_IRQ_ADDR		0x23
@@ -109,7 +114,6 @@
 #define ST_PRESS_LPS25H_OD_IRQ_ADDR		0x22
 #define ST_PRESS_LPS25H_OD_IRQ_MASK		0x40
 #define ST_PRESS_LPS25H_MULTIREAD_BIT		true
-#define ST_PRESS_LPS25H_TEMP_OFFSET		42500
 #define ST_PRESS_LPS25H_OUT_XL_ADDR		0x28
 #define ST_TEMP_LPS25H_OUT_L_ADDR		0x2b
 
@@ -161,7 +165,9 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
 			.storagebits = 16,
 			.endianness = IIO_LE,
 		},
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
+		.info_mask_separate =
+			BIT(IIO_CHAN_INFO_RAW) |
+			BIT(IIO_CHAN_INFO_SCALE),
 		.modified = 0,
 	},
 	{
@@ -177,7 +183,7 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
 		},
 		.info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_OFFSET),
+			BIT(IIO_CHAN_INFO_SCALE),
 		.modified = 0,
 	},
 	IIO_CHAN_SOFT_TIMESTAMP(1)
@@ -212,11 +218,14 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.addr = ST_PRESS_LPS331AP_FS_ADDR,
 			.mask = ST_PRESS_LPS331AP_FS_MASK,
 			.fs_avl = {
+				/*
+				 * Pressure and temperature sensitivity values
+				 * as defined in table 3 of LPS331AP datasheet.
+				 */
 				[0] = {
 					.num = ST_PRESS_FS_AVL_1260MB,
-					.value = ST_PRESS_LPS331AP_FS_AVL_1260_VAL,
-					.gain = ST_PRESS_LPS331AP_FS_AVL_1260_GAIN,
-					.gain2 = ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN,
+					.gain = ST_PRESS_KPASCAL_NANO_SCALE,
+					.gain2 = ST_PRESS_LSB_PER_CELSIUS,
 				},
 			},
 		},
@@ -261,7 +270,17 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
 		},
 		.fs = {
-			.addr = 0,
+			.fs_avl = {
+				/*
+				 * Pressure and temperature resolution values
+				 * as defined in table 3 of LPS001WP datasheet.
+				 */
+				[0] = {
+					.num = ST_PRESS_FS_AVL_1100MB,
+					.gain = ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN,
+					.gain2 = ST_PRESS_LPS001WP_LSB_PER_CELSIUS,
+				},
+			},
 		},
 		.bdu = {
 			.addr = ST_PRESS_LPS001WP_BDU_ADDR,
@@ -298,14 +317,15 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
 		},
 		.fs = {
-			.addr = ST_PRESS_LPS25H_FS_ADDR,
-			.mask = ST_PRESS_LPS25H_FS_MASK,
 			.fs_avl = {
+				/*
+				 * Pressure and temperature sensitivity values
+				 * as defined in table 3 of LPS25H datasheet.
+				 */
 				[0] = {
 					.num = ST_PRESS_FS_AVL_1260MB,
-					.value = ST_PRESS_LPS25H_FS_AVL_1260_VAL,
-					.gain = ST_PRESS_LPS25H_FS_AVL_1260_GAIN,
-					.gain2 = ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN,
+					.gain = ST_PRESS_KPASCAL_NANO_SCALE,
+					.gain2 = ST_PRESS_LSB_PER_CELSIUS,
 				},
 			},
 		},
@@ -364,26 +384,26 @@ static int st_press_read_raw(struct iio_dev *indio_dev,
 
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
-		*val = 0;
-
 		switch (ch->type) {
 		case IIO_PRESSURE:
+			*val = 0;
 			*val2 = press_data->current_fullscale->gain;
-			break;
+			return IIO_VAL_INT_PLUS_NANO;
 		case IIO_TEMP:
+			*val = MCELSIUS_PER_CELSIUS;
 			*val2 = press_data->current_fullscale->gain2;
-			break;
+			return IIO_VAL_FRACTIONAL;
 		default:
 			err = -EINVAL;
 			goto read_error;
 		}
 
-		return IIO_VAL_INT_PLUS_NANO;
 	case IIO_CHAN_INFO_OFFSET:
 		switch (ch->type) {
 		case IIO_TEMP:
-			*val = 425;
-			*val2 = 10;
+			*val = ST_PRESS_MILLI_CELSIUS_OFFSET *
+			       press_data->current_fullscale->gain2;
+			*val2 = MCELSIUS_PER_CELSIUS;
 			break;
 		default:
 			err = -EINVAL;
@@ -425,6 +445,7 @@ static const struct iio_info press_info = {
 static const struct iio_trigger_ops st_press_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_PRESS_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_PRESS_TRIGGER_OPS (&st_press_trigger_ops)
 #else
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index f4d29d5dbd5f..e2f926cdcad2 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -64,6 +64,7 @@ struct as3935_state {
 	struct delayed_work work;
 
 	u32 tune_cap;
+	u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */
 	u8 buf[2] ____cacheline_aligned;
 };
 
@@ -72,7 +73,8 @@ static const struct iio_chan_spec as3935_channels[] = {
 		.type           = IIO_PROXIMITY,
 		.info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_PROCESSED),
+			BIT(IIO_CHAN_INFO_PROCESSED) |
+			BIT(IIO_CHAN_INFO_SCALE),
 		.scan_index     = 0,
 		.scan_type = {
 			.sign           = 'u',
@@ -181,7 +183,12 @@ static int as3935_read_raw(struct iio_dev *indio_dev,
 		/* storm out of range */
 		if (*val == AS3935_DATA_MASK)
 			return -EINVAL;
-		*val *= 1000;
+
+		if (m == IIO_CHAN_INFO_PROCESSED)
+			*val *= 1000;
+		break;
+	case IIO_CHAN_INFO_SCALE:
+		*val = 1000;
 		break;
 	default:
 		return -EINVAL;
@@ -206,10 +213,10 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private)
 	ret = as3935_read(st, AS3935_DATA, &val);
 	if (ret)
 		goto err_read;
-	val &= AS3935_DATA_MASK;
-	val *= 1000;
 
-	iio_push_to_buffers_with_timestamp(indio_dev, &val, pf->timestamp);
+	st->buffer[0] = val & AS3935_DATA_MASK;
+	iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer,
+					   pf->timestamp);
 err_read:
 	iio_trigger_notify_done(indio_dev->trig);
 
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 040966775f40..1a2984c28b95 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -411,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 
 	for (ix = 0; ix < table->sz; ix++)
 		if (table->data_vec[ix].attr.ndev == ndev)
-			if (!del_gid(ib_dev, port, table, ix, false))
+			if (!del_gid(ib_dev, port, table, ix,
+				     !!(table->data_vec[ix].props &
+					GID_TABLE_ENTRY_DEFAULT)))
 				deleted = true;
 
 	write_unlock_irq(&table->rwlock);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f0c91ba3178a..ad1b1adcf6f0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-static int cma_disable_callback(struct rdma_id_private *id_priv,
-				enum rdma_cm_state state)
-{
-	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != state) {
-		mutex_unlock(&id_priv->handler_mutex);
-		return -EINVAL;
-	}
-	return 0;
-}
-
 struct rdma_cm_id *rdma_create_id(struct net *net,
 				  rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps,
@@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_cm_event event;
 	int ret = 0;
 
+	mutex_lock(&id_priv->handler_mutex);
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
+	     id_priv->state != RDMA_CM_CONNECT) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
-		return 0;
+	     id_priv->state != RDMA_CM_DISCONNECT))
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
 
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
-	struct rdma_id_private *listen_id, *conn_id;
+	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event;
 	struct net_device *net_dev;
 	int offset, ret;
@@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		goto net_dev_put;
 	}
 
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) {
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN) {
 		ret = -ECONNABORTED;
-		goto net_dev_put;
+		goto err1;
 	}
 
 	memset(&event, 0, sizeof event);
@@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (iw_event->event) {
@@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		return ret;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return ret;
 }
@@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = -ECONNABORTED;
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
-		return -ECONNABORTED;
+
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN)
+		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
 	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
@@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	struct rdma_id_private *id_priv;
 	struct cma_multicast *mc = multicast->context;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = 0;
 
 	id_priv = mc->id_priv;
-	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
-	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
+	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
+		goto out;
 
 	if (!status)
 		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
@@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 		return 0;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return 0;
 }
@@ -3878,12 +3876,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
 		   rdma_start_port(id_priv->cma_dev->device)];
 	if (addr->sa_family == AF_INET) {
-		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
 			err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
 					    true);
-		if (!err) {
-			mc->igmp_joined = true;
-			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+			if (!err)
+				mc->igmp_joined = true;
 		}
 	} else {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1a8babb8ee3c..825021d1008b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	struct ib_srq			*srq = NULL;
 	struct ib_qp			*qp;
 	char				*buf;
-	struct ib_qp_init_attr		attr;
+	struct ib_qp_init_attr		attr = {};
 	struct ib_uverbs_ex_create_qp_resp resp;
 	int				ret;
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 1d7d4cf442e3..6298f54b4137 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		ah_attr->grh.dgid = sgid;
 
 		if (!rdma_cap_eth_ah(device, port_num)) {
-			ret = ib_find_cached_gid_by_port(device, &dgid,
-							 IB_GID_TYPE_IB,
-							 port_num, NULL,
-							 &gid_index);
-			if (ret)
-				return ret;
+			if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+				ret = ib_find_cached_gid_by_port(device, &dgid,
+								 IB_GID_TYPE_IB,
+								 port_num, NULL,
+								 &gid_index);
+				if (ret)
+					return ret;
+			} else {
+				gid_index = 0;
+			}
 		}
 
 		ah_attr->grh.sgid_index = (u8) gid_index;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 81619fbb5842..f5de85178055 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
 static void dc_start(struct hfi1_devdata *);
 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
 			   unsigned int *np);
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
 
 /*
  * Error interrupt table entry.  This is used as input to the interrupt
@@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work)
 	}
 
 	reset_neighbor_info(ppd);
-	if (ppd->mgmt_allowed)
-		remove_full_mgmt_pkey(ppd);
 
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7070,12 +7068,16 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 			    __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
 	ppd->pkeys[2] = FULL_MGMT_P_KEY;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	hfi1_event_pkey_change(ppd->dd, ppd->port);
 }
 
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 {
-	ppd->pkeys[2] = 0;
-	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	if (ppd->pkeys[2] != 0) {
+		ppd->pkeys[2] = 0;
+		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+		hfi1_event_pkey_change(ppd->dd, ppd->port);
+	}
 }
 
 /*
@@ -9168,6 +9170,13 @@ int start_link(struct hfi1_pportdata *ppd)
 		return 0;
 	}
 
+	/*
+	 * FULL_MGMT_P_KEY is cleared from the pkey table, so that the
+	 * pkey table can be configured properly if the HFI unit is connected
+	 * to switch port with MgmtAllowed=NO
+	 */
+	clear_full_mgmt_pkey(ppd);
+
 	return set_link_state(ppd, HLS_DN_POLL);
 }
 
@@ -9777,7 +9786,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
 			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
 		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
-	int i;
+	int i, j;
 	u32 thres;
 
 	for (i = 0; i < ppd->vls_supported; i++) {
@@ -9801,7 +9810,10 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 			    sc_mtu_to_threshold(dd->vld[i].sc,
 						dd->vld[i].mtu,
 						dd->rcd[0]->rcvhdrqentsize));
-		sc_set_cr_threshold(dd->vld[i].sc, thres);
+		for (j = 0; j < INIT_SC_PER_VL; j++)
+			sc_set_cr_threshold(
+					pio_select_send_context_vl(dd, j, i),
+					    thres);
 	}
 	thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
 		    sc_mtu_to_threshold(dd->vld[15].sc,
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7a5b0e676cc7..c702a009608f 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 
 	switch (cmd) {
 	case HFI1_IOCTL_ASSIGN_CTXT:
+		if (uctxt)
+			return -EINVAL;
+
 		if (copy_from_user(&uinfo,
 				   (struct hfi1_user_info __user *)arg,
 				   sizeof(uinfo)))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 0d28a5a40fae..eed971ccd2a1 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
-	struct hfi1_devdata *dd = NULL;
+	struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
 	struct hfi1_pportdata *ppd;
 
 	/* First, lock the non-writable module parameters */
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 219029576ba0..fca07a1d6c28 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp)
 	memset(data, 0, size);
 }
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
+{
+	struct ib_event event;
+
+	event.event = IB_EVENT_PKEY_CHANGE;
+	event.device = &dd->verbs_dev.rdi.ibdev;
+	event.element.port_num = port;
+	ib_dispatch_event(&event);
+}
+
 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 {
 	struct ib_mad_send_buf *send_buf;
@@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 	}
 
 	if (changed) {
-		struct ib_event event;
-
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
-
-		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.rdi.ibdev;
-		event.element.port_num = port;
-		ib_dispatch_event(&event);
+		hfi1_event_pkey_change(dd, port);
 	}
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee08675333..8b734aaae88a 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -434,4 +434,6 @@ struct sc2vlnt {
 		    COUNTER_MASK(1, 3) | \
 		    COUNTER_MASK(1, 4))
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
+
 #endif				/* _HFI1_MAD_H */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d5edb1afbb8f..d4022450b73f 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		/* counter is reset if occupancy count changes */
 		if (reg != reg_prev)
 			loop = 0;
-		if (loop > 500) {
+		if (loop > 50000) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
 				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
@@ -1798,6 +1798,21 @@ static void pio_map_rcu_callback(struct rcu_head *list)
 }
 
 /*
+ * Set credit return threshold for the kernel send context
+ */
+static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
+{
+	u32 thres;
+
+	thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
+					    50),
+		    sc_mtu_to_threshold(dd->kernel_send_context[scontext],
+					dd->vld[i].mtu,
+					dd->rcd[0]->rcvhdrqentsize));
+	sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
+}
+
+/*
  * pio_map_init - called when #vls change
  * @dd: hfi1_devdata
  * @port: port number
@@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
 			if (!newmap->map[i])
 				goto bail;
 			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
-			/* assign send contexts */
+			/*
+			 * assign send contexts and
+			 * adjust credit return threshold
+			 */
 			for (j = 0; j < sz; j++) {
-				if (dd->kernel_send_context[scontext])
+				if (dd->kernel_send_context[scontext]) {
 					newmap->map[i]->ksc[j] =
 					dd->kernel_send_context[scontext];
+					set_threshold(dd, scontext, i);
+				}
 				if (++scontext >= first_scontext +
 						  vl_scontexts[i])
 					/* wrap back to first send context */
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669f0817..9fb561682c66 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 
 	if (ppd->qsfp_info.cache_valid) {
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
-			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
+			snprintf(lenstr, sizeof(lenstr), "%dM ",
+				 cache[QSFP_MOD_LEN_OFFS]);
 
 		power_byte = cache[QSFP_MOD_PWR_OFFS];
 		sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c4112c61..d8fb056526f8 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 				struct rvt_qp *qp)
+	__must_hold(&qp->s_lock)
 {
 	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
-	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
 	write_seqlock(&dev->iowait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct hfi1_qp_priv *priv;
@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 	}
 out:
 	write_sequnlock(&dev->iowait_lock);
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return tx;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..a1d6e0807f97 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 
 static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
 					    struct rvt_qp *qp)
+	__must_hold(&qp->slock)
 {
 	struct verbs_txreq *tx;
 	struct hfi1_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8b9532034558..b738acdb9b02 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -113,6 +113,8 @@
 
 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
 #define IW_CFG_FPM_QP_COUNT		32768
+#define I40IW_MAX_PAGES_PER_FMR		512
+#define I40IW_MIN_PAGES_PER_FMR		1
 
 #define I40IW_MTU_TO_MSS		40
 #define I40IW_DEFAULT_MSS		1460
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 02a735b64208..33959ed14563 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
 	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
 	props->atomic_cap = IB_ATOMIC_NONE;
 	props->max_map_per_fmr = 1;
+	props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
 	return 0;
 }
 
@@ -1527,7 +1528,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
 	mutex_lock(&iwdev->pbl_mutex);
 	status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
 	mutex_unlock(&iwdev->pbl_mutex);
-	if (!status)
+	if (status)
 		goto err1;
 
 	if (palloc->level != I40IW_LEVEL_1)
@@ -2149,6 +2150,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
 			struct i40iw_fast_reg_stag_info info;
 
+			memset(&info, 0, sizeof(info));
 			info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
 			info.access_rights |= i40iw_get_user_access(flags);
 			info.stag_key = reg_wr(ib_wr)->key & 0xff;
@@ -2158,10 +2160,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
 			info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
 			info.total_len = iwmr->ibmr.length;
+			info.reg_addr_pa = *(u64 *)palloc->level1.addr;
 			info.first_pm_pbl_index = palloc->level1.idx;
 			info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
 			info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
 
+			if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
+				info.chunk_size = 1;
+
 			if (page_shift == 21)
 				info.page_size = 1; /* 2M page */
 
@@ -2327,13 +2333,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq,
 {
 	struct i40iw_cq *iwcq;
 	struct i40iw_cq_uk *ukcq;
-	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+	unsigned long flags;
+	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
 
 	iwcq = (struct i40iw_cq *)ibcq;
 	ukcq = &iwcq->sc_cq.cq_uk;
-	if (notify_flags == IB_CQ_NEXT_COMP)
-		cq_notify = IW_CQ_COMPL_EVENT;
+	if (notify_flags == IB_CQ_SOLICITED)
+		cq_notify = IW_CQ_COMPL_SOLICITED;
+	spin_lock_irqsave(&iwcq->lock, flags);
 	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	spin_unlock_irqrestore(&iwcq->lock, flags);
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 105246fba2e7..5fc623362731 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 
 	ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
 	ah->av.ib.g_slid  = ah_attr->src_path_bits;
+	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ah->av.ib.g_slid   |= 0x80;
 		ah->av.ib.gid_index = ah_attr->grh.sgid_index;
@@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 		       !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
 			--ah->av.ib.stat_rate;
 	}
-	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 
 	return &ah->ibah;
 }
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index d68f506c1922..9c2e53d28f98 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 		tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
 	spin_unlock(&tun_qp->tx_lock);
 	if (ret)
-		goto out;
+		goto end;
 
 	tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
 	if (tun_qp->tx_ring[tun_tx_ix].ah)
@@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	wr.wr.send_flags = IB_SEND_SIGNALED;
 
 	ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
-out:
-	if (ret)
-		ib_destroy_ah(ah);
+	if (!ret)
+		return 0;
+ out:
+	spin_lock(&tun_qp->tx_lock);
+	tun_qp->tx_ix_tail++;
+	spin_unlock(&tun_qp->tx_lock);
+	tun_qp->tx_ring[tun_tx_ix].ah = NULL;
+end:
+	ib_destroy_ah(ah);
 	return ret;
 }
 
@@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
 
 	ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
+	if (!ret)
+		return 0;
+
+	spin_lock(&sqp->tx_lock);
+	sqp->tx_ix_tail++;
+	spin_unlock(&sqp->tx_lock);
+	sqp->tx_ring[wire_tx_ix].ah = NULL;
 out:
-	if (ret)
-		ib_destroy_ah(ah);
+	ib_destroy_ah(ah);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0eb09e104542..42a46078d7d5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 	int is_bonded = mlx4_is_bonded(dev);
 
+	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
+		return ERR_PTR(-EINVAL);
+
 	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 		return ERR_PTR(-EOPNOTSUPP);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6c5ac5d8f32f..29acda249612 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -139,7 +139,7 @@ struct mlx4_ib_mr {
 	u32			max_pages;
 	struct mlx4_mr		mmr;
 	struct ib_umem	       *umem;
-	void			*pages_alloc;
+	size_t			page_map_size;
 };
 
 struct mlx4_ib_mw {
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 631272172a0b..5d73989d9771 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 		      struct mlx4_ib_mr *mr,
 		      int max_pages)
 {
-	int size = max_pages * sizeof(u64);
-	int add_size;
 	int ret;
 
-	add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+	/* Ensure that size is aligned to DMA cacheline
+	 * requirements.
+	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
+	 * so page_map_size will never cross PAGE_SIZE.
+	 */
+	mr->page_map_size = roundup(max_pages * sizeof(u64),
+				    MLX4_MR_PAGES_ALIGN);
 
-	mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
-	if (!mr->pages_alloc)
+	/* Prevent cross page boundary allocation. */
+	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
+	if (!mr->pages)
 		return -ENOMEM;
 
-	mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
-
 	mr->page_map = dma_map_single(device->dma_device, mr->pages,
-				      size, DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	if (dma_mapping_error(device->dma_device, mr->page_map)) {
 		ret = -ENOMEM;
@@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 	}
 
 	return 0;
-err:
-	kfree(mr->pages_alloc);
 
+err:
+	free_page((unsigned long)mr->pages);
 	return ret;
 }
 
@@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
 {
 	if (mr->pages) {
 		struct ib_device *device = mr->ibmr.device;
-		int size = mr->max_pages * sizeof(u64);
 
 		dma_unmap_single(device->dma_device, mr->page_map,
-				 size, DMA_TO_DEVICE);
-		kfree(mr->pages_alloc);
+				 mr->page_map_size, DMA_TO_DEVICE);
+		free_page((unsigned long)mr->pages);
 		mr->pages = NULL;
 	}
 }
@@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 	mr->npages = 0;
 
 	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
-				   sizeof(u64) * mr->max_pages,
-				   DMA_TO_DEVICE);
+				   mr->page_map_size, DMA_TO_DEVICE);
 
 	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
 
 	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
-				      sizeof(u64) * mr->max_pages,
-				      DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	return rc;
 }
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 81b0e1fbec1d..8db8405c1e99 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_RC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
-			sizeof (struct mlx4_wqe_atomic_seg) +
+			sizeof (struct mlx4_wqe_masked_atomic_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_SMI:
 	case MLX4_IB_QPT_GSI:
@@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
 				       udata, 0, &qp, gfp);
-		if (err)
+		if (err) {
+			kfree(qp);
 			return ERR_PTR(err);
+		}
 
 		qp->ibqp.qp_num = qp->mqp.qpn;
 		qp->xrcdn = xrcdn;
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1534af113058..364aab9f3c9e 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
 	pma_cnt_ext->port_xmit_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
 					 transmitted_ib_multicast.octets) >> 2);
-	pma_cnt_ext->port_xmit_data =
+	pma_cnt_ext->port_rcv_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
 					 received_ib_multicast.octets) >> 2);
 	pma_cnt_ext->port_xmit_packets =
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b48ad85315dc..dad63f038bb8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1528,21 +1528,18 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 {
 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
 	struct mlx5_ib_flow_handler *handler;
+	struct mlx5_flow_spec *spec;
 	void *ib_flow = flow_attr + 1;
-	u8 match_criteria_enable = 0;
 	unsigned int spec_index;
-	u32 *match_c;
-	u32 *match_v;
 	u32 action;
 	int err = 0;
 
 	if (!is_valid_attr(flow_attr))
 		return ERR_PTR(-EINVAL);
 
-	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+	spec = mlx5_vzalloc(sizeof(*spec));
 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
-	if (!handler || !match_c || !match_v) {
+	if (!handler || !spec) {
 		err = -ENOMEM;
 		goto free;
 	}
@@ -1550,7 +1547,8 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	INIT_LIST_HEAD(&handler->list);
 
 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
-		err = parse_flow_attr(match_c, match_v, ib_flow);
+		err = parse_flow_attr(spec->match_criteria,
+				      spec->match_value, ib_flow);
 		if (err < 0)
 			goto free;
 
@@ -1558,11 +1556,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	}
 
 	/* Outer header support only */
-	match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria))
+		<< 0;
 	action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 		MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-	handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
-					   match_c, match_v,
+	handler->rule = mlx5_add_flow_rule(ft, spec,
 					   action,
 					   MLX5_FS_DEFAULT_FLOW_TAG,
 					   dst);
@@ -1578,8 +1576,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 free:
 	if (err)
 		kfree(handler);
-	kfree(match_c);
-	kfree(match_v);
+	kvfree(spec);
 	return err ? ERR_PTR(err) : handler;
 }
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ce434228a5ea..ce0a7ab35a22 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
 			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
 		else
 			return fence;
-
-	} else {
-		return 0;
+	} else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
+		return MLX5_FENCE_MODE_FENCE;
 	}
+
+	return 0;
 }
 
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index ff946d5f59e4..382466a90da7 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
 
 	switch (cmd.type) {
 	case QIB_CMD_ASSIGN_CTXT:
+		if (rcd) {
+			ret = -EINVAL;
+			goto bail;
+		}
+
 		ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
 		if (ret)
 			goto bail;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 7de5134bec85..41ba7e9cadaa 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 			/* wrap to first map page, invert bit 0 */
 			offset = qpt->incr | ((offset & 1) ^ 1);
 		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		/* there can be no set bits in low-order QoS bits */
+		WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 		qpn = mk_qpn(qpt, map, offset);
 	}
 
@@ -576,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_mig_state = IB_MIG_MIGRATED;
-	if (qp->s_ack_queue)
-		memset(
-			qp->s_ack_queue,
-			0,
-			rvt_max_atomic(rdi) *
-				sizeof(*qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
@@ -705,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		 * initialization that is needed.
 		 */
 		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
-		if (!priv)
+		if (IS_ERR(priv)) {
+			ret = priv;
 			goto bail_qp;
+		}
 		qp->priv = priv;
 		qp->timeout_jiffies =
 			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index e1cc2cc42f25..30c4fda7a05a 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 			    !rdi->driver_f.quiesce_qp ||
 			    !rdi->driver_f.notify_error_qp ||
 			    !rdi->driver_f.mtu_from_qp ||
-			    !rdi->driver_f.mtu_to_path_mtu ||
-			    !rdi->driver_f.shut_down_port ||
-			    !rdi->driver_f.cap_mask_chg)
+			    !rdi->driver_f.mtu_to_path_mtu)
 				return -EINVAL;
 		break;
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index e68b20cba70b..4a4155640d51 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1638,8 +1638,7 @@ retry:
 	 */
 	qp_init->cap.max_send_wr = srp_sq_size / 2;
 	qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
-	qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
-					sdev->device->attrs.max_sge);
+	qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
 	qp_init->port_num = ch->sport->port;
 
 	ch->qp = ib_create_qp(sdev->pd, qp_init);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index fee6bfd7ca21..389030487da7 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -106,6 +106,7 @@ enum {
 	SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
 
 	SRPT_DEF_SG_TABLESIZE = 128,
+	SRPT_DEF_SG_PER_WQE = 16,
 
 	MIN_SRPT_SQ_SIZE = 16,
 	DEF_SRPT_SQ_SIZE = 4096,
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 804dbcc37d3f..3438e98c145a 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1031,17 +1031,17 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
 
 	case XTYPE_XBOXONE:
 		packet->data[0] = 0x09; /* activate rumble */
-		packet->data[1] = 0x08;
+		packet->data[1] = 0x00;
 		packet->data[2] = xpad->odata_serial++;
-		packet->data[3] = 0x08; /* continuous effect */
-		packet->data[4] = 0x00; /* simple rumble mode */
-		packet->data[5] = 0x03; /* L and R actuator only */
-		packet->data[6] = 0x00; /* TODO: LT actuator */
-		packet->data[7] = 0x00; /* TODO: RT actuator */
+		packet->data[3] = 0x09;
+		packet->data[4] = 0x00;
+		packet->data[5] = 0x0F;
+		packet->data[6] = 0x00;
+		packet->data[7] = 0x00;
 		packet->data[8] = strong / 512;	/* left actuator */
 		packet->data[9] = weak / 512;	/* right actuator */
-		packet->data[10] = 0x80;	/* length of pulse */
-		packet->data[11] = 0x00;	/* stop period of pulse */
+		packet->data[10] = 0xFF;
+		packet->data[11] = 0x00;
 		packet->data[12] = 0x00;
 		packet->len = 13;
 		packet->pending = true;
@@ -1437,16 +1437,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 			break;
 	}
 
-	if (xpad_device[i].xtype == XTYPE_XBOXONE &&
-	    intf->cur_altsetting->desc.bInterfaceNumber != 0) {
-		/*
-		 * The Xbox One controller lists three interfaces all with the
-		 * same interface class, subclass and protocol. Differentiate by
-		 * interface number.
-		 */
-		return -ENODEV;
-	}
-
 	xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL);
 	if (!xpad)
 		return -ENOMEM;
@@ -1478,6 +1468,8 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 		if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
 			if (intf->cur_altsetting->desc.bInterfaceProtocol == 129)
 				xpad->xtype = XTYPE_XBOX360W;
+			else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208)
+				xpad->xtype = XTYPE_XBOXONE;
 			else
 				xpad->xtype = XTYPE_XBOX360;
 		} else {
@@ -1492,6 +1484,17 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 			xpad->mapping |= MAP_STICKS_TO_NULL;
 	}
 
+	if (xpad->xtype == XTYPE_XBOXONE &&
+	    intf->cur_altsetting->desc.bInterfaceNumber != 0) {
+		/*
+		 * The Xbox One controller lists three interfaces all with the
+		 * same interface class, subclass and protocol. Differentiate by
+		 * interface number.
+		 */
+		error = -ENODEV;
+		goto err_free_in_urb;
+	}
+
 	error = xpad_init_output(intf, xpad);
 	if (error)
 		goto err_free_in_urb;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 78f93cf68840..be5b399da5d3 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1568,13 +1568,7 @@ static int elantech_set_properties(struct elantech_data *etd)
 		case 5:
 			etd->hw_version = 3;
 			break;
-		case 6:
-		case 7:
-		case 8:
-		case 9:
-		case 10:
-		case 13:
-		case 14:
+		case 6 ... 14:
 			etd->hw_version = 4;
 			break;
 		default:
diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
index a3f0f5a47490..0f586780ceb4 100644
--- a/drivers/input/mouse/vmmouse.c
+++ b/drivers/input/mouse/vmmouse.c
@@ -355,18 +355,11 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
 		return -ENXIO;
 	}
 
-	if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
-		psmouse_dbg(psmouse, "VMMouse port in use.\n");
-		return -EBUSY;
-	}
-
 	/* Check if the device is present */
 	response = ~VMMOUSE_PROTO_MAGIC;
 	VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2);
-	if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) {
-		release_region(VMMOUSE_PROTO_PORT, 4);
+	if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU)
 		return -ENXIO;
-	}
 
 	if (set_properties) {
 		psmouse->vendor = VMMOUSE_VENDOR;
@@ -374,8 +367,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
 		psmouse->model = version;
 	}
 
-	release_region(VMMOUSE_PROTO_PORT, 4);
-
 	return 0;
 }
 
@@ -394,7 +385,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse)
 	psmouse_reset(psmouse);
 	input_unregister_device(priv->abs_dev);
 	kfree(priv);
-	release_region(VMMOUSE_PROTO_PORT, 4);
 }
 
 /**
@@ -438,15 +428,10 @@ int vmmouse_init(struct psmouse *psmouse)
 	struct input_dev *rel_dev = psmouse->dev, *abs_dev;
 	int error;
 
-	if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
-		psmouse_dbg(psmouse, "VMMouse port in use.\n");
-		return -EBUSY;
-	}
-
 	psmouse_reset(psmouse);
 	error = vmmouse_enable(psmouse);
 	if (error)
-		goto release_region;
+		return error;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	abs_dev = input_allocate_device();
@@ -502,8 +487,5 @@ init_fail:
 	kfree(priv);
 	psmouse->private = NULL;
 
-release_region:
-	release_region(VMMOUSE_PROTO_PORT, 4);
-
 	return error;
 }
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index bab3c6acf6a2..0c9191cf324d 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -27,7 +27,7 @@ MODULE_AUTHOR("Jaya Kumar <jayakumar.lkml@gmail.com>");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-#define W8001_MAX_LENGTH	11
+#define W8001_MAX_LENGTH	13
 #define W8001_LEAD_MASK		0x80
 #define W8001_LEAD_BYTE		0x80
 #define W8001_TAB_MASK		0x40
@@ -339,6 +339,15 @@ static irqreturn_t w8001_interrupt(struct serio *serio,
 		w8001->idx = 0;
 		parse_multi_touch(w8001);
 		break;
+
+	default:
+		/*
+		 * ThinkPad X60 Tablet PC (pen only device) sometimes
+		 * sends invalid data packets that are larger than
+		 * W8001_PKTLEN_TPCPEN. Let's start over again.
+		 */
+		if (!w8001->touch_dev && w8001->idx > W8001_PKTLEN_TPCPEN - 1)
+			w8001->idx = 0;
 	}
 
 	return IRQ_HANDLED;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9e0034196e10..d091defc3426 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1107,13 +1107,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
 				break;
 			}
 
+			devid = e->devid;
 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
 				    hid, uid,
 				    PCI_BUS_NUM(devid),
 				    PCI_SLOT(devid),
 				    PCI_FUNC(devid));
 
-			devid  = e->devid;
 			flags = e->flags;
 
 			ret = add_acpi_hid_device(hid, uid, &devid, false);
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 94b68213c50d..5f6b3bcab078 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1941,6 +1941,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
+	.map_sg			= default_iommu_map_sg,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
 	.remove_device		= arm_smmu_remove_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a644d0cec2d8..cfe410eedaf0 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3222,11 +3222,6 @@ static int __init init_dmars(void)
 			}
 		}
 
-		iommu_flush_write_buffer(iommu);
-		iommu_set_root_entry(iommu);
-		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-
 		if (!ecap_pass_through(iommu->ecap))
 			hw_pass_through = 0;
 #ifdef CONFIG_INTEL_IOMMU_SVM
@@ -3235,6 +3230,18 @@ static int __init init_dmars(void)
 #endif
 	}
 
+	/*
+	 * Now that qi is enabled on all iommus, set the root entry and flush
+	 * caches. This is required on some Intel X58 chipsets, otherwise the
+	 * flush_context function will loop forever and the boot hangs.
+	 */
+	for_each_active_iommu(iommu, drhd) {
+		iommu_flush_write_buffer(iommu);
+		iommu_set_root_entry(iommu);
+		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+	}
+
 	if (iommu_pass_through)
 		iommu_identity_mapping |= IDENTMAP_ALL;
 
@@ -4600,7 +4607,7 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 		if (!iommu)
 			continue;
 
-		for (did = 0; did < 0xffff; did++) {
+		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
 			domain = get_iommu_domain(iommu, did);
 
 			if (!domain)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index ba764a0835d3..e23001bfcfee 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -420,8 +420,10 @@ retry:
 
 		/* Try replenishing IOVAs by flushing rcache. */
 		flushed_rcache = true;
+		preempt_disable();
 		for_each_online_cpu(cpu)
 			free_cpu_cached_iovas(cpu, iovad);
+		preempt_enable();
 		goto retry;
 	}
 
@@ -749,7 +751,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -779,6 +781,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+	put_cpu_ptr(rcache->cpu_rcaches);
 
 	if (mag_to_free) {
 		iova_magazine_free_pfns(mag_to_free, iovad);
@@ -812,7 +815,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 	bool has_pfn = false;
 	unsigned long flags;
 
-	cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
@@ -834,6 +837,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+	put_cpu_ptr(rcache->cpu_rcaches);
 
 	return iova_pfn;
 }
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index c7d6156ff536..25b4627cb57f 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -815,7 +815,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	dte_addr = virt_to_phys(rk_domain->dt);
 	for (i = 0; i < iommu->num_mmu; i++) {
 		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
-		rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+		rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
 	}
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 3b5e10aa48ab..8a4adbeb2b8c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -746,6 +746,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
 		/* verify that it doesn't conflict with an IPI irq */
 		if (test_bit(spec->hwirq, ipi_resrv))
 			return -EBUSY;
+
+		hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
+
+		return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+						     &gic_level_irq_controller,
+						     NULL);
 	} else {
 		base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
 		if (base_hwirq == gic_shared_intrs) {
@@ -867,10 +873,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
 						    &gic_level_irq_controller,
 						    NULL);
 		if (ret)
-			return ret;
+			goto error;
 	}
 
 	return 0;
+
+error:
+	irq_domain_free_irqs_parent(d, virq, nr_irqs);
+	return ret;
 }
 
 void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 3495d5d6547f..3bce44893021 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -53,11 +53,12 @@ static void led_timer_function(unsigned long data)
 
 	if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) {
 		led_set_brightness_nosleep(led_cdev, LED_OFF);
+		led_cdev->flags &= ~LED_BLINK_SW;
 		return;
 	}
 
 	if (led_cdev->flags & LED_BLINK_ONESHOT_STOP) {
-		led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
+		led_cdev->flags &=  ~(LED_BLINK_ONESHOT_STOP | LED_BLINK_SW);
 		return;
 	}
 
@@ -151,6 +152,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
 		return;
 	}
 
+	led_cdev->flags |= LED_BLINK_SW;
 	mod_timer(&led_cdev->blink_timer, jiffies + 1);
 }
 
@@ -219,6 +221,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
 	del_timer_sync(&led_cdev->blink_timer);
 	led_cdev->blink_delay_on = 0;
 	led_cdev->blink_delay_off = 0;
+	led_cdev->flags &= ~LED_BLINK_SW;
 }
 EXPORT_SYMBOL_GPL(led_stop_software_blink);
 
@@ -226,10 +229,10 @@ void led_set_brightness(struct led_classdev *led_cdev,
 			enum led_brightness brightness)
 {
 	/*
-	 * In case blinking is on delay brightness setting
+	 * If software blink is active, delay brightness setting
 	 * until the next timer tick.
 	 */
-	if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) {
+	if (led_cdev->flags & LED_BLINK_SW) {
 		/*
 		 * If we need to disable soft blinking delegate this to the
 		 * work queue task to avoid problems in case we are called
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index 410c39c62dc7..c9f386213e9e 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
+#include <linux/suspend.h>
 #include "../leds.h"
 
 static int panic_heartbeats;
@@ -154,6 +155,30 @@ static struct led_trigger heartbeat_led_trigger = {
 	.deactivate = heartbeat_trig_deactivate,
 };
 
+static int heartbeat_pm_notifier(struct notifier_block *nb,
+				 unsigned long pm_event, void *unused)
+{
+	int rc;
+
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+	case PM_RESTORE_PREPARE:
+		led_trigger_unregister(&heartbeat_led_trigger);
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+		rc = led_trigger_register(&heartbeat_led_trigger);
+		if (rc)
+			pr_err("could not re-register heartbeat trigger\n");
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 static int heartbeat_reboot_notifier(struct notifier_block *nb,
 				     unsigned long code, void *unused)
 {
@@ -168,6 +193,10 @@ static int heartbeat_panic_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static struct notifier_block heartbeat_pm_nb = {
+	.notifier_call = heartbeat_pm_notifier,
+};
+
 static struct notifier_block heartbeat_reboot_nb = {
 	.notifier_call = heartbeat_reboot_notifier,
 };
@@ -184,12 +213,14 @@ static int __init heartbeat_trig_init(void)
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &heartbeat_panic_nb);
 		register_reboot_notifier(&heartbeat_reboot_nb);
+		register_pm_notifier(&heartbeat_pm_nb);
 	}
 	return rc;
 }
 
 static void __exit heartbeat_trig_exit(void)
 {
+	unregister_pm_notifier(&heartbeat_pm_nb);
 	unregister_reboot_notifier(&heartbeat_reboot_nb);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &heartbeat_panic_nb);
diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index b73c6e7d28e4..6f2c8522e14a 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -61,21 +61,36 @@ static int mcb_probe(struct device *dev)
 	struct mcb_driver *mdrv = to_mcb_driver(dev->driver);
 	struct mcb_device *mdev = to_mcb_device(dev);
 	const struct mcb_device_id *found_id;
+	struct module *carrier_mod;
+	int ret;
 
 	found_id = mcb_match_id(mdrv->id_table, mdev);
 	if (!found_id)
 		return -ENODEV;
 
-	return mdrv->probe(mdev, found_id);
+	carrier_mod = mdev->dev.parent->driver->owner;
+	if (!try_module_get(carrier_mod))
+		return -EINVAL;
+
+	get_device(dev);
+	ret = mdrv->probe(mdev, found_id);
+	if (ret)
+		module_put(carrier_mod);
+
+	return ret;
 }
 
 static int mcb_remove(struct device *dev)
 {
 	struct mcb_driver *mdrv = to_mcb_driver(dev->driver);
 	struct mcb_device *mdev = to_mcb_device(dev);
+	struct module *carrier_mod;
 
 	mdrv->remove(mdev);
 
+	carrier_mod = mdev->dev.parent->driver->owner;
+	module_put(carrier_mod);
+
 	put_device(&mdev->dev);
 
 	return 0;
diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
index d7723ce772b3..c04bc6afb965 100644
--- a/drivers/media/usb/uvc/uvc_v4l2.c
+++ b/drivers/media/usb/uvc/uvc_v4l2.c
@@ -1274,8 +1274,6 @@ struct uvc_xu_control_mapping32 {
 static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 			const struct uvc_xu_control_mapping32 __user *up)
 {
-	struct uvc_menu_info __user *umenus;
-	struct uvc_menu_info __user *kmenus;
 	compat_caddr_t p;
 
 	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
@@ -1292,17 +1290,7 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 
 	if (__get_user(p, &up->menu_info))
 		return -EFAULT;
-	umenus = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, umenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
-
-	kmenus = compat_alloc_user_space(kp->menu_count * sizeof(*kmenus));
-	if (kmenus == NULL)
-		return -EFAULT;
-	kp->menu_info = kmenus;
-
-	if (copy_in_user(kmenus, umenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
+	kp->menu_info = compat_ptr(p);
 
 	return 0;
 }
@@ -1310,10 +1298,6 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp,
 			struct uvc_xu_control_mapping32 __user *up)
 {
-	struct uvc_menu_info __user *umenus;
-	struct uvc_menu_info __user *kmenus = kp->menu_info;
-	compat_caddr_t p;
-
 	if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
 	    __copy_to_user(up, kp, offsetof(typeof(*up), menu_info)) ||
 	    __put_user(kp->menu_count, &up->menu_count))
@@ -1322,16 +1306,6 @@ static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp,
 	if (__clear_user(up->reserved, sizeof(up->reserved)))
 		return -EFAULT;
 
-	if (kp->menu_count == 0)
-		return 0;
-
-	if (get_user(p, &up->menu_info))
-		return -EFAULT;
-	umenus = compat_ptr(p);
-
-	if (copy_in_user(umenus, kmenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -1346,8 +1320,6 @@ struct uvc_xu_control_query32 {
 static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 			const struct uvc_xu_control_query32 __user *up)
 {
-	u8 __user *udata;
-	u8 __user *kdata;
 	compat_caddr_t p;
 
 	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
@@ -1361,17 +1333,7 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 
 	if (__get_user(p, &up->data))
 		return -EFAULT;
-	udata = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, udata, kp->size))
-		return -EFAULT;
-
-	kdata = compat_alloc_user_space(kp->size);
-	if (kdata == NULL)
-		return -EFAULT;
-	kp->data = kdata;
-
-	if (copy_in_user(kdata, udata, kp->size))
-		return -EFAULT;
+	kp->data = compat_ptr(p);
 
 	return 0;
 }
@@ -1379,26 +1341,10 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
 			struct uvc_xu_control_query32 __user *up)
 {
-	u8 __user *udata;
-	u8 __user *kdata = kp->data;
-	compat_caddr_t p;
-
 	if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
 	    __copy_to_user(up, kp, offsetof(typeof(*up), data)))
 		return -EFAULT;
 
-	if (kp->size == 0)
-		return 0;
-
-	if (get_user(p, &up->data))
-		return -EFAULT;
-	udata = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, udata, kp->size))
-		return -EFAULT;
-
-	if (copy_in_user(udata, kdata, kp->size))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -1408,47 +1354,44 @@ static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
 static long uvc_v4l2_compat_ioctl32(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
+	struct uvc_fh *handle = file->private_data;
 	union {
 		struct uvc_xu_control_mapping xmap;
 		struct uvc_xu_control_query xqry;
 	} karg;
 	void __user *up = compat_ptr(arg);
-	mm_segment_t old_fs;
 	long ret;
 
 	switch (cmd) {
 	case UVCIOC_CTRL_MAP32:
-		cmd = UVCIOC_CTRL_MAP;
 		ret = uvc_v4l2_get_xu_mapping(&karg.xmap, up);
+		if (ret)
+			return ret;
+		ret = uvc_ioctl_ctrl_map(handle->chain, &karg.xmap);
+		if (ret)
+			return ret;
+		ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
+		if (ret)
+			return ret;
+
 		break;
 
 	case UVCIOC_CTRL_QUERY32:
-		cmd = UVCIOC_CTRL_QUERY;
 		ret = uvc_v4l2_get_xu_query(&karg.xqry, up);
+		if (ret)
+			return ret;
+		ret = uvc_xu_ctrl_query(handle->chain, &karg.xqry);
+		if (ret)
+			return ret;
+		ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
+		if (ret)
+			return ret;
 		break;
 
 	default:
 		return -ENOIOCTLCMD;
 	}
 
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = video_ioctl2(file, cmd, (unsigned long)&karg);
-	set_fs(old_fs);
-
-	if (ret < 0)
-		return ret;
-
-	switch (cmd) {
-	case UVCIOC_CTRL_MAP:
-		ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
-		break;
-
-	case UVCIOC_CTRL_QUERY:
-		ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
-		break;
-	}
-
 	return ret;
 }
 #endif
diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c
index ca94bded3386..8bef4331bd51 100644
--- a/drivers/media/v4l2-core/v4l2-mc.c
+++ b/drivers/media/v4l2-core/v4l2-mc.c
@@ -1,7 +1,7 @@
 /*
  * Media Controller ancillary functions
  *
- * Copyright (c) 2016 Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+ * Copyright (c) 2016 Mauro Carvalho Chehab <mchehab@kernel.org>
  * Copyright (C) 2016 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (C) 2006-2010 Nokia Corporation
  * Copyright (c) 2016 Intel Corporation.
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index af4884ba6b7c..15508df24e5d 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -398,7 +398,7 @@ static void gpmc_cs_bool_timings(int cs, const struct gpmc_bool_timings *p)
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
 			   GPMC_CONFIG4_OEEXTRADELAY, p->oe_extra_delay);
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
-			   GPMC_CONFIG4_OEEXTRADELAY, p->we_extra_delay);
+			   GPMC_CONFIG4_WEEXTRADELAY, p->we_extra_delay);
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG6,
 			   GPMC_CONFIG6_CYCLE2CYCLESAMECSEN,
 			   p->cycle2cyclesamecsen);
diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c
index 199d261990be..f32fbb8e8129 100644
--- a/drivers/mfd/max77620.c
+++ b/drivers/mfd/max77620.c
@@ -203,6 +203,7 @@ static int max77620_get_fps_period_reg_value(struct max77620_chip *chip,
 		break;
 	case MAX77620:
 		fps_min_period = MAX77620_FPS_PERIOD_MIN_US;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -236,6 +237,7 @@ static int max77620_config_fps(struct max77620_chip *chip,
 		break;
 	case MAX77620:
 		fps_max_period = MAX77620_FPS_PERIOD_MAX_US;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index eed254da63a8..641c1a566687 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -730,7 +730,7 @@ static void mei_cl_wake_all(struct mei_cl *cl)
 	/* synchronized under device mutex */
 	if (waitqueue_active(&cl->wait)) {
 		cl_dbg(dev, cl, "Waking up ctrl write clients!\n");
-		wake_up_interruptible(&cl->wait);
+		wake_up(&cl->wait);
 	}
 }
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 16baeb51b2bd..ef3618299494 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1147,11 +1147,17 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
  */
 static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
 {
-	struct kstat stat;
 	int err, minor;
+	struct path path;
+	struct kstat stat;
 
 	/* Probably this is an MTD character device node path */
-	err = vfs_stat(mtd_dev, &stat);
+	err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path);
+	if (err)
+		return ERR_PTR(err);
+
+	err = vfs_getattr(&path, &stat);
+	path_put(&path);
 	if (err)
 		return ERR_PTR(err);
 
@@ -1160,6 +1166,7 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
 		return ERR_PTR(-EINVAL);
 
 	minor = MINOR(stat.rdev);
+
 	if (minor & 1)
 		/*
 		 * Just do not think the "/dev/mtdrX" devices support is need,
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 5780dd1ba79d..ebf517271d29 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -575,6 +575,7 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 	int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
 	struct ubi_volume *vol = ubi->volumes[idx];
 	struct ubi_vid_hdr *vid_hdr;
+	uint32_t crc;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 	if (!vid_hdr)
@@ -599,14 +600,8 @@ retry:
 		goto out_put;
 	}
 
-	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
-	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
-	if (err) {
-		up_read(&ubi->fm_eba_sem);
-		goto write_error;
-	}
+	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);
 
-	data_size = offset + len;
 	mutex_lock(&ubi->buf_mutex);
 	memset(ubi->peb_buf + offset, 0xFF, len);
 
@@ -621,6 +616,19 @@ retry:
 
 	memcpy(ubi->peb_buf + offset, buf, len);
 
+	data_size = offset + len;
+	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
+	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+	vid_hdr->copy_flag = 1;
+	vid_hdr->data_size = cpu_to_be32(data_size);
+	vid_hdr->data_crc = cpu_to_be32(crc);
+	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+	if (err) {
+		mutex_unlock(&ubi->buf_mutex);
+		up_read(&ubi->fm_eba_sem);
+		goto write_error;
+	}
+
 	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);
 	if (err) {
 		mutex_unlock(&ubi->buf_mutex);
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 348dbbcbedc8..a9e2cef7c95c 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -302,6 +302,7 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
 struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 {
 	int error, ubi_num, vol_id;
+	struct path path;
 	struct kstat stat;
 
 	dbg_gen("open volume %s, mode %d", pathname, mode);
@@ -309,7 +310,12 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 	if (!pathname || !*pathname)
 		return ERR_PTR(-EINVAL);
 
-	error = vfs_stat(pathname, &stat);
+	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+	if (error)
+		return ERR_PTR(error);
+
+	error = vfs_getattr(&path, &stat);
+	path_put(&path);
 	if (error)
 		return ERR_PTR(error);
 
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index b9304a295f86..edc70ffad660 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -101,11 +101,14 @@ enum ad_link_speed_type {
 #define MAC_ADDRESS_EQUAL(A, B)	\
 	ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)
 
-static struct mac_addr null_mac_addr = { { 0, 0, 0, 0, 0, 0 } };
+static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
+	0, 0, 0, 0, 0, 0
+};
 static u16 ad_ticks_per_sec;
 static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
 
-static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
+static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned =
+	MULTICAST_LACPDU_ADDR;
 
 /* ================= main 802.3ad protocol functions ================== */
 static int ad_lacpdu_send(struct port *port);
@@ -657,6 +660,20 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
 	}
 }
 
+static int __agg_active_ports(struct aggregator *agg)
+{
+	struct port *port;
+	int active = 0;
+
+	for (port = agg->lag_ports; port;
+	     port = port->next_port_in_aggregator) {
+		if (port->is_enabled)
+			active++;
+	}
+
+	return active;
+}
+
 /**
  * __get_agg_bandwidth - get the total bandwidth of an aggregator
  * @aggregator: the aggregator we're looking at
@@ -664,39 +681,40 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
  */
 static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 {
+	int nports = __agg_active_ports(aggregator);
 	u32 bandwidth = 0;
 
-	if (aggregator->num_of_ports) {
+	if (nports) {
 		switch (__get_link_speed(aggregator->lag_ports)) {
 		case AD_LINK_SPEED_1MBPS:
-			bandwidth = aggregator->num_of_ports;
+			bandwidth = nports;
 			break;
 		case AD_LINK_SPEED_10MBPS:
-			bandwidth = aggregator->num_of_ports * 10;
+			bandwidth = nports * 10;
 			break;
 		case AD_LINK_SPEED_100MBPS:
-			bandwidth = aggregator->num_of_ports * 100;
+			bandwidth = nports * 100;
 			break;
 		case AD_LINK_SPEED_1000MBPS:
-			bandwidth = aggregator->num_of_ports * 1000;
+			bandwidth = nports * 1000;
 			break;
 		case AD_LINK_SPEED_2500MBPS:
-			bandwidth = aggregator->num_of_ports * 2500;
+			bandwidth = nports * 2500;
 			break;
 		case AD_LINK_SPEED_10000MBPS:
-			bandwidth = aggregator->num_of_ports * 10000;
+			bandwidth = nports * 10000;
 			break;
 		case AD_LINK_SPEED_20000MBPS:
-			bandwidth = aggregator->num_of_ports * 20000;
+			bandwidth = nports * 20000;
 			break;
 		case AD_LINK_SPEED_40000MBPS:
-			bandwidth = aggregator->num_of_ports * 40000;
+			bandwidth = nports * 40000;
 			break;
 		case AD_LINK_SPEED_56000MBPS:
-			bandwidth = aggregator->num_of_ports * 56000;
+			bandwidth = nports * 56000;
 			break;
 		case AD_LINK_SPEED_100000MBPS:
-			bandwidth = aggregator->num_of_ports * 100000;
+			bandwidth = nports * 100000;
 			break;
 		default:
 			bandwidth = 0; /* to silence the compiler */
@@ -1530,10 +1548,10 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
 
 	switch (__get_agg_selection_mode(curr->lag_ports)) {
 	case BOND_AD_COUNT:
-		if (curr->num_of_ports > best->num_of_ports)
+		if (__agg_active_ports(curr) > __agg_active_ports(best))
 			return curr;
 
-		if (curr->num_of_ports < best->num_of_ports)
+		if (__agg_active_ports(curr) < __agg_active_ports(best))
 			return best;
 
 		/*FALLTHROUGH*/
@@ -1561,8 +1579,14 @@ static int agg_device_up(const struct aggregator *agg)
 	if (!port)
 		return 0;
 
-	return netif_running(port->slave->dev) &&
-	       netif_carrier_ok(port->slave->dev);
+	for (port = agg->lag_ports; port;
+	     port = port->next_port_in_aggregator) {
+		if (netif_running(port->slave->dev) &&
+		    netif_carrier_ok(port->slave->dev))
+			return 1;
+	}
+
+	return 0;
 }
 
 /**
@@ -1610,7 +1634,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
 
 		agg->is_active = 0;
 
-		if (agg->num_of_ports && agg_device_up(agg))
+		if (__agg_active_ports(agg) && agg_device_up(agg))
 			best = ad_agg_selection_test(best, agg);
 	}
 
@@ -1622,7 +1646,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
 		 * answering partner.
 		 */
 		if (active && active->lag_ports &&
-		    active->lag_ports->is_enabled &&
+		    __agg_active_ports(active) &&
 		    (__agg_has_partner(active) ||
 		     (!__agg_has_partner(active) &&
 		     !__agg_has_partner(best)))) {
@@ -1718,7 +1742,7 @@ static void ad_clear_agg(struct aggregator *aggregator)
 		aggregator->is_individual = false;
 		aggregator->actor_admin_aggregator_key = 0;
 		aggregator->actor_oper_aggregator_key = 0;
-		aggregator->partner_system = null_mac_addr;
+		eth_zero_addr(aggregator->partner_system.mac_addr_value);
 		aggregator->partner_system_priority = 0;
 		aggregator->partner_oper_aggregator_key = 0;
 		aggregator->receive_state = 0;
@@ -1740,7 +1764,7 @@ static void ad_initialize_agg(struct aggregator *aggregator)
 	if (aggregator) {
 		ad_clear_agg(aggregator);
 
-		aggregator->aggregator_mac_address = null_mac_addr;
+		eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value);
 		aggregator->aggregator_identifier = 0;
 		aggregator->slave = NULL;
 	}
@@ -2133,7 +2157,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
 				else
 					temp_aggregator->lag_ports = temp_port->next_port_in_aggregator;
 				temp_aggregator->num_of_ports--;
-				if (temp_aggregator->num_of_ports == 0) {
+				if (__agg_active_ports(temp_aggregator) == 0) {
 					select_new_active_agg = temp_aggregator->is_active;
 					ad_clear_agg(temp_aggregator);
 					if (select_new_active_agg) {
@@ -2432,7 +2456,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave)
  */
 void bond_3ad_handle_link_change(struct slave *slave, char link)
 {
+	struct aggregator *agg;
 	struct port *port;
+	bool dummy;
 
 	port = &(SLAVE_AD_INFO(slave)->port);
 
@@ -2459,6 +2485,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 		port->is_enabled = false;
 		ad_update_actor_keys(port, true);
 	}
+	agg = __get_first_agg(port);
+	ad_agg_selection_logic(agg, &dummy);
+
 	netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n",
 		   port->actor_port_number,
 		   link == BOND_LINK_UP ? "UP" : "DOWN");
@@ -2499,7 +2528,7 @@ int bond_3ad_set_carrier(struct bonding *bond)
 	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
 	if (active) {
 		/* are enough slaves available to consider link up? */
-		if (active->num_of_ports < bond->params.min_links) {
+		if (__agg_active_ports(active) < bond->params.min_links) {
 			if (netif_carrier_ok(bond->dev)) {
 				netif_carrier_off(bond->dev);
 				goto out;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c5ac160a8ae9..551f0f8dead3 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -42,13 +42,10 @@
 
 
 
-#ifndef __long_aligned
-#define __long_aligned __attribute__((aligned((sizeof(long)))))
-#endif
-static const u8 mac_bcast[ETH_ALEN] __long_aligned = {
+static const u8 mac_bcast[ETH_ALEN + 2] __long_aligned = {
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
-static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = {
+static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
 };
 static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 90157e20357e..b571ed9fd63d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	/* check for initial state */
+	new_slave->link = BOND_LINK_NOCHANGE;
 	if (bond->params.miimon) {
 		if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
 			if (bond->params.updelay) {
@@ -4137,6 +4138,8 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_add_slave		= bond_enslave,
 	.ndo_del_slave		= bond_release,
 	.ndo_fix_features	= bond_fix_features,
+	.ndo_neigh_construct	= netdev_default_l2upper_neigh_construct,
+	.ndo_neigh_destroy	= netdev_default_l2upper_neigh_destroy,
 	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 8b3275d7792a..8f5e93cb7975 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -712,9 +712,10 @@ static int at91_poll_rx(struct net_device *dev, int quota)
 
 	/* upper group completed, look again in lower */
 	if (priv->rx_next > get_mb_rx_low_last(priv) &&
-	    quota > 0 && mb > get_mb_rx_last(priv)) {
+	    mb > get_mb_rx_last(priv)) {
 		priv->rx_next = get_mb_rx_first(priv);
-		goto again;
+		if (quota > 0)
+			goto again;
 	}
 
 	return received;
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index f91b094288da..e3dccd3200d5 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -332,9 +332,23 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface,
 
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl);
 
-	for (i = 0; i < frame->can_dlc; i += 2) {
-		priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2,
-				frame->data[i] | (frame->data[i + 1] << 8));
+	if (priv->type == BOSCH_D_CAN) {
+		u32 data = 0, dreg = C_CAN_IFACE(DATA1_REG, iface);
+
+		for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
+			data = (u32)frame->data[i];
+			data |= (u32)frame->data[i + 1] << 8;
+			data |= (u32)frame->data[i + 2] << 16;
+			data |= (u32)frame->data[i + 3] << 24;
+			priv->write_reg32(priv, dreg, data);
+		}
+	} else {
+		for (i = 0; i < frame->can_dlc; i += 2) {
+			priv->write_reg(priv,
+					C_CAN_IFACE(DATA1_REG, iface) + i / 2,
+					frame->data[i] |
+					(frame->data[i + 1] << 8));
+		}
 	}
 }
 
@@ -402,10 +416,20 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl)
 	} else {
 		int i, dreg = C_CAN_IFACE(DATA1_REG, iface);
 
-		for (i = 0; i < frame->can_dlc; i += 2, dreg ++) {
-			data = priv->read_reg(priv, dreg);
-			frame->data[i] = data;
-			frame->data[i + 1] = data >> 8;
+		if (priv->type == BOSCH_D_CAN) {
+			for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
+				data = priv->read_reg32(priv, dreg);
+				frame->data[i] = data;
+				frame->data[i + 1] = data >> 8;
+				frame->data[i + 2] = data >> 16;
+				frame->data[i + 3] = data >> 24;
+			}
+		} else {
+			for (i = 0; i < frame->can_dlc; i += 2, dreg++) {
+				data = priv->read_reg(priv, dreg);
+				frame->data[i] = data;
+				frame->data[i + 1] = data >> 8;
+			}
 		}
 	}
 
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 7188137fa08e..e21f7cc5ae4d 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -828,6 +828,9 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[])
 	 * - control mode with CAN_CTRLMODE_FD set
 	 */
 
+	if (!data)
+		return 0;
+
 	if (data[IFLA_CAN_CTRLMODE]) {
 		struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]);
 
@@ -1038,6 +1041,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev,
 	return -EOPNOTSUPP;
 }
 
+static void can_dellink(struct net_device *dev, struct list_head *head)
+{
+	return;
+}
+
 static struct rtnl_link_ops can_link_ops __read_mostly = {
 	.kind		= "can",
 	.maxtype	= IFLA_CAN_MAX,
@@ -1046,6 +1054,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = {
 	.validate	= can_validate,
 	.newlink	= can_newlink,
 	.changelink	= can_changelink,
+	.dellink	= can_dellink,
 	.get_size	= can_get_size,
 	.fill_info	= can_fill_info,
 	.get_xstats_size = can_get_xstats_size,
diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index bcb272f6c68a..8483a40e7e9e 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -16,7 +16,8 @@ config CAN_ESD_USB2
 config CAN_GS_USB
 	tristate "Geschwister Schneider UG interfaces"
 	---help---
-	  This driver supports the Geschwister Schneider USB/CAN devices.
+	  This driver supports the Geschwister Schneider and bytewerk.org
+	  candleLight USB CAN interfaces USB/CAN devices
 	  If unsure choose N,
 	  choose Y for built in support,
 	  M to compile as module (module will be named: gs_usb).
@@ -46,6 +47,8 @@ config CAN_KVASER_USB
 	    - Kvaser USBcan R
 	    - Kvaser Leaf Light v2
 	    - Kvaser Mini PCI Express HS
+	    - Kvaser Mini PCI Express 2xHS
+	    - Kvaser USBcan Light 2xHS
 	    - Kvaser USBcan II HS/HS
 	    - Kvaser USBcan II HS/LS
 	    - Kvaser USBcan Rugged ("USBcan Rev B")
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 360764324c54..6f0cbc38782e 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -1,7 +1,9 @@
-/* CAN driver for Geschwister Schneider USB/CAN devices.
+/* CAN driver for Geschwister Schneider USB/CAN devices
+ * and bytewerk.org candleLight USB CAN interfaces.
  *
- * Copyright (C) 2013 Geschwister Schneider Technologie-,
+ * Copyright (C) 2013-2016 Geschwister Schneider Technologie-,
  * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt).
+ * Copyright (C) 2016 Hubert Denkmair
  *
  * Many thanks to all socketcan devs!
  *
@@ -29,6 +31,9 @@
 #define USB_GSUSB_1_VENDOR_ID      0x1d50
 #define USB_GSUSB_1_PRODUCT_ID     0x606f
 
+#define USB_CANDLELIGHT_VENDOR_ID  0x1209
+#define USB_CANDLELIGHT_PRODUCT_ID 0x2323
+
 #define GSUSB_ENDPOINT_IN          1
 #define GSUSB_ENDPOINT_OUT         2
 
@@ -1009,6 +1014,8 @@ static void gs_usb_disconnect(struct usb_interface *intf)
 static const struct usb_device_id gs_usb_table[] = {
 	{ USB_DEVICE_INTERFACE_NUMBER(USB_GSUSB_1_VENDOR_ID,
 				      USB_GSUSB_1_PRODUCT_ID, 0) },
+	{ USB_DEVICE_INTERFACE_NUMBER(USB_CANDLELIGHT_VENDOR_ID,
+				      USB_CANDLELIGHT_PRODUCT_ID, 0) },
 	{} /* Terminating entry */
 };
 
@@ -1026,5 +1033,6 @@ module_usb_driver(gs_usb_driver);
 MODULE_AUTHOR("Maximilian Schneider <mws@schneidersoft.net>");
 MODULE_DESCRIPTION(
 "Socket CAN device driver for Geschwister Schneider Technologie-, "
-"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces.");
+"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces\n"
+"and bytewerk.org candleLight USB CAN interfaces.");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 022bfa13ebfa..6f1f3b675ff5 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -59,11 +59,14 @@
 #define USB_CAN_R_PRODUCT_ID		39
 #define USB_LEAF_LITE_V2_PRODUCT_ID	288
 #define USB_MINI_PCIE_HS_PRODUCT_ID	289
+#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 290
+#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID	291
+#define USB_MINI_PCIE_2HS_PRODUCT_ID	292
 
 static inline bool kvaser_is_leaf(const struct usb_device_id *id)
 {
 	return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
-	       id->idProduct <= USB_MINI_PCIE_HS_PRODUCT_ID;
+	       id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID;
 }
 
 /* Kvaser USBCan-II devices */
@@ -537,6 +540,9 @@ static const struct usb_device_id kvaser_usb_table[] = {
 		.driver_info = KVASER_HAS_TXRX_ERRORS },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
 
 	/* USBCANII family IDs */
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 444de66667b9..bda37d336736 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1581,6 +1581,18 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
 		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
 	},
+	{
+		.chip_id = BCM58XX_DEVICE_ID,
+		.dev_name = "BCM585xx/586xx/88312",
+		.vlans	= 4096,
+		.enabled_ports = 0x1ff,
+		.arl_entries = 4,
+		.cpu_port = B53_CPU_PORT_25,
+		.vta_regs = B53_VTA_REGS,
+		.duplex_reg = B53_DUPLEX_STAT_GE,
+		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+	},
 };
 
 static int b53_switch_init(struct b53_device *dev)
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index b70daf9174d4..21f1068b0804 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -70,6 +70,8 @@ static int b53_mmap_read32(struct b53_device *dev, u8 page, u8 reg, u32 *val)
 
 static int b53_mmap_read48(struct b53_device *dev, u8 page, u8 reg, u64 *val)
 {
+	u8 __iomem *regs = dev->priv;
+
 	if (WARN_ON(reg % 2))
 		return -EINVAL;
 
@@ -77,16 +79,26 @@ static int b53_mmap_read48(struct b53_device *dev, u8 page, u8 reg, u64 *val)
 		u16 lo;
 		u32 hi;
 
-		b53_mmap_read16(dev, page, reg, &lo);
-		b53_mmap_read32(dev, page, reg + 2, &hi);
+		if (dev->pdata && dev->pdata->big_endian) {
+			lo = ioread16be(regs + (page << 8) + reg);
+			hi = ioread32be(regs + (page << 8) + reg + 2);
+		} else {
+			lo = readw(regs + (page << 8) + reg);
+			hi = readl(regs + (page << 8) + reg + 2);
+		}
 
 		*val = ((u64)hi << 16) | lo;
 	} else {
 		u32 lo;
 		u16 hi;
 
-		b53_mmap_read32(dev, page, reg, &lo);
-		b53_mmap_read16(dev, page, reg + 4, &hi);
+		if (dev->pdata && dev->pdata->big_endian) {
+			lo = ioread32be(regs + (page << 8) + reg);
+			hi = ioread16be(regs + (page << 8) + reg + 4);
+		} else {
+			lo = readl(regs + (page << 8) + reg);
+			hi = readw(regs + (page << 8) + reg + 4);
+		}
 
 		*val = ((u64)hi << 32) | lo;
 	}
@@ -96,13 +108,19 @@ static int b53_mmap_read48(struct b53_device *dev, u8 page, u8 reg, u64 *val)
 
 static int b53_mmap_read64(struct b53_device *dev, u8 page, u8 reg, u64 *val)
 {
+	u8 __iomem *regs = dev->priv;
 	u32 hi, lo;
 
 	if (WARN_ON(reg % 4))
 		return -EINVAL;
 
-	b53_mmap_read32(dev, page, reg, &lo);
-	b53_mmap_read32(dev, page, reg + 4, &hi);
+	if (dev->pdata && dev->pdata->big_endian) {
+		lo = ioread32be(regs + (page << 8) + reg);
+		hi = ioread32be(regs + (page << 8) + reg + 4);
+	} else {
+		lo = readl(regs + (page << 8) + reg);
+		hi = readl(regs + (page << 8) + reg + 4);
+	}
 
 	*val = ((u64)hi << 32) | lo;
 
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 5d8c602fb877..835a744f206e 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -59,6 +59,7 @@ enum {
 	BCM53012_DEVICE_ID = 0x53012,
 	BCM53018_DEVICE_ID = 0x53018,
 	BCM53019_DEVICE_ID = 0x53019,
+	BCM58XX_DEVICE_ID = 0x5800,
 };
 
 #define B53_N_PORTS	9
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 70fd47284535..3e2d4a5fcd5a 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -21,6 +21,7 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/b53.h>
+#include <linux/of.h>
 
 #include "b53_priv.h"
 
@@ -356,12 +357,45 @@ static struct b53_io_ops b53_srab_ops = {
 	.write64 = b53_srab_write64,
 };
 
+static const struct of_device_id b53_srab_of_match[] = {
+	{ .compatible = "brcm,bcm53010-srab" },
+	{ .compatible = "brcm,bcm53011-srab" },
+	{ .compatible = "brcm,bcm53012-srab" },
+	{ .compatible = "brcm,bcm53018-srab" },
+	{ .compatible = "brcm,bcm53019-srab" },
+	{ .compatible = "brcm,bcm5301x-srab" },
+	{ .compatible = "brcm,bcm58522-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm58525-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm58535-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm58622-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm58623-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm58625-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm88312-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,nsp-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, b53_srab_of_match);
+
 static int b53_srab_probe(struct platform_device *pdev)
 {
+	struct b53_platform_data *pdata = pdev->dev.platform_data;
+	struct device_node *dn = pdev->dev.of_node;
+	const struct of_device_id *of_id = NULL;
 	struct b53_srab_priv *priv;
 	struct b53_device *dev;
 	struct resource *r;
 
+	if (dn)
+		of_id = of_match_node(b53_srab_of_match, dn);
+
+	if (of_id) {
+		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+
+		pdata->chip_id = (u32)(unsigned long)of_id->data;
+	}
+
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -375,6 +409,9 @@ static int b53_srab_probe(struct platform_device *pdev)
 	if (!dev)
 		return -ENOMEM;
 
+	if (pdata)
+		dev->pdata = pdata;
+
 	platform_set_drvdata(pdev, dev);
 
 	return b53_switch_register(dev);
@@ -390,16 +427,6 @@ static int b53_srab_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id b53_srab_of_match[] = {
-	{ .compatible = "brcm,bcm53010-srab" },
-	{ .compatible = "brcm,bcm53011-srab" },
-	{ .compatible = "brcm,bcm53012-srab" },
-	{ .compatible = "brcm,bcm53018-srab" },
-	{ .compatible = "brcm,bcm53019-srab" },
-	{ .compatible = "brcm,bcm5301x-srab" },
-	{ /* sentinel */ },
-};
-
 static struct platform_driver b53_srab_driver = {
 	.probe = b53_srab_probe,
 	.remove = b53_srab_remove,
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index e0fb0f1122db..20760e10211a 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev)
 	 * on the current MAC's MII bus
 	 */
 	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
-		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
-			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
+		if (mdiobus_get_phy(aup->mii_bus, phy_addr)) {
+			phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
 			if (!aup->phy_search_highest_addr)
 				/* break out with first one found */
 				break;
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index ca562bc034c3..e4feb712d4f2 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -134,7 +134,6 @@ struct arc_emac_priv {
 
 	/* Devices */
 	struct device *dev;
-	struct phy_device *phy_dev;
 	struct mii_bus *bus;
 	struct arc_emac_mdio_bus_data bus_data;
 
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index a3a9392a4954..586bedac457d 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -47,7 +47,7 @@ static inline int arc_emac_tx_avail(struct arc_emac_priv *priv)
 static void arc_emac_adjust_link(struct net_device *ndev)
 {
 	struct arc_emac_priv *priv = netdev_priv(ndev);
-	struct phy_device *phy_dev = priv->phy_dev;
+	struct phy_device *phy_dev = ndev->phydev;
 	unsigned int reg, state_changed = 0;
 
 	if (priv->link != phy_dev->link) {
@@ -80,46 +80,6 @@ static void arc_emac_adjust_link(struct net_device *ndev)
 }
 
 /**
- * arc_emac_get_settings - Get PHY settings.
- * @ndev:	Pointer to net_device structure.
- * @cmd:	Pointer to ethtool_cmd structure.
- *
- * This implements ethtool command for getting PHY settings. If PHY could
- * not be found, the function returns -ENODEV. This function calls the
- * relevant PHY ethtool API to get the PHY settings.
- * Issue "ethtool ethX" under linux prompt to execute this function.
- */
-static int arc_emac_get_settings(struct net_device *ndev,
-				 struct ethtool_cmd *cmd)
-{
-	struct arc_emac_priv *priv = netdev_priv(ndev);
-
-	return phy_ethtool_gset(priv->phy_dev, cmd);
-}
-
-/**
- * arc_emac_set_settings - Set PHY settings as passed in the argument.
- * @ndev:	Pointer to net_device structure.
- * @cmd:	Pointer to ethtool_cmd structure.
- *
- * This implements ethtool command for setting various PHY settings. If PHY
- * could not be found, the function returns -ENODEV. This function calls the
- * relevant PHY ethtool API to set the PHY.
- * Issue e.g. "ethtool -s ethX speed 1000" under linux prompt to execute this
- * function.
- */
-static int arc_emac_set_settings(struct net_device *ndev,
-				 struct ethtool_cmd *cmd)
-{
-	struct arc_emac_priv *priv = netdev_priv(ndev);
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	return phy_ethtool_sset(priv->phy_dev, cmd);
-}
-
-/**
  * arc_emac_get_drvinfo - Get EMAC driver information.
  * @ndev:	Pointer to net_device structure.
  * @info:	Pointer to ethtool_drvinfo structure.
@@ -137,10 +97,10 @@ static void arc_emac_get_drvinfo(struct net_device *ndev,
 }
 
 static const struct ethtool_ops arc_emac_ethtool_ops = {
-	.get_settings	= arc_emac_get_settings,
-	.set_settings	= arc_emac_set_settings,
 	.get_drvinfo	= arc_emac_get_drvinfo,
 	.get_link	= ethtool_op_get_link,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 #define FIRST_OR_LAST_MASK	(FIRST_MASK | LAST_MASK)
@@ -403,7 +363,7 @@ static void arc_emac_poll_controller(struct net_device *dev)
 static int arc_emac_open(struct net_device *ndev)
 {
 	struct arc_emac_priv *priv = netdev_priv(ndev);
-	struct phy_device *phy_dev = priv->phy_dev;
+	struct phy_device *phy_dev = ndev->phydev;
 	int i;
 
 	phy_dev->autoneg = AUTONEG_ENABLE;
@@ -474,7 +434,7 @@ static int arc_emac_open(struct net_device *ndev)
 	/* Enable EMAC */
 	arc_reg_or(priv, R_CTRL, EN_MASK);
 
-	phy_start_aneg(priv->phy_dev);
+	phy_start_aneg(ndev->phydev);
 
 	netif_start_queue(ndev);
 
@@ -772,6 +732,7 @@ int arc_emac_probe(struct net_device *ndev, int interface)
 	struct device *dev = ndev->dev.parent;
 	struct resource res_regs;
 	struct device_node *phy_node;
+	struct phy_device *phydev = NULL;
 	struct arc_emac_priv *priv;
 	const char *mac_addr;
 	unsigned int id, clock_frequency, irq;
@@ -887,16 +848,16 @@ int arc_emac_probe(struct net_device *ndev, int interface)
 		goto out_clken;
 	}
 
-	priv->phy_dev = of_phy_connect(ndev, phy_node, arc_emac_adjust_link, 0,
-				       interface);
-	if (!priv->phy_dev) {
+	phydev = of_phy_connect(ndev, phy_node, arc_emac_adjust_link, 0,
+				interface);
+	if (!phydev) {
 		dev_err(dev, "of_phy_connect() failed\n");
 		err = -ENODEV;
 		goto out_mdio;
 	}
 
 	dev_info(dev, "connected to %s phy with id 0x%x\n",
-		 priv->phy_dev->drv->name, priv->phy_dev->phy_id);
+		 phydev->drv->name, phydev->phy_id);
 
 	netif_napi_add(ndev, &priv->napi, arc_emac_poll, ARC_EMAC_NAPI_WEIGHT);
 
@@ -910,8 +871,7 @@ int arc_emac_probe(struct net_device *ndev, int interface)
 
 out_netif_api:
 	netif_napi_del(&priv->napi);
-	phy_disconnect(priv->phy_dev);
-	priv->phy_dev = NULL;
+	phy_disconnect(phydev);
 out_mdio:
 	arc_mdio_remove(priv);
 out_clken:
@@ -925,8 +885,7 @@ int arc_emac_remove(struct net_device *ndev)
 {
 	struct arc_emac_priv *priv = netdev_priv(ndev);
 
-	phy_disconnect(priv->phy_dev);
-	priv->phy_dev = NULL;
+	phy_disconnect(ndev->phydev);
 	arc_mdio_remove(priv);
 	unregister_netdev(ndev);
 	netif_napi_del(&priv->napi);
diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index d02c4240b7df..8fc93c5f6abc 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -96,10 +96,6 @@ struct alx_priv {
 	unsigned int rx_ringsz;
 	unsigned int rxbuf_size;
 
-	struct page  *rx_page;
-	unsigned int rx_page_offset;
-	unsigned int rx_frag_size;
-
 	struct napi_struct napi;
 	struct alx_tx_queue txq;
 	struct alx_rx_queue rxq;
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c98acdc0d14f..e708e360a9e3 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -70,35 +70,6 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 	}
 }
 
-static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp)
-{
-	struct sk_buff *skb;
-	struct page *page;
-
-	if (alx->rx_frag_size > PAGE_SIZE)
-		return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
-
-	page = alx->rx_page;
-	if (!page) {
-		alx->rx_page = page = alloc_page(gfp);
-		if (unlikely(!page))
-			return NULL;
-		alx->rx_page_offset = 0;
-	}
-
-	skb = build_skb(page_address(page) + alx->rx_page_offset,
-			alx->rx_frag_size);
-	if (likely(skb)) {
-		alx->rx_page_offset += alx->rx_frag_size;
-		if (alx->rx_page_offset >= PAGE_SIZE)
-			alx->rx_page = NULL;
-		else
-			get_page(page);
-	}
-	return skb;
-}
-
-
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
 	struct alx_rx_queue *rxq = &alx->rxq;
@@ -115,9 +86,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	while (!cur_buf->skb && next != rxq->read_idx) {
 		struct alx_rfd *rfd = &rxq->rfd[cur];
 
-		skb = alx_alloc_skb(alx, gfp);
+		/*
+		 * When DMA RX address is set to something like
+		 * 0x....fc0, it will be very likely to cause DMA
+		 * RFD overflow issue.
+		 *
+		 * To work around it, we apply rx skb with 64 bytes
+		 * longer space, and offset the address whenever
+		 * 0x....fc0 is detected.
+		 */
+		skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp);
 		if (!skb)
 			break;
+
+		if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+			skb_reserve(skb, 64);
+
 		dma = dma_map_single(&alx->hw.pdev->dev,
 				     skb->data, alx->rxbuf_size,
 				     DMA_FROM_DEVICE);
@@ -153,7 +137,6 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 		alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur);
 	}
 
-
 	return count;
 }
 
@@ -622,11 +605,6 @@ static void alx_free_rings(struct alx_priv *alx)
 	kfree(alx->txq.bufs);
 	kfree(alx->rxq.bufs);
 
-	if (alx->rx_page) {
-		put_page(alx->rx_page);
-		alx->rx_page = NULL;
-	}
-
 	dma_free_coherent(&alx->hw.pdev->dev,
 			  alx->descmem.size,
 			  alx->descmem.virt,
@@ -681,7 +659,6 @@ static int alx_request_irq(struct alx_priv *alx)
 				  alx->dev->name, alx);
 		if (!err)
 			goto out;
-
 		/* fall back to legacy interrupt */
 		pci_disable_msi(alx->hw.pdev);
 	}
@@ -725,7 +702,6 @@ static int alx_init_sw(struct alx_priv *alx)
 	struct pci_dev *pdev = alx->hw.pdev;
 	struct alx_hw *hw = &alx->hw;
 	int err;
-	unsigned int head_size;
 
 	err = alx_identify_hw(alx);
 	if (err) {
@@ -741,12 +717,7 @@ static int alx_init_sw(struct alx_priv *alx)
 
 	hw->smb_timer = 400;
 	hw->mtu = alx->dev->mtu;
-
 	alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu);
-	head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	alx->rx_frag_size = roundup_pow_of_two(head_size);
-
 	alx->tx_ringsz = 256;
 	alx->rx_ringsz = 512;
 	hw->imt = 200;
@@ -848,7 +819,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
 	int max_frame = ALX_MAX_FRAME_LEN(mtu);
-	unsigned int head_size;
 
 	if ((max_frame < ALX_MIN_FRAME_SIZE) ||
 	    (max_frame > ALX_MAX_FRAME_SIZE))
@@ -860,9 +830,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 	netdev->mtu = mtu;
 	alx->hw.mtu = mtu;
 	alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
-	head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	alx->rx_frag_size = roundup_pow_of_two(head_size);
 	netdev_update_features(netdev);
 	if (netif_running(netdev))
 		alx_reinit(alx);
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index d74a92e1c27d..bd8c80c0b71c 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -140,10 +140,18 @@ config BNX2X_SRIOV
 	  allows for virtual function acceleration in virtual environments.
 
 config BGMAC
-	tristate "BCMA bus GBit core support"
+	tristate
+	help
+	  This enables the integrated ethernet controller support for many
+	  Broadcom (mostly iProc) SoCs. An appropriate bus interface driver
+	  needs to be enabled to select this.
+
+config BGMAC_BCMA
+	tristate "Broadcom iProc GBit BCMA support"
 	depends on BCMA && BCMA_HOST_SOC
 	depends on HAS_DMA
 	depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
+	select BGMAC
 	select PHYLIB
 	select FIXED_PHY
 	---help---
@@ -152,6 +160,19 @@ config BGMAC
 	  In case of using this driver on BCM4706 it's also requires to enable
 	  BCMA_DRIVER_GMAC_CMN to make it work.
 
+config BGMAC_PLATFORM
+	tristate "Broadcom iProc GBit platform support"
+	depends on HAS_DMA
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	depends on OF
+	select BGMAC
+	select PHYLIB
+	select FIXED_PHY
+	default ARCH_BCM_IPROC
+	---help---
+	  Say Y here if you want to use the Broadcom iProc Gigabit Ethernet
+	  controller through the generic platform interface
+
 config SYSTEMPORT
 	tristate "Broadcom SYSTEMPORT internal MAC support"
 	depends on OF
diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index 00584d78b3e0..79f2372c66ec 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile
@@ -11,5 +11,7 @@ obj-$(CONFIG_BNX2X) += bnx2x/
 obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o
 obj-$(CONFIG_TIGON3) += tg3.o
 obj-$(CONFIG_BGMAC) += bgmac.o
+obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o
+obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o
 obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o
 obj-$(CONFIG_BNXT) += bnxt/
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 834afbb51aff..b2d30863caeb 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -370,7 +370,7 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 		else
 			p = (char *)priv;
 		p += s->stat_offset;
-		data[i] = *(u32 *)p;
+		data[i] = *(unsigned long *)p;
 	}
 }
 
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
new file mode 100644
index 000000000000..7c19c8e2bf91
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
@@ -0,0 +1,266 @@
+/*
+ * Driver for (BCM4706)? GBit MAC core on BCMA bus.
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
+#include <linux/brcmphy.h>
+#include "bgmac.h"
+
+struct bcma_mdio {
+	struct bcma_device *core;
+	u8 phyaddr;
+};
+
+static bool bcma_mdio_wait_value(struct bcma_device *core, u16 reg, u32 mask,
+				 u32 value, int timeout)
+{
+	u32 val;
+	int i;
+
+	for (i = 0; i < timeout / 10; i++) {
+		val = bcma_read32(core, reg);
+		if ((val & mask) == value)
+			return true;
+		udelay(10);
+	}
+	dev_err(&core->dev, "Timeout waiting for reg 0x%X\n", reg);
+	return false;
+}
+
+/**************************************************
+ * PHY ops
+ **************************************************/
+
+static u16 bcma_mdio_phy_read(struct bcma_mdio *bcma_mdio, u8 phyaddr, u8 reg)
+{
+	struct bcma_device *core;
+	u16 phy_access_addr;
+	u16 phy_ctl_addr;
+	u32 tmp;
+
+	BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK);
+	BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK);
+	BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT);
+	BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK);
+	BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT);
+	BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE);
+	BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START);
+	BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK);
+	BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK);
+	BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
+	BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
+
+	if (bcma_mdio->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		core = bcma_mdio->core->bus->drv_gmac_cmn.core;
+		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+	} else {
+		core = bcma_mdio->core;
+		phy_access_addr = BGMAC_PHY_ACCESS;
+		phy_ctl_addr = BGMAC_PHY_CNTL;
+	}
+
+	tmp = bcma_read32(core, phy_ctl_addr);
+	tmp &= ~BGMAC_PC_EPA_MASK;
+	tmp |= phyaddr;
+	bcma_write32(core, phy_ctl_addr, tmp);
+
+	tmp = BGMAC_PA_START;
+	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+	tmp |= reg << BGMAC_PA_REG_SHIFT;
+	bcma_write32(core, phy_access_addr, tmp);
+
+	if (!bcma_mdio_wait_value(core, phy_access_addr, BGMAC_PA_START, 0,
+				  1000)) {
+		dev_err(&core->dev, "Reading PHY %d register 0x%X failed\n",
+			phyaddr, reg);
+		return 0xffff;
+	}
+
+	return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK;
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */
+static int bcma_mdio_phy_write(struct bcma_mdio *bcma_mdio, u8 phyaddr, u8 reg,
+			       u16 value)
+{
+	struct bcma_device *core;
+	u16 phy_access_addr;
+	u16 phy_ctl_addr;
+	u32 tmp;
+
+	if (bcma_mdio->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		core = bcma_mdio->core->bus->drv_gmac_cmn.core;
+		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+	} else {
+		core = bcma_mdio->core;
+		phy_access_addr = BGMAC_PHY_ACCESS;
+		phy_ctl_addr = BGMAC_PHY_CNTL;
+	}
+
+	tmp = bcma_read32(core, phy_ctl_addr);
+	tmp &= ~BGMAC_PC_EPA_MASK;
+	tmp |= phyaddr;
+	bcma_write32(core, phy_ctl_addr, tmp);
+
+	bcma_write32(bcma_mdio->core, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
+	if (bcma_read32(bcma_mdio->core, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
+		dev_warn(&core->dev, "Error setting MDIO int\n");
+
+	tmp = BGMAC_PA_START;
+	tmp |= BGMAC_PA_WRITE;
+	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+	tmp |= reg << BGMAC_PA_REG_SHIFT;
+	tmp |= value;
+	bcma_write32(core, phy_access_addr, tmp);
+
+	if (!bcma_mdio_wait_value(core, phy_access_addr, BGMAC_PA_START, 0,
+				  1000)) {
+		dev_err(&core->dev, "Writing to PHY %d register 0x%X failed\n",
+			phyaddr, reg);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */
+static void bcma_mdio_phy_init(struct bcma_mdio *bcma_mdio)
+{
+	struct bcma_chipinfo *ci = &bcma_mdio->core->bus->chipinfo;
+	u8 i;
+
+	if (ci->id == BCMA_CHIP_ID_BCM5356) {
+		for (i = 0; i < 5; i++) {
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x008b);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x15, 0x0100);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x12, 0x2aaa);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+		}
+	}
+	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) {
+		struct bcma_drv_cc *cc = &bcma_mdio->core->bus->drv_cc;
+
+		bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0);
+		bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0);
+		for (i = 0; i < 5; i++) {
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x16, 0x5284);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x0010);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x16, 0x5296);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x1073);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x9073);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x16, 0x52b6);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x9273);
+			bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+		}
+	}
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */
+static int bcma_mdio_phy_reset(struct mii_bus *bus)
+{
+	struct bcma_mdio *bcma_mdio = bus->priv;
+	u8 phyaddr = bcma_mdio->phyaddr;
+
+	if (bcma_mdio->phyaddr == BGMAC_PHY_NOREGS)
+		return 0;
+
+	bcma_mdio_phy_write(bcma_mdio, phyaddr, MII_BMCR, BMCR_RESET);
+	udelay(100);
+	if (bcma_mdio_phy_read(bcma_mdio, phyaddr, MII_BMCR) & BMCR_RESET)
+		dev_err(&bcma_mdio->core->dev, "PHY reset failed\n");
+	bcma_mdio_phy_init(bcma_mdio);
+
+	return 0;
+}
+
+/**************************************************
+ * MII
+ **************************************************/
+
+static int bcma_mdio_mii_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	return bcma_mdio_phy_read(bus->priv, mii_id, regnum);
+}
+
+static int bcma_mdio_mii_write(struct mii_bus *bus, int mii_id, int regnum,
+			       u16 value)
+{
+	return bcma_mdio_phy_write(bus->priv, mii_id, regnum, value);
+}
+
+struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr)
+{
+	struct bcma_mdio *bcma_mdio;
+	struct mii_bus *mii_bus;
+	int err;
+
+	bcma_mdio = kzalloc(sizeof(*bcma_mdio), GFP_KERNEL);
+	if (!bcma_mdio)
+		return ERR_PTR(-ENOMEM);
+
+	mii_bus = mdiobus_alloc();
+	if (!mii_bus) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	mii_bus->name = "bcma_mdio mii bus";
+	sprintf(mii_bus->id, "%s-%d-%d", "bcma_mdio", core->bus->num,
+		core->core_unit);
+	mii_bus->priv = bcma_mdio;
+	mii_bus->read = bcma_mdio_mii_read;
+	mii_bus->write = bcma_mdio_mii_write;
+	mii_bus->reset = bcma_mdio_phy_reset;
+	mii_bus->parent = &core->dev;
+	mii_bus->phy_mask = ~(1 << phyaddr);
+
+	bcma_mdio->core = core;
+	bcma_mdio->phyaddr = phyaddr;
+
+	err = mdiobus_register(mii_bus);
+	if (err) {
+		dev_err(&core->dev, "Registration of mii bus failed\n");
+		goto err_free_bus;
+	}
+
+	return mii_bus;
+
+err_free_bus:
+	mdiobus_free(mii_bus);
+err:
+	kfree(bcma_mdio);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(bcma_mdio_mii_register);
+
+void bcma_mdio_mii_unregister(struct mii_bus *mii_bus)
+{
+	struct bcma_mdio *bcma_mdio;
+
+	if (!mii_bus)
+		return;
+
+	bcma_mdio = mii_bus->priv;
+
+	mdiobus_unregister(mii_bus);
+	mdiobus_free(mii_bus);
+	kfree(bcma_mdio);
+}
+EXPORT_SYMBOL_GPL(bcma_mdio_mii_unregister);
+
+MODULE_AUTHOR("Rafał Miłecki");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
new file mode 100644
index 000000000000..9a9745c4047c
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -0,0 +1,315 @@
+/*
+ * Driver for (BCM4706)? GBit MAC core on BCMA bus.
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
+#include <linux/brcmphy.h>
+#include <linux/etherdevice.h>
+#include "bgmac.h"
+
+static inline bool bgmac_is_bcm4707_family(struct bcma_device *core)
+{
+	switch (core->bus->chipinfo.id) {
+	case BCMA_CHIP_ID_BCM4707:
+	case BCMA_CHIP_ID_BCM47094:
+	case BCMA_CHIP_ID_BCM53018:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**************************************************
+ * BCMA bus ops
+ **************************************************/
+
+static u32 bcma_bgmac_read(struct bgmac *bgmac, u16 offset)
+{
+	return bcma_read32(bgmac->bcma.core, offset);
+}
+
+static void bcma_bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	bcma_write32(bgmac->bcma.core, offset, value);
+}
+
+static u32 bcma_bgmac_idm_read(struct bgmac *bgmac, u16 offset)
+{
+	return bcma_aread32(bgmac->bcma.core, offset);
+}
+
+static void bcma_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	return bcma_awrite32(bgmac->bcma.core, offset, value);
+}
+
+static bool bcma_bgmac_clk_enabled(struct bgmac *bgmac)
+{
+	return bcma_core_is_enabled(bgmac->bcma.core);
+}
+
+static void bcma_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
+{
+	bcma_core_enable(bgmac->bcma.core, flags);
+}
+
+static void bcma_bgmac_cco_ctl_maskset(struct bgmac *bgmac, u32 offset,
+				       u32 mask, u32 set)
+{
+	struct bcma_drv_cc *cc = &bgmac->bcma.core->bus->drv_cc;
+
+	bcma_chipco_chipctl_maskset(cc, offset, mask, set);
+}
+
+static u32 bcma_bgmac_get_bus_clock(struct bgmac *bgmac)
+{
+	struct bcma_drv_cc *cc = &bgmac->bcma.core->bus->drv_cc;
+
+	return bcma_pmu_get_bus_clock(cc);
+}
+
+static void bcma_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, u32 mask,
+				     u32 set)
+{
+	bcma_maskset32(bgmac->bcma.cmn, offset, mask, set);
+}
+
+static const struct bcma_device_id bgmac_bcma_tbl[] = {
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT,
+		  BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV,
+		  BCMA_ANY_CLASS),
+	{},
+};
+MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
+static int bgmac_probe(struct bcma_device *core)
+{
+	struct ssb_sprom *sprom = &core->bus->sprom;
+	struct mii_bus *mii_bus;
+	struct bgmac *bgmac;
+	u8 *mac;
+	int err;
+
+	bgmac = kzalloc(sizeof(*bgmac), GFP_KERNEL);
+	if (!bgmac)
+		return -ENOMEM;
+
+	bgmac->bcma.core = core;
+	bgmac->dev = &core->dev;
+	bgmac->dma_dev = core->dma_dev;
+	bgmac->irq = core->irq;
+
+	bcma_set_drvdata(core, bgmac);
+
+	switch (core->core_unit) {
+	case 0:
+		mac = sprom->et0mac;
+		break;
+	case 1:
+		mac = sprom->et1mac;
+		break;
+	case 2:
+		mac = sprom->et2mac;
+		break;
+	default:
+		dev_err(bgmac->dev, "Unsupported core_unit %d\n",
+			core->core_unit);
+		err = -ENOTSUPP;
+		goto err;
+	}
+
+	ether_addr_copy(bgmac->mac_addr, mac);
+
+	/* On BCM4706 we need common core to access PHY */
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
+	    !core->bus->drv_gmac_cmn.core) {
+		dev_err(bgmac->dev, "GMAC CMN core not found (required for BCM4706)\n");
+		err = -ENODEV;
+		goto err;
+	}
+	bgmac->bcma.cmn = core->bus->drv_gmac_cmn.core;
+
+	switch (core->core_unit) {
+	case 0:
+		bgmac->phyaddr = sprom->et0phyaddr;
+		break;
+	case 1:
+		bgmac->phyaddr = sprom->et1phyaddr;
+		break;
+	case 2:
+		bgmac->phyaddr = sprom->et2phyaddr;
+		break;
+	}
+	bgmac->phyaddr &= BGMAC_PHY_MASK;
+	if (bgmac->phyaddr == BGMAC_PHY_MASK) {
+		dev_err(bgmac->dev, "No PHY found\n");
+		err = -ENODEV;
+		goto err;
+	}
+	dev_info(bgmac->dev, "Found PHY addr: %d%s\n", bgmac->phyaddr,
+		 bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
+
+	if (!bgmac_is_bcm4707_family(core)) {
+		mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr);
+		if (!IS_ERR(mii_bus)) {
+			err = PTR_ERR(mii_bus);
+			goto err;
+		}
+
+		bgmac->mii_bus = mii_bus;
+	}
+
+	if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
+		dev_err(bgmac->dev, "PCI setup not implemented\n");
+		err = -ENOTSUPP;
+		goto err1;
+	}
+
+	bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
+			       BGMAC_BFL_ENETROBO);
+	if (bgmac->has_robosw)
+		dev_warn(bgmac->dev, "Support for Roboswitch not implemented\n");
+
+	if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
+		dev_warn(bgmac->dev, "Support for ADMtek ethernet switch not implemented\n");
+
+	/* Feature Flags */
+	switch (core->bus->chipinfo.id) {
+	case BCMA_CHIP_ID_BCM5357:
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+		bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+		if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47186) {
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+		}
+		if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM5358)
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII;
+		break;
+	case BCMA_CHIP_ID_BCM53572:
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+		bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+		if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47188) {
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+		}
+		break;
+	case BCMA_CHIP_ID_BCM4749:
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+		bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+		if (core->bus->chipinfo.pkg == 10) {
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+		}
+		break;
+	case BCMA_CHIP_ID_BCM4716:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		/* fallthrough */
+	case BCMA_CHIP_ID_BCM47162:
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL2;
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		break;
+	/* bcm4707_family */
+	case BCMA_CHIP_ID_BCM4707:
+	case BCMA_CHIP_ID_BCM47094:
+	case BCMA_CHIP_ID_BCM53018:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
+		bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+		break;
+	default:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+	}
+
+	if (!bgmac_is_bcm4707_family(core) && core->id.rev > 2)
+		bgmac->feature_flags |= BGMAC_FEAT_MISC_PLL_REQ;
+
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		bgmac->feature_flags |= BGMAC_FEAT_CMN_PHY_CTL;
+		bgmac->feature_flags |= BGMAC_FEAT_NO_CLR_MIB;
+	}
+
+	if (core->id.rev >= 4) {
+		bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
+		bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
+		bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
+	}
+
+	bgmac->read = bcma_bgmac_read;
+	bgmac->write = bcma_bgmac_write;
+	bgmac->idm_read = bcma_bgmac_idm_read;
+	bgmac->idm_write = bcma_bgmac_idm_write;
+	bgmac->clk_enabled = bcma_bgmac_clk_enabled;
+	bgmac->clk_enable = bcma_bgmac_clk_enable;
+	bgmac->cco_ctl_maskset = bcma_bgmac_cco_ctl_maskset;
+	bgmac->get_bus_clock = bcma_bgmac_get_bus_clock;
+	bgmac->cmn_maskset32 = bcma_bgmac_cmn_maskset32;
+
+	err = bgmac_enet_probe(bgmac);
+	if (err)
+		goto err1;
+
+	return 0;
+
+err1:
+	bcma_mdio_mii_unregister(bgmac->mii_bus);
+err:
+	kfree(bgmac);
+	bcma_set_drvdata(core, NULL);
+
+	return err;
+}
+
+static void bgmac_remove(struct bcma_device *core)
+{
+	struct bgmac *bgmac = bcma_get_drvdata(core);
+
+	bcma_mdio_mii_unregister(bgmac->mii_bus);
+	bgmac_enet_remove(bgmac);
+	bcma_set_drvdata(core, NULL);
+	kfree(bgmac);
+}
+
+static struct bcma_driver bgmac_bcma_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= bgmac_bcma_tbl,
+	.probe		= bgmac_probe,
+	.remove		= bgmac_remove,
+};
+
+static int __init bgmac_init(void)
+{
+	int err;
+
+	err = bcma_driver_register(&bgmac_bcma_driver);
+	if (err)
+		return err;
+	pr_info("Broadcom 47xx GBit MAC driver loaded\n");
+
+	return 0;
+}
+
+static void __exit bgmac_exit(void)
+{
+	bcma_driver_unregister(&bgmac_bcma_driver);
+}
+
+module_init(bgmac_init)
+module_exit(bgmac_exit)
+
+MODULE_AUTHOR("Rafał Miłecki");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
new file mode 100644
index 000000000000..be52f270c2c1
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
+#include <linux/etherdevice.h>
+#include <linux/of_address.h>
+#include <linux/of_net.h>
+#include "bgmac.h"
+
+static u32 platform_bgmac_read(struct bgmac *bgmac, u16 offset)
+{
+	return readl(bgmac->plat.base + offset);
+}
+
+static void platform_bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	writel(value, bgmac->plat.base + offset);
+}
+
+static u32 platform_bgmac_idm_read(struct bgmac *bgmac, u16 offset)
+{
+	return readl(bgmac->plat.idm_base + offset);
+}
+
+static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	return writel(value, bgmac->plat.idm_base + offset);
+}
+
+static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
+{
+	if ((bgmac_idm_read(bgmac, BCMA_IOCTL) &
+	     (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) != BCMA_IOCTL_CLK)
+		return false;
+	if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET)
+		return false;
+	return true;
+}
+
+static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
+{
+	bgmac_idm_write(bgmac, BCMA_IOCTL,
+			(BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags));
+	bgmac_idm_read(bgmac, BCMA_IOCTL);
+
+	bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0);
+	bgmac_idm_read(bgmac, BCMA_RESET_CTL);
+	udelay(1);
+
+	bgmac_idm_write(bgmac, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags));
+	bgmac_idm_read(bgmac, BCMA_IOCTL);
+	udelay(1);
+}
+
+static void platform_bgmac_cco_ctl_maskset(struct bgmac *bgmac, u32 offset,
+					   u32 mask, u32 set)
+{
+	/* This shouldn't be encountered */
+	WARN_ON(1);
+}
+
+static u32 platform_bgmac_get_bus_clock(struct bgmac *bgmac)
+{
+	/* This shouldn't be encountered */
+	WARN_ON(1);
+
+	return 0;
+}
+
+static void platform_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset,
+					 u32 mask, u32 set)
+{
+	/* This shouldn't be encountered */
+	WARN_ON(1);
+}
+
+static int bgmac_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct bgmac *bgmac;
+	struct resource *regs;
+	const u8 *mac_addr;
+
+	bgmac = devm_kzalloc(&pdev->dev, sizeof(*bgmac), GFP_KERNEL);
+	if (!bgmac)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, bgmac);
+
+	/* Set the features of the 4707 family */
+	bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+	bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
+	bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+	bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
+	bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
+	bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
+
+	bgmac->dev = &pdev->dev;
+	bgmac->dma_dev = &pdev->dev;
+
+	mac_addr = of_get_mac_address(np);
+	if (mac_addr)
+		ether_addr_copy(bgmac->mac_addr, mac_addr);
+	else
+		dev_warn(&pdev->dev, "MAC address not present in device tree\n");
+
+	bgmac->irq = platform_get_irq(pdev, 0);
+	if (bgmac->irq < 0) {
+		dev_err(&pdev->dev, "Unable to obtain IRQ\n");
+		return bgmac->irq;
+	}
+
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "amac_base");
+	if (!regs) {
+		dev_err(&pdev->dev, "Unable to obtain base resource\n");
+		return -EINVAL;
+	}
+
+	bgmac->plat.base = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(bgmac->plat.base))
+		return PTR_ERR(bgmac->plat.base);
+
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
+	if (!regs) {
+		dev_err(&pdev->dev, "Unable to obtain idm resource\n");
+		return -EINVAL;
+	}
+
+	bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(bgmac->plat.idm_base))
+		return PTR_ERR(bgmac->plat.idm_base);
+
+	bgmac->read = platform_bgmac_read;
+	bgmac->write = platform_bgmac_write;
+	bgmac->idm_read = platform_bgmac_idm_read;
+	bgmac->idm_write = platform_bgmac_idm_write;
+	bgmac->clk_enabled = platform_bgmac_clk_enabled;
+	bgmac->clk_enable = platform_bgmac_clk_enable;
+	bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset;
+	bgmac->get_bus_clock = platform_bgmac_get_bus_clock;
+	bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32;
+
+	return bgmac_enet_probe(bgmac);
+}
+
+static int bgmac_remove(struct platform_device *pdev)
+{
+	struct bgmac *bgmac = platform_get_drvdata(pdev);
+
+	bgmac_enet_remove(bgmac);
+
+	return 0;
+}
+
+static const struct of_device_id bgmac_of_enet_match[] = {
+	{.compatible = "brcm,amac",},
+	{.compatible = "brcm,nsp-amac",},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, bgmac_of_enet_match);
+
+static struct platform_driver bgmac_enet_driver = {
+	.driver = {
+		.name  = "bgmac-enet",
+		.of_match_table = bgmac_of_enet_match,
+	},
+	.probe = bgmac_probe,
+	.remove = bgmac_remove,
+};
+
+module_platform_driver(bgmac_enet_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index e6e74ca86e60..13b072591332 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -6,51 +6,27 @@
  * Licensed under the GNU/GPL. See COPYING for details.
  */
 
-#include "bgmac.h"
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/delay.h>
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
 #include <linux/etherdevice.h>
-#include <linux/mii.h>
-#include <linux/phy.h>
-#include <linux/phy_fixed.h>
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
 #include <linux/bcm47xx_nvram.h>
+#include "bgmac.h"
 
-static const struct bcma_device_id bgmac_bcma_tbl[] = {
-	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
-	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
-	{},
-};
-MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
-
-static inline bool bgmac_is_bcm4707_family(struct bgmac *bgmac)
-{
-	switch (bgmac->core->bus->chipinfo.id) {
-	case BCMA_CHIP_ID_BCM4707:
-	case BCMA_CHIP_ID_BCM47094:
-	case BCMA_CHIP_ID_BCM53018:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static bool bgmac_wait_value(struct bcma_device *core, u16 reg, u32 mask,
+static bool bgmac_wait_value(struct bgmac *bgmac, u16 reg, u32 mask,
 			     u32 value, int timeout)
 {
 	u32 val;
 	int i;
 
 	for (i = 0; i < timeout / 10; i++) {
-		val = bcma_read32(core, reg);
+		val = bgmac_read(bgmac, reg);
 		if ((val & mask) == value)
 			return true;
 		udelay(10);
 	}
-	pr_err("Timeout waiting for reg 0x%X\n", reg);
+	dev_err(bgmac->dev, "Timeout waiting for reg 0x%X\n", reg);
 	return false;
 }
 
@@ -84,22 +60,22 @@ static void bgmac_dma_tx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 		udelay(10);
 	}
 	if (i)
-		bgmac_err(bgmac, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n",
-			  ring->mmio_base, val);
+		dev_err(bgmac->dev, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n",
+			ring->mmio_base, val);
 
 	/* Remove SUSPEND bit */
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, 0);
-	if (!bgmac_wait_value(bgmac->core,
+	if (!bgmac_wait_value(bgmac,
 			      ring->mmio_base + BGMAC_DMA_TX_STATUS,
 			      BGMAC_DMA_TX_STAT, BGMAC_DMA_TX_STAT_DISABLED,
 			      10000)) {
-		bgmac_warn(bgmac, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n",
-			   ring->mmio_base);
+		dev_warn(bgmac->dev, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n",
+			 ring->mmio_base);
 		udelay(300);
 		val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
 		if ((val & BGMAC_DMA_TX_STAT) != BGMAC_DMA_TX_STAT_DISABLED)
-			bgmac_err(bgmac, "Reset of DMA TX ring 0x%X failed\n",
-				  ring->mmio_base);
+			dev_err(bgmac->dev, "Reset of DMA TX ring 0x%X failed\n",
+				ring->mmio_base);
 	}
 }
 
@@ -109,7 +85,7 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
 	u32 ctl;
 
 	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL);
-	if (bgmac->core->id.rev >= 4) {
+	if (bgmac->feature_flags & BGMAC_FEAT_TX_MASK_SETUP) {
 		ctl &= ~BGMAC_DMA_TX_BL_MASK;
 		ctl |= BGMAC_DMA_TX_BL_128 << BGMAC_DMA_TX_BL_SHIFT;
 
@@ -152,7 +128,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring,
 				    struct sk_buff *skb)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
 	int index = ring->end % BGMAC_TX_RING_SLOTS;
 	struct bgmac_slot_info *slot = &ring->slots[index];
@@ -161,7 +137,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
-		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+		netdev_err(bgmac->net_dev, "Too long skb (%d)\n", skb->len);
 		goto err_drop;
 	}
 
@@ -174,7 +150,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	 * even when ring->end overflows
 	 */
 	if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
-		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+		netdev_err(bgmac->net_dev, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
 	}
@@ -241,8 +217,8 @@ err_dma:
 	}
 
 err_dma_head:
-	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-		  ring->mmio_base);
+	netdev_err(bgmac->net_dev, "Mapping error of skb on ring 0x%X\n",
+		   ring->mmio_base);
 
 err_drop:
 	dev_kfree_skb(skb);
@@ -254,7 +230,7 @@ err_drop:
 /* Free transmitted packets */
 static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	int empty_slot;
 	bool freed = false;
 	unsigned bytes_compl = 0, pkts_compl = 0;
@@ -269,15 +245,16 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 	while (ring->start != ring->end) {
 		int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
 		struct bgmac_slot_info *slot = &ring->slots[slot_idx];
-		u32 ctl1;
+		u32 ctl0, ctl1;
 		int len;
 
 		if (slot_idx == empty_slot)
 			break;
 
+		ctl0 = le32_to_cpu(ring->cpu_base[slot_idx].ctl0);
 		ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
 		len = ctl1 & BGMAC_DESC_CTL1_LEN;
-		if (ctl1 & BGMAC_DESC_CTL0_SOF)
+		if (ctl0 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
 			dma_unmap_single(dma_dev, slot->dma_addr, len,
 					 DMA_TO_DEVICE);
@@ -316,12 +293,12 @@ static void bgmac_dma_rx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 		return;
 
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, 0);
-	if (!bgmac_wait_value(bgmac->core,
+	if (!bgmac_wait_value(bgmac,
 			      ring->mmio_base + BGMAC_DMA_RX_STATUS,
 			      BGMAC_DMA_RX_STAT, BGMAC_DMA_RX_STAT_DISABLED,
 			      10000))
-		bgmac_err(bgmac, "Reset of ring 0x%X RX failed\n",
-			  ring->mmio_base);
+		dev_err(bgmac->dev, "Reset of ring 0x%X RX failed\n",
+			ring->mmio_base);
 }
 
 static void bgmac_dma_rx_enable(struct bgmac *bgmac,
@@ -330,7 +307,7 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
 	u32 ctl;
 
 	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
-	if (bgmac->core->id.rev >= 4) {
+	if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
 		ctl &= ~BGMAC_DMA_RX_BL_MASK;
 		ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
 
@@ -351,7 +328,7 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
 static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
 				     struct bgmac_slot_info *slot)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	dma_addr_t dma_addr;
 	struct bgmac_rx_header *rx;
 	void *buf;
@@ -370,7 +347,7 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
 	dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
 				  BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 	if (dma_mapping_error(dma_dev, dma_addr)) {
-		bgmac_err(bgmac, "DMA mapping error\n");
+		netdev_err(bgmac->net_dev, "DMA mapping error\n");
 		put_page(virt_to_head_page(buf));
 		return -ENOMEM;
 	}
@@ -440,7 +417,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 	end_slot /= sizeof(struct bgmac_dma_desc);
 
 	while (ring->start != end_slot) {
-		struct device *dma_dev = bgmac->core->dma_dev;
+		struct device *dma_dev = bgmac->dma_dev;
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
 		struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
 		struct sk_buff *skb;
@@ -465,16 +442,16 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 
 			/* Check for poison and drop or pass the packet */
 			if (len == 0xdead && flags == 0xbeef) {
-				bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n",
-					  ring->start);
+				netdev_err(bgmac->net_dev, "Found poisoned packet at slot %d, DMA issue!\n",
+					   ring->start);
 				put_page(virt_to_head_page(buf));
 				bgmac->net_dev->stats.rx_errors++;
 				break;
 			}
 
 			if (len > BGMAC_RX_ALLOC_SIZE) {
-				bgmac_err(bgmac, "Found oversized packet at slot %d, DMA issue!\n",
-					  ring->start);
+				netdev_err(bgmac->net_dev, "Found oversized packet at slot %d, DMA issue!\n",
+					   ring->start);
 				put_page(virt_to_head_page(buf));
 				bgmac->net_dev->stats.rx_length_errors++;
 				bgmac->net_dev->stats.rx_errors++;
@@ -486,7 +463,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 
 			skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
 			if (unlikely(!skb)) {
-				bgmac_err(bgmac, "build_skb failed\n");
+				netdev_err(bgmac->net_dev, "build_skb failed\n");
 				put_page(virt_to_head_page(buf));
 				bgmac->net_dev->stats.rx_errors++;
 				break;
@@ -543,7 +520,7 @@ static bool bgmac_dma_unaligned(struct bgmac *bgmac,
 static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 				   struct bgmac_dma_ring *ring)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
 	struct bgmac_slot_info *slot;
 	int i;
@@ -569,7 +546,7 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 static void bgmac_dma_rx_ring_free(struct bgmac *bgmac,
 				   struct bgmac_dma_ring *ring)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	struct bgmac_slot_info *slot;
 	int i;
 
@@ -590,7 +567,7 @@ static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
 				     struct bgmac_dma_ring *ring,
 				     int num_slots)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	int size;
 
 	if (!ring->cpu_base)
@@ -628,7 +605,7 @@ static void bgmac_dma_free(struct bgmac *bgmac)
 
 static int bgmac_dma_alloc(struct bgmac *bgmac)
 {
-	struct device *dma_dev = bgmac->core->dma_dev;
+	struct device *dma_dev = bgmac->dma_dev;
 	struct bgmac_dma_ring *ring;
 	static const u16 ring_base[] = { BGMAC_DMA_BASE0, BGMAC_DMA_BASE1,
 					 BGMAC_DMA_BASE2, BGMAC_DMA_BASE3, };
@@ -639,8 +616,8 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
 	BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base));
 	BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base));
 
-	if (!(bcma_aread32(bgmac->core, BCMA_IOST) & BCMA_IOST_DMA64)) {
-		bgmac_err(bgmac, "Core does not report 64-bit DMA\n");
+	if (!(bgmac_idm_read(bgmac, BCMA_IOST) & BCMA_IOST_DMA64)) {
+		dev_err(bgmac->dev, "Core does not report 64-bit DMA\n");
 		return -ENOTSUPP;
 	}
 
@@ -654,8 +631,8 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
 						     &ring->dma_base,
 						     GFP_KERNEL);
 		if (!ring->cpu_base) {
-			bgmac_err(bgmac, "Allocation of TX ring 0x%X failed\n",
-				  ring->mmio_base);
+			dev_err(bgmac->dev, "Allocation of TX ring 0x%X failed\n",
+				ring->mmio_base);
 			goto err_dma_free;
 		}
 
@@ -679,8 +656,8 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
 						     &ring->dma_base,
 						     GFP_KERNEL);
 		if (!ring->cpu_base) {
-			bgmac_err(bgmac, "Allocation of RX ring 0x%X failed\n",
-				  ring->mmio_base);
+			dev_err(bgmac->dev, "Allocation of RX ring 0x%X failed\n",
+				ring->mmio_base);
 			err = -ENOMEM;
 			goto err_dma_free;
 		}
@@ -755,150 +732,6 @@ error:
 	return err;
 }
 
-/**************************************************
- * PHY ops
- **************************************************/
-
-static u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg)
-{
-	struct bcma_device *core;
-	u16 phy_access_addr;
-	u16 phy_ctl_addr;
-	u32 tmp;
-
-	BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK);
-	BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK);
-	BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT);
-	BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK);
-	BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT);
-	BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE);
-	BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START);
-	BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK);
-	BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK);
-	BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
-	BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
-
-	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
-		core = bgmac->core->bus->drv_gmac_cmn.core;
-		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
-		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
-	} else {
-		core = bgmac->core;
-		phy_access_addr = BGMAC_PHY_ACCESS;
-		phy_ctl_addr = BGMAC_PHY_CNTL;
-	}
-
-	tmp = bcma_read32(core, phy_ctl_addr);
-	tmp &= ~BGMAC_PC_EPA_MASK;
-	tmp |= phyaddr;
-	bcma_write32(core, phy_ctl_addr, tmp);
-
-	tmp = BGMAC_PA_START;
-	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
-	tmp |= reg << BGMAC_PA_REG_SHIFT;
-	bcma_write32(core, phy_access_addr, tmp);
-
-	if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) {
-		bgmac_err(bgmac, "Reading PHY %d register 0x%X failed\n",
-			  phyaddr, reg);
-		return 0xffff;
-	}
-
-	return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK;
-}
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */
-static int bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value)
-{
-	struct bcma_device *core;
-	u16 phy_access_addr;
-	u16 phy_ctl_addr;
-	u32 tmp;
-
-	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
-		core = bgmac->core->bus->drv_gmac_cmn.core;
-		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
-		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
-	} else {
-		core = bgmac->core;
-		phy_access_addr = BGMAC_PHY_ACCESS;
-		phy_ctl_addr = BGMAC_PHY_CNTL;
-	}
-
-	tmp = bcma_read32(core, phy_ctl_addr);
-	tmp &= ~BGMAC_PC_EPA_MASK;
-	tmp |= phyaddr;
-	bcma_write32(core, phy_ctl_addr, tmp);
-
-	bgmac_write(bgmac, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
-	if (bgmac_read(bgmac, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
-		bgmac_warn(bgmac, "Error setting MDIO int\n");
-
-	tmp = BGMAC_PA_START;
-	tmp |= BGMAC_PA_WRITE;
-	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
-	tmp |= reg << BGMAC_PA_REG_SHIFT;
-	tmp |= value;
-	bcma_write32(core, phy_access_addr, tmp);
-
-	if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) {
-		bgmac_err(bgmac, "Writing to PHY %d register 0x%X failed\n",
-			  phyaddr, reg);
-		return -ETIMEDOUT;
-	}
-
-	return 0;
-}
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */
-static void bgmac_phy_init(struct bgmac *bgmac)
-{
-	struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
-	struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
-	u8 i;
-
-	if (ci->id == BCMA_CHIP_ID_BCM5356) {
-		for (i = 0; i < 5; i++) {
-			bgmac_phy_write(bgmac, i, 0x1f, 0x008b);
-			bgmac_phy_write(bgmac, i, 0x15, 0x0100);
-			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
-			bgmac_phy_write(bgmac, i, 0x12, 0x2aaa);
-			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
-		}
-	}
-	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
-	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
-	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) {
-		bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0);
-		bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0);
-		for (i = 0; i < 5; i++) {
-			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
-			bgmac_phy_write(bgmac, i, 0x16, 0x5284);
-			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
-			bgmac_phy_write(bgmac, i, 0x17, 0x0010);
-			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
-			bgmac_phy_write(bgmac, i, 0x16, 0x5296);
-			bgmac_phy_write(bgmac, i, 0x17, 0x1073);
-			bgmac_phy_write(bgmac, i, 0x17, 0x9073);
-			bgmac_phy_write(bgmac, i, 0x16, 0x52b6);
-			bgmac_phy_write(bgmac, i, 0x17, 0x9273);
-			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
-		}
-	}
-}
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */
-static void bgmac_phy_reset(struct bgmac *bgmac)
-{
-	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
-		return;
-
-	bgmac_phy_write(bgmac, bgmac->phyaddr, MII_BMCR, BMCR_RESET);
-	udelay(100);
-	if (bgmac_phy_read(bgmac, bgmac->phyaddr, MII_BMCR) & BMCR_RESET)
-		bgmac_err(bgmac, "PHY reset failed\n");
-	bgmac_phy_init(bgmac);
-}
 
 /**************************************************
  * Chip ops
@@ -912,14 +745,20 @@ static void bgmac_cmdcfg_maskset(struct bgmac *bgmac, u32 mask, u32 set,
 {
 	u32 cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
 	u32 new_val = (cmdcfg & mask) | set;
+	u32 cmdcfg_sr;
+
+	if (bgmac->feature_flags & BGMAC_FEAT_CMDCFG_SR_REV4)
+		cmdcfg_sr = BGMAC_CMDCFG_SR_REV4;
+	else
+		cmdcfg_sr = BGMAC_CMDCFG_SR_REV0;
 
-	bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR(bgmac->core->id.rev));
+	bgmac_set(bgmac, BGMAC_CMDCFG, cmdcfg_sr);
 	udelay(2);
 
 	if (new_val != cmdcfg || force)
 		bgmac_write(bgmac, BGMAC_CMDCFG, new_val);
 
-	bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR(bgmac->core->id.rev));
+	bgmac_mask(bgmac, BGMAC_CMDCFG, ~cmdcfg_sr);
 	udelay(2);
 }
 
@@ -948,7 +787,7 @@ static void bgmac_chip_stats_update(struct bgmac *bgmac)
 {
 	int i;
 
-	if (bgmac->core->id.id != BCMA_CORE_4706_MAC_GBIT) {
+	if (!(bgmac->feature_flags & BGMAC_FEAT_NO_CLR_MIB)) {
 		for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++)
 			bgmac->mib_tx_regs[i] =
 				bgmac_read(bgmac,
@@ -967,7 +806,7 @@ static void bgmac_clear_mib(struct bgmac *bgmac)
 {
 	int i;
 
-	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT)
+	if (bgmac->feature_flags & BGMAC_FEAT_NO_CLR_MIB)
 		return;
 
 	bgmac_set(bgmac, BGMAC_DEV_CTL, BGMAC_DC_MROR);
@@ -997,7 +836,8 @@ static void bgmac_mac_speed(struct bgmac *bgmac)
 		set |= BGMAC_CMDCFG_ES_2500;
 		break;
 	default:
-		bgmac_err(bgmac, "Unsupported speed: %d\n", bgmac->mac_speed);
+		dev_err(bgmac->dev, "Unsupported speed: %d\n",
+			bgmac->mac_speed);
 	}
 
 	if (bgmac->mac_duplex == DUPLEX_HALF)
@@ -1008,17 +848,16 @@ static void bgmac_mac_speed(struct bgmac *bgmac)
 
 static void bgmac_miiconfig(struct bgmac *bgmac)
 {
-	struct bcma_device *core = bgmac->core;
-	u8 imode;
-
-	if (bgmac_is_bcm4707_family(bgmac)) {
-		bcma_awrite32(core, BCMA_IOCTL,
-			      bcma_aread32(core, BCMA_IOCTL) | 0x40 |
-			      BGMAC_BCMA_IOCTL_SW_CLKEN);
+	if (bgmac->feature_flags & BGMAC_FEAT_FORCE_SPEED_2500) {
+		bgmac_idm_write(bgmac, BCMA_IOCTL,
+				bgmac_idm_read(bgmac, BCMA_IOCTL) | 0x40 |
+				BGMAC_BCMA_IOCTL_SW_CLKEN);
 		bgmac->mac_speed = SPEED_2500;
 		bgmac->mac_duplex = DUPLEX_FULL;
 		bgmac_mac_speed(bgmac);
 	} else {
+		u8 imode;
+
 		imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) &
 			BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT;
 		if (imode == 0 || imode == 1) {
@@ -1032,14 +871,11 @@ static void bgmac_miiconfig(struct bgmac *bgmac)
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */
 static void bgmac_chip_reset(struct bgmac *bgmac)
 {
-	struct bcma_device *core = bgmac->core;
-	struct bcma_bus *bus = core->bus;
-	struct bcma_chipinfo *ci = &bus->chipinfo;
-	u32 flags;
+	u32 cmdcfg_sr;
 	u32 iost;
 	int i;
 
-	if (bcma_core_is_enabled(core)) {
+	if (bgmac_clk_enabled(bgmac)) {
 		if (!bgmac->stats_grabbed) {
 			/* bgmac_chip_stats_update(bgmac); */
 			bgmac->stats_grabbed = true;
@@ -1057,38 +893,32 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 		/* TODO: Clear software multicast filter list */
 	}
 
-	iost = bcma_aread32(core, BCMA_IOST);
-	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) ||
-	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) ||
-	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188))
+	iost = bgmac_idm_read(bgmac, BCMA_IOST);
+	if (bgmac->feature_flags & BGMAC_FEAT_IOST_ATTACHED)
 		iost &= ~BGMAC_BCMA_IOST_ATTACHED;
 
 	/* 3GMAC: for BCM4707 & BCM47094, only do core reset at bgmac_probe() */
-	if (ci->id != BCMA_CHIP_ID_BCM4707 &&
-	    ci->id != BCMA_CHIP_ID_BCM47094) {
-		flags = 0;
+	if (!(bgmac->feature_flags & BGMAC_FEAT_NO_RESET)) {
+		u32 flags = 0;
 		if (iost & BGMAC_BCMA_IOST_ATTACHED) {
 			flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
 			if (!bgmac->has_robosw)
 				flags |= BGMAC_BCMA_IOCTL_SW_RESET;
 		}
-		bcma_core_enable(core, flags);
+		bgmac_clk_enable(bgmac, flags);
 	}
 
 	/* Request Misc PLL for corerev > 2 */
-	if (core->id.rev > 2 && !bgmac_is_bcm4707_family(bgmac)) {
+	if (bgmac->feature_flags & BGMAC_FEAT_MISC_PLL_REQ) {
 		bgmac_set(bgmac, BCMA_CLKCTLST,
 			  BGMAC_BCMA_CLKCTLST_MISC_PLL_REQ);
-		bgmac_wait_value(bgmac->core, BCMA_CLKCTLST,
+		bgmac_wait_value(bgmac, BCMA_CLKCTLST,
 				 BGMAC_BCMA_CLKCTLST_MISC_PLL_ST,
 				 BGMAC_BCMA_CLKCTLST_MISC_PLL_ST,
 				 1000);
 	}
 
-	if (ci->id == BCMA_CHIP_ID_BCM5357 ||
-	    ci->id == BCMA_CHIP_ID_BCM4749 ||
-	    ci->id == BCMA_CHIP_ID_BCM53572) {
-		struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
+	if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_PHY) {
 		u8 et_swtype = 0;
 		u8 sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHY |
 			     BGMAC_CHIPCTL_1_IF_TYPE_MII;
@@ -1096,35 +926,37 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 
 		if (bcm47xx_nvram_getenv("et_swtype", buf, sizeof(buf)) > 0) {
 			if (kstrtou8(buf, 0, &et_swtype))
-				bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n",
-					  buf);
+				dev_err(bgmac->dev, "Failed to parse et_swtype (%s)\n",
+					buf);
 			et_swtype &= 0x0f;
 			et_swtype <<= 4;
 			sw_type = et_swtype;
-		} else if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358) {
+		} else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_EPHYRMII) {
 			sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
-		} else if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) ||
-			   (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) ||
-			   (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) {
+		} else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_RGMII) {
 			sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RGMII |
 				  BGMAC_CHIPCTL_1_SW_TYPE_RGMII;
 		}
-		bcma_chipco_chipctl_maskset(cc, 1,
-					    ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
-					      BGMAC_CHIPCTL_1_SW_TYPE_MASK),
-					    sw_type);
+		bgmac_cco_ctl_maskset(bgmac, 1, ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
+						  BGMAC_CHIPCTL_1_SW_TYPE_MASK),
+				      sw_type);
 	}
 
 	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
-		bcma_awrite32(core, BCMA_IOCTL,
-			      bcma_aread32(core, BCMA_IOCTL) &
-			      ~BGMAC_BCMA_IOCTL_SW_RESET);
+		bgmac_idm_write(bgmac, BCMA_IOCTL,
+				bgmac_idm_read(bgmac, BCMA_IOCTL) &
+				~BGMAC_BCMA_IOCTL_SW_RESET);
 
 	/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset
 	 * Specs don't say about using BGMAC_CMDCFG_SR, but in this routine
 	 * BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to
 	 * be keps until taking MAC out of the reset.
 	 */
+	if (bgmac->feature_flags & BGMAC_FEAT_CMDCFG_SR_REV4)
+		cmdcfg_sr = BGMAC_CMDCFG_SR_REV4;
+	else
+		cmdcfg_sr = BGMAC_CMDCFG_SR_REV0;
+
 	bgmac_cmdcfg_maskset(bgmac,
 			     ~(BGMAC_CMDCFG_TE |
 			       BGMAC_CMDCFG_RE |
@@ -1142,19 +974,20 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 			     BGMAC_CMDCFG_PROM |
 			     BGMAC_CMDCFG_NLC |
 			     BGMAC_CMDCFG_CFE |
-			     BGMAC_CMDCFG_SR(core->id.rev),
+			     cmdcfg_sr,
 			     false);
 	bgmac->mac_speed = SPEED_UNKNOWN;
 	bgmac->mac_duplex = DUPLEX_UNKNOWN;
 
 	bgmac_clear_mib(bgmac);
-	if (core->id.id == BCMA_CORE_4706_MAC_GBIT)
-		bcma_maskset32(bgmac->cmn, BCMA_GMAC_CMN_PHY_CTL, ~0,
-			       BCMA_GMAC_CMN_PC_MTE);
+	if (bgmac->feature_flags & BGMAC_FEAT_CMN_PHY_CTL)
+		bgmac_cmn_maskset32(bgmac, BCMA_GMAC_CMN_PHY_CTL, ~0,
+				    BCMA_GMAC_CMN_PC_MTE);
 	else
 		bgmac_set(bgmac, BGMAC_PHY_CNTL, BGMAC_PC_MTE);
 	bgmac_miiconfig(bgmac);
-	bgmac_phy_init(bgmac);
+	if (bgmac->mii_bus)
+		bgmac->mii_bus->reset(bgmac->mii_bus);
 
 	netdev_reset_queue(bgmac->net_dev);
 }
@@ -1173,50 +1006,51 @@ static void bgmac_chip_intrs_off(struct bgmac *bgmac)
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_enable */
 static void bgmac_enable(struct bgmac *bgmac)
 {
-	struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
+	u32 cmdcfg_sr;
 	u32 cmdcfg;
 	u32 mode;
-	u32 rxq_ctl;
-	u32 fl_ctl;
-	u16 bp_clk;
-	u8 mdp;
+
+	if (bgmac->feature_flags & BGMAC_FEAT_CMDCFG_SR_REV4)
+		cmdcfg_sr = BGMAC_CMDCFG_SR_REV4;
+	else
+		cmdcfg_sr = BGMAC_CMDCFG_SR_REV0;
 
 	cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
 	bgmac_cmdcfg_maskset(bgmac, ~(BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE),
-			     BGMAC_CMDCFG_SR(bgmac->core->id.rev), true);
+			     cmdcfg_sr, true);
 	udelay(2);
 	cmdcfg |= BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE;
 	bgmac_write(bgmac, BGMAC_CMDCFG, cmdcfg);
 
 	mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
 		BGMAC_DS_MM_SHIFT;
-	if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0)
+	if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0)
 		bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
-	if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2)
-		bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0,
-					    BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
-
-	switch (ci->id) {
-	case BCMA_CHIP_ID_BCM5357:
-	case BCMA_CHIP_ID_BCM4749:
-	case BCMA_CHIP_ID_BCM53572:
-	case BCMA_CHIP_ID_BCM4716:
-	case BCMA_CHIP_ID_BCM47162:
-		fl_ctl = 0x03cb04cb;
-		if (ci->id == BCMA_CHIP_ID_BCM5357 ||
-		    ci->id == BCMA_CHIP_ID_BCM4749 ||
-		    ci->id == BCMA_CHIP_ID_BCM53572)
+	if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2)
+		bgmac_cco_ctl_maskset(bgmac, 1, ~0,
+				      BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
+
+	if (bgmac->feature_flags & (BGMAC_FEAT_FLW_CTRL1 |
+				    BGMAC_FEAT_FLW_CTRL2)) {
+		u32 fl_ctl;
+
+		if (bgmac->feature_flags & BGMAC_FEAT_FLW_CTRL1)
 			fl_ctl = 0x2300e1;
+		else
+			fl_ctl = 0x03cb04cb;
+
 		bgmac_write(bgmac, BGMAC_FLOW_CTL_THRESH, fl_ctl);
 		bgmac_write(bgmac, BGMAC_PAUSE_CTL, 0x27fff);
-		break;
 	}
 
-	if (!bgmac_is_bcm4707_family(bgmac)) {
+	if (bgmac->feature_flags & BGMAC_FEAT_SET_RXQ_CLK) {
+		u32 rxq_ctl;
+		u16 bp_clk;
+		u8 mdp;
+
 		rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL);
 		rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK;
-		bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) /
-				1000000;
+		bp_clk = bgmac_get_bus_clock(bgmac) / 1000000;
 		mdp = (bp_clk * 128 / 1000) - 3;
 		rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT);
 		bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl);
@@ -1260,7 +1094,7 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
 
 	int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
 	if (int_status)
-		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
+		dev_err(bgmac->dev, "Unknown IRQs: 0x%08X\n", int_status);
 
 	/* Disable new interrupts until handling existing ones */
 	bgmac_chip_intrs_off(bgmac);
@@ -1311,10 +1145,10 @@ static int bgmac_open(struct net_device *net_dev)
 	/* Specs say about reclaiming rings here, but we do that in DMA init */
 	bgmac_chip_init(bgmac);
 
-	err = request_irq(bgmac->core->irq, bgmac_interrupt, IRQF_SHARED,
+	err = request_irq(bgmac->irq, bgmac_interrupt, IRQF_SHARED,
 			  KBUILD_MODNAME, net_dev);
 	if (err < 0) {
-		bgmac_err(bgmac, "IRQ request error: %d!\n", err);
+		dev_err(bgmac->dev, "IRQ request error: %d!\n", err);
 		bgmac_dma_cleanup(bgmac);
 		return err;
 	}
@@ -1322,7 +1156,8 @@ static int bgmac_open(struct net_device *net_dev)
 
 	phy_start(net_dev->phydev);
 
-	netif_carrier_on(net_dev);
+	netif_start_queue(net_dev);
+
 	return 0;
 }
 
@@ -1336,7 +1171,7 @@ static int bgmac_stop(struct net_device *net_dev)
 
 	napi_disable(&bgmac->napi);
 	bgmac_chip_intrs_off(bgmac);
-	free_irq(bgmac->core->irq, net_dev);
+	free_irq(bgmac->irq, net_dev);
 
 	bgmac_chip_reset(bgmac);
 	bgmac_dma_cleanup(bgmac);
@@ -1515,7 +1350,7 @@ static void bgmac_get_drvinfo(struct net_device *net_dev,
 			      struct ethtool_drvinfo *info)
 {
 	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
-	strlcpy(info->bus_info, "BCMA", sizeof(info->bus_info));
+	strlcpy(info->bus_info, "AXI", sizeof(info->bus_info));
 }
 
 static const struct ethtool_ops bgmac_ethtool_ops = {
@@ -1531,17 +1366,6 @@ static const struct ethtool_ops bgmac_ethtool_ops = {
  * MII
  **************************************************/
 
-static int bgmac_mii_read(struct mii_bus *bus, int mii_id, int regnum)
-{
-	return bgmac_phy_read(bus->priv, mii_id, regnum);
-}
-
-static int bgmac_mii_write(struct mii_bus *bus, int mii_id, int regnum,
-			   u16 value)
-{
-	return bgmac_phy_write(bus->priv, mii_id, regnum, value);
-}
-
 static void bgmac_adjust_link(struct net_device *net_dev)
 {
 	struct bgmac *bgmac = netdev_priv(net_dev);
@@ -1566,7 +1390,7 @@ static void bgmac_adjust_link(struct net_device *net_dev)
 	}
 }
 
-static int bgmac_fixed_phy_register(struct bgmac *bgmac)
+static int bgmac_phy_connect_direct(struct bgmac *bgmac)
 {
 	struct fixed_phy_status fphy_status = {
 		.link = 1,
@@ -1578,194 +1402,76 @@ static int bgmac_fixed_phy_register(struct bgmac *bgmac)
 
 	phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);
 	if (!phy_dev || IS_ERR(phy_dev)) {
-		bgmac_err(bgmac, "Failed to register fixed PHY device\n");
+		dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
 		return -ENODEV;
 	}
 
 	err = phy_connect_direct(bgmac->net_dev, phy_dev, bgmac_adjust_link,
 				 PHY_INTERFACE_MODE_MII);
 	if (err) {
-		bgmac_err(bgmac, "Connecting PHY failed\n");
+		dev_err(bgmac->dev, "Connecting PHY failed\n");
 		return err;
 	}
 
 	return err;
 }
 
-static int bgmac_mii_register(struct bgmac *bgmac)
+static int bgmac_phy_connect(struct bgmac *bgmac)
 {
-	struct mii_bus *mii_bus;
 	struct phy_device *phy_dev;
 	char bus_id[MII_BUS_ID_SIZE + 3];
-	int err = 0;
-
-	if (bgmac_is_bcm4707_family(bgmac))
-		return bgmac_fixed_phy_register(bgmac);
-
-	mii_bus = mdiobus_alloc();
-	if (!mii_bus)
-		return -ENOMEM;
-
-	mii_bus->name = "bgmac mii bus";
-	sprintf(mii_bus->id, "%s-%d-%d", "bgmac", bgmac->core->bus->num,
-		bgmac->core->core_unit);
-	mii_bus->priv = bgmac;
-	mii_bus->read = bgmac_mii_read;
-	mii_bus->write = bgmac_mii_write;
-	mii_bus->parent = &bgmac->core->dev;
-	mii_bus->phy_mask = ~(1 << bgmac->phyaddr);
-
-	err = mdiobus_register(mii_bus);
-	if (err) {
-		bgmac_err(bgmac, "Registration of mii bus failed\n");
-		goto err_free_bus;
-	}
-
-	bgmac->mii_bus = mii_bus;
 
 	/* Connect to the PHY */
-	snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, mii_bus->id,
+	snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id,
 		 bgmac->phyaddr);
 	phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link,
 			      PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(phy_dev)) {
-		bgmac_err(bgmac, "PHY connection failed\n");
-		err = PTR_ERR(phy_dev);
-		goto err_unregister_bus;
+		dev_err(bgmac->dev, "PHY connecton failed\n");
+		return PTR_ERR(phy_dev);
 	}
 
-	return err;
-
-err_unregister_bus:
-	mdiobus_unregister(mii_bus);
-err_free_bus:
-	mdiobus_free(mii_bus);
-	return err;
+	return 0;
 }
 
-static void bgmac_mii_unregister(struct bgmac *bgmac)
-{
-	struct mii_bus *mii_bus = bgmac->mii_bus;
-
-	mdiobus_unregister(mii_bus);
-	mdiobus_free(mii_bus);
-}
-
-/**************************************************
- * BCMA bus ops
- **************************************************/
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
-static int bgmac_probe(struct bcma_device *core)
+int bgmac_enet_probe(struct bgmac *info)
 {
 	struct net_device *net_dev;
 	struct bgmac *bgmac;
-	struct ssb_sprom *sprom = &core->bus->sprom;
-	u8 *mac;
 	int err;
 
-	switch (core->core_unit) {
-	case 0:
-		mac = sprom->et0mac;
-		break;
-	case 1:
-		mac = sprom->et1mac;
-		break;
-	case 2:
-		mac = sprom->et2mac;
-		break;
-	default:
-		pr_err("Unsupported core_unit %d\n", core->core_unit);
-		return -ENOTSUPP;
-	}
-
-	if (!is_valid_ether_addr(mac)) {
-		dev_err(&core->dev, "Invalid MAC addr: %pM\n", mac);
-		eth_random_addr(mac);
-		dev_warn(&core->dev, "Using random MAC: %pM\n", mac);
-	}
-
-	/* This (reset &) enable is not preset in specs or reference driver but
-	 * Broadcom does it in arch PCI code when enabling fake PCI device.
-	 */
-	bcma_core_enable(core, 0);
-
 	/* Allocation and references */
 	net_dev = alloc_etherdev(sizeof(*bgmac));
 	if (!net_dev)
 		return -ENOMEM;
+
 	net_dev->netdev_ops = &bgmac_netdev_ops;
-	net_dev->irq = core->irq;
 	net_dev->ethtool_ops = &bgmac_ethtool_ops;
 	bgmac = netdev_priv(net_dev);
+	memcpy(bgmac, info, sizeof(*bgmac));
 	bgmac->net_dev = net_dev;
-	bgmac->core = core;
-	bcma_set_drvdata(core, bgmac);
-	SET_NETDEV_DEV(net_dev, &core->dev);
-
-	/* Defaults */
-	memcpy(bgmac->net_dev->dev_addr, mac, ETH_ALEN);
-
-	/* On BCM4706 we need common core to access PHY */
-	if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
-	    !core->bus->drv_gmac_cmn.core) {
-		bgmac_err(bgmac, "GMAC CMN core not found (required for BCM4706)\n");
-		err = -ENODEV;
-		goto err_netdev_free;
-	}
-	bgmac->cmn = core->bus->drv_gmac_cmn.core;
-
-	switch (core->core_unit) {
-	case 0:
-		bgmac->phyaddr = sprom->et0phyaddr;
-		break;
-	case 1:
-		bgmac->phyaddr = sprom->et1phyaddr;
-		break;
-	case 2:
-		bgmac->phyaddr = sprom->et2phyaddr;
-		break;
+	net_dev->irq = bgmac->irq;
+	SET_NETDEV_DEV(net_dev, bgmac->dev);
+
+	if (!is_valid_ether_addr(bgmac->mac_addr)) {
+		dev_err(bgmac->dev, "Invalid MAC addr: %pM\n",
+			bgmac->mac_addr);
+		eth_random_addr(bgmac->mac_addr);
+		dev_warn(bgmac->dev, "Using random MAC: %pM\n",
+			 bgmac->mac_addr);
 	}
-	bgmac->phyaddr &= BGMAC_PHY_MASK;
-	if (bgmac->phyaddr == BGMAC_PHY_MASK) {
-		bgmac_err(bgmac, "No PHY found\n");
-		err = -ENODEV;
-		goto err_netdev_free;
-	}
-	bgmac_info(bgmac, "Found PHY addr: %d%s\n", bgmac->phyaddr,
-		   bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
+	ether_addr_copy(net_dev->dev_addr, bgmac->mac_addr);
 
-	if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
-		bgmac_err(bgmac, "PCI setup not implemented\n");
-		err = -ENOTSUPP;
-		goto err_netdev_free;
-	}
+	/* This (reset &) enable is not preset in specs or reference driver but
+	 * Broadcom does it in arch PCI code when enabling fake PCI device.
+	 */
+	bgmac_clk_enable(bgmac, 0);
 
 	bgmac_chip_reset(bgmac);
 
-	/* For Northstar, we have to take all GMAC core out of reset */
-	if (bgmac_is_bcm4707_family(bgmac)) {
-		struct bcma_device *ns_core;
-		int ns_gmac;
-
-		/* Northstar has 4 GMAC cores */
-		for (ns_gmac = 0; ns_gmac < 4; ns_gmac++) {
-			/* As Northstar requirement, we have to reset all GMACs
-			 * before accessing one. bgmac_chip_reset() call
-			 * bcma_core_enable() for this core. Then the other
-			 * three GMACs didn't reset.  We do it here.
-			 */
-			ns_core = bcma_find_core_unit(core->bus,
-						      BCMA_CORE_MAC_GBIT,
-						      ns_gmac);
-			if (ns_core && !bcma_core_is_enabled(ns_core))
-				bcma_core_enable(ns_core, 0);
-		}
-	}
-
 	err = bgmac_dma_alloc(bgmac);
 	if (err) {
-		bgmac_err(bgmac, "Unable to alloc memory for DMA\n");
+		dev_err(bgmac->dev, "Unable to alloc memory for DMA\n");
 		goto err_netdev_free;
 	}
 
@@ -1773,22 +1479,14 @@ static int bgmac_probe(struct bcma_device *core)
 	if (bcm47xx_nvram_getenv("et0_no_txint", NULL, 0) == 0)
 		bgmac->int_mask &= ~BGMAC_IS_TX_MASK;
 
-	/* TODO: reset the external phy. Specs are needed */
-	bgmac_phy_reset(bgmac);
-
-	bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
-			       BGMAC_BFL_ENETROBO);
-	if (bgmac->has_robosw)
-		bgmac_warn(bgmac, "Support for Roboswitch not implemented\n");
-
-	if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
-		bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n");
-
 	netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
 
-	err = bgmac_mii_register(bgmac);
+	if (!bgmac->mii_bus)
+		err = bgmac_phy_connect_direct(bgmac);
+	else
+		err = bgmac_phy_connect(bgmac);
 	if (err) {
-		bgmac_err(bgmac, "Cannot register MDIO\n");
+		dev_err(bgmac->dev, "Cannot connect to phy\n");
 		goto err_dma_free;
 	}
 
@@ -1798,64 +1496,34 @@ static int bgmac_probe(struct bcma_device *core)
 
 	err = register_netdev(bgmac->net_dev);
 	if (err) {
-		bgmac_err(bgmac, "Cannot register net device\n");
-		goto err_mii_unregister;
+		dev_err(bgmac->dev, "Cannot register net device\n");
+		goto err_phy_disconnect;
 	}
 
 	netif_carrier_off(net_dev);
 
 	return 0;
 
-err_mii_unregister:
-	bgmac_mii_unregister(bgmac);
+err_phy_disconnect:
+	phy_disconnect(net_dev->phydev);
 err_dma_free:
 	bgmac_dma_free(bgmac);
-
 err_netdev_free:
-	bcma_set_drvdata(core, NULL);
 	free_netdev(net_dev);
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(bgmac_enet_probe);
 
-static void bgmac_remove(struct bcma_device *core)
+void bgmac_enet_remove(struct bgmac *bgmac)
 {
-	struct bgmac *bgmac = bcma_get_drvdata(core);
-
 	unregister_netdev(bgmac->net_dev);
-	bgmac_mii_unregister(bgmac);
+	phy_disconnect(bgmac->net_dev->phydev);
 	netif_napi_del(&bgmac->napi);
 	bgmac_dma_free(bgmac);
-	bcma_set_drvdata(core, NULL);
 	free_netdev(bgmac->net_dev);
 }
-
-static struct bcma_driver bgmac_bcma_driver = {
-	.name		= KBUILD_MODNAME,
-	.id_table	= bgmac_bcma_tbl,
-	.probe		= bgmac_probe,
-	.remove		= bgmac_remove,
-};
-
-static int __init bgmac_init(void)
-{
-	int err;
-
-	err = bcma_driver_register(&bgmac_bcma_driver);
-	if (err)
-		return err;
-	pr_info("Broadcom 47xx GBit MAC driver loaded\n");
-
-	return 0;
-}
-
-static void __exit bgmac_exit(void)
-{
-	bcma_driver_unregister(&bgmac_bcma_driver);
-}
-
-module_init(bgmac_init)
-module_exit(bgmac_exit)
+EXPORT_SYMBOL_GPL(bgmac_enet_remove);
 
 MODULE_AUTHOR("Rafał Miłecki");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 99beb181f577..24a250267b88 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -1,19 +1,6 @@
 #ifndef _BGMAC_H
 #define _BGMAC_H
 
-#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
-
-#define bgmac_err(bgmac, fmt, ...) \
-	dev_err(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
-#define bgmac_warn(bgmac, fmt, ...) \
-	dev_warn(&(bgmac)->core->dev, fmt,  ##__VA_ARGS__)
-#define bgmac_info(bgmac, fmt, ...) \
-	dev_info(&(bgmac)->core->dev, fmt,  ##__VA_ARGS__)
-#define bgmac_dbg(bgmac, fmt, ...) \
-	dev_dbg(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
-
-#include <linux/bcma/bcma.h>
-#include <linux/brcmphy.h>
 #include <linux/netdevice.h>
 
 #define BGMAC_DEV_CTL				0x000
@@ -201,7 +188,6 @@
 #define  BGMAC_CMDCFG_HD_SHIFT			10
 #define  BGMAC_CMDCFG_SR_REV0			0x00000800	/* Set to reset mode, for core rev 0-3 */
 #define  BGMAC_CMDCFG_SR_REV4			0x00002000	/* Set to reset mode, for core rev >= 4 */
-#define  BGMAC_CMDCFG_SR(rev)  ((rev >= 4) ? BGMAC_CMDCFG_SR_REV4 : BGMAC_CMDCFG_SR_REV0)
 #define  BGMAC_CMDCFG_ML			0x00008000	/* Set to activate mac loopback mode */
 #define  BGMAC_CMDCFG_AE			0x00400000
 #define  BGMAC_CMDCFG_CFE			0x00800000
@@ -387,6 +373,24 @@
 
 #define ETHER_MAX_LEN   1518
 
+/* Feature Flags */
+#define BGMAC_FEAT_TX_MASK_SETUP	BIT(0)
+#define BGMAC_FEAT_RX_MASK_SETUP	BIT(1)
+#define BGMAC_FEAT_IOST_ATTACHED	BIT(2)
+#define BGMAC_FEAT_NO_RESET		BIT(3)
+#define BGMAC_FEAT_MISC_PLL_REQ		BIT(4)
+#define BGMAC_FEAT_SW_TYPE_PHY		BIT(5)
+#define BGMAC_FEAT_SW_TYPE_EPHYRMII	BIT(6)
+#define BGMAC_FEAT_SW_TYPE_RGMII	BIT(7)
+#define BGMAC_FEAT_CMN_PHY_CTL		BIT(8)
+#define BGMAC_FEAT_FLW_CTRL1		BIT(9)
+#define BGMAC_FEAT_FLW_CTRL2		BIT(10)
+#define BGMAC_FEAT_SET_RXQ_CLK		BIT(11)
+#define BGMAC_FEAT_CLKCTLST		BIT(12)
+#define BGMAC_FEAT_NO_CLR_MIB		BIT(13)
+#define BGMAC_FEAT_FORCE_SPEED_2500	BIT(14)
+#define BGMAC_FEAT_CMDCFG_SR_REV4	BIT(15)
+
 struct bgmac_slot_info {
 	union {
 		struct sk_buff *skb;
@@ -436,8 +440,23 @@ struct bgmac_rx_header {
 };
 
 struct bgmac {
-	struct bcma_device *core;
-	struct bcma_device *cmn; /* Reference to CMN core for BCM4706 */
+	union {
+		struct {
+			void *base;
+			void *idm_base;
+		} plat;
+		struct {
+			struct bcma_device *core;
+			/* Reference to CMN core for BCM4706 */
+			struct bcma_device *cmn;
+		} bcma;
+	};
+
+	struct device *dev;
+	struct device *dma_dev;
+	unsigned char mac_addr[ETH_ALEN];
+	u32 feature_flags;
+
 	struct net_device *net_dev;
 	struct napi_struct napi;
 	struct mii_bus *mii_bus;
@@ -452,6 +471,7 @@ struct bgmac {
 	u32 mib_rx_regs[BGMAC_NUM_MIB_RX_REGS];
 
 	/* Int */
+	int irq;
 	u32 int_mask;
 
 	/* Current MAC state */
@@ -462,16 +482,71 @@ struct bgmac {
 	bool has_robosw;
 
 	bool loopback;
+
+	u32 (*read)(struct bgmac *bgmac, u16 offset);
+	void (*write)(struct bgmac *bgmac, u16 offset, u32 value);
+	u32 (*idm_read)(struct bgmac *bgmac, u16 offset);
+	void (*idm_write)(struct bgmac *bgmac, u16 offset, u32 value);
+	bool (*clk_enabled)(struct bgmac *bgmac);
+	void (*clk_enable)(struct bgmac *bgmac, u32 flags);
+	void (*cco_ctl_maskset)(struct bgmac *bgmac, u32 offset, u32 mask,
+				u32 set);
+	u32 (*get_bus_clock)(struct bgmac *bgmac);
+	void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask,
+			      u32 set);
 };
 
+int bgmac_enet_probe(struct bgmac *info);
+void bgmac_enet_remove(struct bgmac *bgmac);
+
+struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr);
+void bcma_mdio_mii_unregister(struct mii_bus *mii_bus);
+
 static inline u32 bgmac_read(struct bgmac *bgmac, u16 offset)
 {
-	return bcma_read32(bgmac->core, offset);
+	return bgmac->read(bgmac, offset);
 }
 
 static inline void bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
 {
-	bcma_write32(bgmac->core, offset, value);
+	bgmac->write(bgmac, offset, value);
+}
+
+static inline u32 bgmac_idm_read(struct bgmac *bgmac, u16 offset)
+{
+	return bgmac->idm_read(bgmac, offset);
+}
+
+static inline void bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	bgmac->idm_write(bgmac, offset, value);
+}
+
+static inline bool bgmac_clk_enabled(struct bgmac *bgmac)
+{
+	return bgmac->clk_enabled(bgmac);
+}
+
+static inline void bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
+{
+	bgmac->clk_enable(bgmac, flags);
+}
+
+static inline void bgmac_cco_ctl_maskset(struct bgmac *bgmac, u32 offset,
+					 u32 mask, u32 set)
+{
+	bgmac->cco_ctl_maskset(bgmac, offset, mask, set);
+}
+
+static inline u32 bgmac_get_bus_clock(struct bgmac *bgmac)
+{
+	return bgmac->get_bus_clock(bgmac);
+}
+
+static inline void bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset,
+				       u32 mask, u32 set)
+{
+	bgmac->cmn_maskset32(bgmac, offset, mask, set);
 }
 
 static inline void bgmac_maskset(struct bgmac *bgmac, u16 offset, u32 mask,
@@ -489,5 +564,4 @@ static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set)
 {
 	bgmac_maskset(bgmac, offset, ~0, set);
 }
-
 #endif /* _BGMAC_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 673f4d62e73e..659faa6511c0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -997,7 +997,7 @@ static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info,
 		 * correct protocol ID, it must be a loopback packet where
 		 * the offsets are off by 4.
 		 */
-		if (proto != htons(ETH_P_IP) && proto && htons(ETH_P_IPV6))
+		if (proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))
 			loopback = true;
 	}
 	if (loopback) {
@@ -3414,7 +3414,8 @@ static int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_CFG, -1, -1);
 	/* Only RSS support for now TBD: COS & LB */
 	req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP |
-				  VNIC_CFG_REQ_ENABLES_RSS_RULE);
+				  VNIC_CFG_REQ_ENABLES_RSS_RULE |
+				  VNIC_CFG_REQ_ENABLES_MRU);
 	req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx);
 	req.cos_rule = cpu_to_le16(0xffff);
 	if (vnic->flags & BNXT_VNIC_RSS_FLAG)
@@ -3951,7 +3952,7 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
 
-	req.update_period_ms = cpu_to_le32(1000);
+	req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -4025,6 +4026,7 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 
 		pf->fw_fid = le16_to_cpu(resp->fid);
 		pf->port_id = le16_to_cpu(resp->port_id);
+		bp->dev->dev_port = pf->port_id;
 		memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN);
 		memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN);
 		pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
@@ -4315,6 +4317,16 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
 #endif
 }
 
+/* Allow PF and VF with default VLAN to be in promiscuous mode */
+static bool bnxt_promisc_ok(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+	if (BNXT_VF(bp) && !bp->vf.vlan)
+		return false;
+#endif
+	return true;
+}
+
 static int bnxt_cfg_rx_mode(struct bnxt *);
 static bool bnxt_mc_list_updated(struct bnxt *, u32 *);
 
@@ -4380,7 +4392,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
 
 	vnic->rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
 
-	if ((bp->dev->flags & IFF_PROMISC) && BNXT_PF(bp))
+	if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
 		vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
 	if (bp->dev->flags & IFF_ALLMULTI) {
@@ -5295,12 +5307,19 @@ static int bnxt_open(struct net_device *dev)
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
 
-	rc = bnxt_hwrm_func_reset(bp);
-	if (rc) {
-		netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n",
-			   rc);
-		rc = -1;
-		return rc;
+	if (!test_bit(BNXT_STATE_FN_RST_DONE, &bp->state)) {
+		rc = bnxt_hwrm_func_reset(bp);
+		if (rc) {
+			netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n",
+				   rc);
+			rc = -EBUSY;
+			return rc;
+		}
+		/* Do func_reset during the 1st PF open only to prevent killing
+		 * the VFs when the PF is brought down and up.
+		 */
+		if (BNXT_PF(bp))
+			set_bit(BNXT_STATE_FN_RST_DONE, &bp->state);
 	}
 	return __bnxt_open_nic(bp, true, true);
 }
@@ -5520,8 +5539,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
 		  CFA_L2_SET_RX_MASK_REQ_MASK_MCAST |
 		  CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST);
 
-	/* Only allow PF to be in promiscuous mode */
-	if ((dev->flags & IFF_PROMISC) && BNXT_PF(bp))
+	if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
 		mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
 	uc_update = bnxt_uc_list_updated(bp);
@@ -5976,6 +5994,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 	bp->tx_coal_ticks_irq = 2;
 	bp->tx_coal_bufs_irq = 2;
 
+	bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
+
 	init_timer(&bp->timer);
 	bp->timer.data = (unsigned long)bp;
 	bp->timer.function = bnxt_timer;
@@ -6041,7 +6061,7 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (new_mtu < 60 || new_mtu > 9000)
+	if (new_mtu < 60 || new_mtu > 9500)
 		return -EINVAL;
 
 	if (netif_running(dev))
@@ -6676,6 +6696,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
 					       pci_channel_state_t state)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct bnxt *bp = netdev_priv(netdev);
 
 	netdev_info(netdev, "PCI I/O error detected\n");
 
@@ -6690,6 +6711,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
 	if (netif_running(netdev))
 		bnxt_close(netdev);
 
+	/* So that func_reset will be done during slot_reset */
+	clear_bit(BNXT_STATE_FN_RST_DONE, &bp->state);
 	pci_disable_device(pdev);
 	rtnl_unlock();
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 927ece9c408a..2313e37e6eb5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -11,10 +11,10 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME		"bnxt_en"
-#define DRV_MODULE_VERSION	"1.2.0"
+#define DRV_MODULE_VERSION	"1.3.0"
 
 #define DRV_VER_MAJ	1
-#define DRV_VER_MIN	0
+#define DRV_VER_MIN	3
 #define DRV_VER_UPD	0
 
 struct tx_bd {
@@ -359,7 +359,8 @@ struct rx_tpa_end_cmp {
 	 RX_TPA_END_CMP_FLAGS_PLACEMENT_ANY_GRO)
 
 #define TPA_END_GRO_TS(rx_tpa_end)					\
-	((rx_tpa_end)->rx_tpa_end_cmp_tsdelta & cpu_to_le32(RX_TPA_END_GRO_TS))
+	(!!((rx_tpa_end)->rx_tpa_end_cmp_tsdelta &			\
+	    cpu_to_le32(RX_TPA_END_GRO_TS)))
 
 struct rx_tpa_end_cmp_ext {
 	__le32 rx_tpa_end_cmp_dup_acks;
@@ -753,8 +754,8 @@ struct bnxt_vf_info {
 struct bnxt_pf_info {
 #define BNXT_FIRST_PF_FID	1
 #define BNXT_FIRST_VF_FID	128
-	u32	fw_fid;
-	u8	port_id;
+	u16	fw_fid;
+	u16	port_id;
 	u8	mac_addr[ETH_ALEN];
 	u16	max_rsscos_ctxs;
 	u16	max_cp_rings;
@@ -1017,6 +1018,7 @@ struct bnxt {
 	unsigned long		state;
 #define BNXT_STATE_OPEN		0
 #define BNXT_STATE_IN_SP_TASK	1
+#define BNXT_STATE_FN_RST_DONE	2
 
 	struct bnxt_irq	*irq_tbl;
 	u8			mac_addr[ETH_ALEN];
@@ -1065,6 +1067,11 @@ struct bnxt {
 
 #define BNXT_USEC_TO_COAL_TIMER(x)	((x) * 25 / 2)
 
+	u32			stats_coal_ticks;
+#define BNXT_DEF_STATS_COAL_TICKS	 1000000
+#define BNXT_MIN_STATS_COAL_TICKS	  250000
+#define BNXT_MAX_STATS_COAL_TICKS	 1000000
+
 	struct work_struct	sp_task;
 	unsigned long		sp_event;
 #define BNXT_RX_MASK_SP_EVENT		0
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index d7ab2d7982c2..0f7dd861ab4d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -56,6 +56,8 @@ static int bnxt_get_coalesce(struct net_device *dev,
 	coal->tx_coalesce_usecs_irq = bp->tx_coal_ticks_irq;
 	coal->tx_max_coalesced_frames_irq = bp->tx_coal_bufs_irq;
 
+	coal->stats_block_coalesce_usecs = bp->stats_coal_ticks;
+
 	return 0;
 }
 
@@ -63,6 +65,7 @@ static int bnxt_set_coalesce(struct net_device *dev,
 			     struct ethtool_coalesce *coal)
 {
 	struct bnxt *bp = netdev_priv(dev);
+	bool update_stats = false;
 	int rc = 0;
 
 	bp->rx_coal_ticks = coal->rx_coalesce_usecs;
@@ -76,8 +79,26 @@ static int bnxt_set_coalesce(struct net_device *dev,
 	bp->tx_coal_ticks_irq = coal->tx_coalesce_usecs_irq;
 	bp->tx_coal_bufs_irq = coal->tx_max_coalesced_frames_irq;
 
-	if (netif_running(dev))
-		rc = bnxt_hwrm_set_coal(bp);
+	if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) {
+		u32 stats_ticks = coal->stats_block_coalesce_usecs;
+
+		stats_ticks = clamp_t(u32, stats_ticks,
+				      BNXT_MIN_STATS_COAL_TICKS,
+				      BNXT_MAX_STATS_COAL_TICKS);
+		stats_ticks = rounddown(stats_ticks, BNXT_MIN_STATS_COAL_TICKS);
+		bp->stats_coal_ticks = stats_ticks;
+		update_stats = true;
+	}
+
+	if (netif_running(dev)) {
+		if (update_stats) {
+			rc = bnxt_close_nic(bp, true, false);
+			if (!rc)
+				rc = bnxt_open_nic(bp, true, false);
+		} else {
+			rc = bnxt_hwrm_set_coal(bp);
+		}
+	}
 
 	return rc;
 }
@@ -961,7 +982,7 @@ static int bnxt_set_pauseparam(struct net_device *dev,
 	struct bnxt_link_info *link_info = &bp->link_info;
 
 	if (!BNXT_SINGLE_PF(bp))
-		return rc;
+		return -EOPNOTSUPP;
 
 	if (epause->autoneg) {
 		if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
@@ -1059,6 +1080,8 @@ static int bnxt_firmware_reset(struct net_device *dev,
 	case BNX_DIR_TYPE_APE_FW:
 	case BNX_DIR_TYPE_APE_PATCH:
 		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT;
+		/* Self-reset APE upon next PCIe reset: */
+		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST;
 		break;
 	case BNX_DIR_TYPE_KONG_FW:
 	case BNX_DIR_TYPE_KONG_PATCH:
@@ -1092,9 +1115,27 @@ static int bnxt_flash_firmware(struct net_device *dev,
 	case BNX_DIR_TYPE_BOOTCODE_2:
 		code_type = CODE_BOOT;
 		break;
+	case BNX_DIR_TYPE_CHIMP_PATCH:
+		code_type = CODE_CHIMP_PATCH;
+		break;
 	case BNX_DIR_TYPE_APE_FW:
 		code_type = CODE_MCTP_PASSTHRU;
 		break;
+	case BNX_DIR_TYPE_APE_PATCH:
+		code_type = CODE_APE_PATCH;
+		break;
+	case BNX_DIR_TYPE_KONG_FW:
+		code_type = CODE_KONG_FW;
+		break;
+	case BNX_DIR_TYPE_KONG_PATCH:
+		code_type = CODE_KONG_PATCH;
+		break;
+	case BNX_DIR_TYPE_BONO_FW:
+		code_type = CODE_BONO_FW;
+		break;
+	case BNX_DIR_TYPE_BONO_PATCH:
+		code_type = CODE_BONO_PATCH;
+		break;
 	default:
 		netdev_err(dev, "Unsupported directory entry type: %u\n",
 			   dir_type);
@@ -1149,6 +1190,8 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type)
 	case BNX_DIR_TYPE_APE_PATCH:
 	case BNX_DIR_TYPE_KONG_FW:
 	case BNX_DIR_TYPE_KONG_PATCH:
+	case BNX_DIR_TYPE_BONO_FW:
+	case BNX_DIR_TYPE_BONO_PATCH:
 		return true;
 	}
 
@@ -1186,7 +1229,8 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev,
 	const struct firmware  *fw;
 	int			rc;
 
-	if (bnxt_dir_type_is_executable(dir_type) == false)
+	if (dir_type != BNX_DIR_TYPE_UPDATE &&
+	    bnxt_dir_type_is_executable(dir_type) == false)
 		return -EINVAL;
 
 	rc = request_firmware(&fw, filename, &dev->dev);
@@ -1483,7 +1527,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
 	int rc = 0;
 
 	if (!BNXT_SINGLE_PF(bp))
-		return 0;
+		return -EOPNOTSUPP;
 
 	if (!(bp->flags & BNXT_FLAG_EEE_CAP))
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
index 461675caaacd..82bf44ab811b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
@@ -70,6 +70,7 @@ enum SUPPORTED_CODE {
 	CODE_KONG_PATCH,	/* 18 - KONG Patch firmware */
 	CODE_BONO_FW,		/* 19 - BONO firmware */
 	CODE_BONO_PATCH,	/* 20 - BONO Patch firmware */
+	CODE_CHIMP_PATCH,	/* 21 - ChiMP Patch firmware */
 
 	MAX_CODE_TYPE,
 };
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 05e3c49a7677..517567f6d651 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -105,6 +105,7 @@ struct hwrm_async_event_cmpl {
 	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0)
 	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0)
 	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0)
 	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD   (0x10UL << 0)
 	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD     (0x11UL << 0)
 	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD     (0x20UL << 0)
@@ -484,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error {
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL
 };
 
-/* HW Resource Manager Specification 1.2.2 */
+/* HW Resource Manager Specification 1.3.0 */
 #define HWRM_VERSION_MAJOR	1
-#define HWRM_VERSION_MINOR	2
-#define HWRM_VERSION_UPDATE	2
+#define HWRM_VERSION_MINOR	3
+#define HWRM_VERSION_UPDATE	0
 
-#define HWRM_VERSION_STR	"1.2.2"
+#define HWRM_VERSION_STR	"1.3.0"
 /*
  * Following is the signature for HWRM message field that indicates not
  * applicable (All F's). Need to cast it the size of the field if needed.
@@ -611,6 +612,9 @@ struct cmd_nums {
 	#define HWRM_FWD_RESP					   (0xd2UL)
 	#define HWRM_FWD_ASYNC_EVENT_CMPL			   (0xd3UL)
 	#define HWRM_TEMP_MONITOR_QUERY			   (0xe0UL)
+	#define HWRM_WOL_FILTER_ALLOC				   (0xf0UL)
+	#define HWRM_WOL_FILTER_FREE				   (0xf1UL)
+	#define HWRM_WOL_FILTER_QCFG				   (0xf2UL)
 	#define HWRM_DBG_READ_DIRECT				   (0xff10UL)
 	#define HWRM_DBG_READ_INDIRECT				   (0xff11UL)
 	#define HWRM_DBG_WRITE_DIRECT				   (0xff12UL)
@@ -1020,6 +1024,10 @@ struct hwrm_func_qcaps_output {
 	#define FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED	    0x1UL
 	#define FUNC_QCAPS_RESP_FLAGS_GLOBAL_MSIX_AUTOMASKING      0x2UL
 	#define FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED		    0x4UL
+	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED	    0x8UL
+	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED	    0x10UL
+	#define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED       0x20UL
+	#define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED	    0x40UL
 	u8 mac_address[6];
 	__le16 max_rsscos_ctx;
 	__le16 max_cmpl_rings;
@@ -1066,8 +1074,9 @@ struct hwrm_func_qcfg_output {
 	__le16 fid;
 	__le16 port_id;
 	__le16 vlan;
-	u8 unused_0;
-	u8 unused_1;
+	__le16 flags;
+	#define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED      0x1UL
+	#define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED	    0x2UL
 	u8 mac_address[6];
 	__le16 pci_id;
 	__le16 alloc_rsscos_ctx;
@@ -1086,23 +1095,23 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5	   (0x3UL << 0)
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0	   (0x4UL << 0)
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN	   (0xffUL << 0)
-	u8 unused_2;
+	u8 unused_0;
 	__le16 dflt_vnic_id;
-	u8 unused_3;
-	u8 unused_4;
+	u8 unused_1;
+	u8 unused_2;
 	__le32 min_bw;
 	__le32 max_bw;
 	u8 evb_mode;
 	#define FUNC_QCFG_RESP_EVB_MODE_NO_EVB			   (0x0UL << 0)
 	#define FUNC_QCFG_RESP_EVB_MODE_VEB			   (0x1UL << 0)
 	#define FUNC_QCFG_RESP_EVB_MODE_VEPA			   (0x2UL << 0)
-	u8 unused_5;
-	__le16 unused_6;
+	u8 unused_3;
+	__le16 unused_4;
 	__le32 alloc_mcast_filters;
 	__le32 alloc_hw_ring_grps;
+	u8 unused_5;
+	u8 unused_6;
 	u8 unused_7;
-	u8 unused_8;
-	u8 unused_9;
 	u8 valid;
 };
 
@@ -1410,8 +1419,8 @@ struct hwrm_func_buf_rgtr_input {
 	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K		   (0xcUL << 0)
 	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K		   (0xdUL << 0)
 	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K	   (0x10UL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M		   (0x16UL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M		   (0x17UL << 0)
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M		   (0x15UL << 0)
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M		   (0x16UL << 0)
 	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G		   (0x1eUL << 0)
 	__le16 req_buf_len;
 	__le16 resp_buf_len;
@@ -1499,6 +1508,12 @@ struct hwrm_port_phy_cfg_input {
 	#define PORT_PHY_CFG_REQ_FLAGS_EEE_DISABLE		    0x20UL
 	#define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_ENABLE	    0x40UL
 	#define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_DISABLE	    0x80UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_ENABLE	    0x100UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE	    0x200UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_ENABLE	    0x400UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE	    0x800UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE	    0x1000UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE	    0x2000UL
 	__le32 enables;
 	#define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE		    0x1UL
 	#define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX		    0x2UL
@@ -1815,13 +1830,22 @@ struct hwrm_port_phy_qcfg_output {
 	#define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP      (0xcUL << 24)
 	#define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFPPLUS  (0xdUL << 24)
 	#define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28    (0x11UL << 24)
-	__le32 unused_1;
+	__le16 fec_cfg;
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED      0x1UL
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED   0x2UL
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED     0x4UL
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED  0x8UL
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED    0x10UL
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED  0x20UL
+	#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED    0x40UL
+	u8 unused_1;
+	u8 unused_2;
 	char phy_vendor_name[16];
 	char phy_vendor_partnumber[16];
-	__le32 unused_2;
-	u8 unused_3;
+	__le32 unused_3;
 	u8 unused_4;
 	u8 unused_5;
+	u8 unused_6;
 	u8 valid;
 };
 
@@ -1842,6 +1866,8 @@ struct hwrm_port_mac_cfg_input {
 	#define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_DISABLE   0x20UL
 	#define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_ENABLE    0x40UL
 	#define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE   0x80UL
+	#define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE		    0x100UL
+	#define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE		    0x200UL
 	__le32 enables;
 	#define PORT_MAC_CFG_REQ_ENABLES_IPG			    0x1UL
 	#define PORT_MAC_CFG_REQ_ENABLES_LPBK			    0x2UL
@@ -2127,6 +2153,7 @@ struct hwrm_port_phy_i2c_read_output {
 	u8 valid;
 };
 
+/* hwrm_queue_qportcfg */
 /* Input (24 bytes) */
 struct hwrm_queue_qportcfg_input {
 	__le16 req_type;
@@ -2382,7 +2409,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id0_pri_lvl;
 	u8 queue_id0_bw_weight;
 	u8 queue_id1;
@@ -2392,7 +2419,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id1_pri_lvl;
 	u8 queue_id1_bw_weight;
 	u8 queue_id2;
@@ -2402,7 +2429,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id2_pri_lvl;
 	u8 queue_id2_bw_weight;
 	u8 queue_id3;
@@ -2412,7 +2439,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id3_pri_lvl;
 	u8 queue_id3_bw_weight;
 	u8 queue_id4;
@@ -2422,7 +2449,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id4_pri_lvl;
 	u8 queue_id4_bw_weight;
 	u8 queue_id5;
@@ -2432,7 +2459,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id5_pri_lvl;
 	u8 queue_id5_bw_weight;
 	u8 queue_id6;
@@ -2442,7 +2469,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id6_pri_lvl;
 	u8 queue_id6_bw_weight;
 	u8 queue_id7;
@@ -2452,7 +2479,7 @@ struct hwrm_queue_cos2bw_cfg_input {
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP      (0x0UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS     (0x1UL << 0)
 	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
 	u8 queue_id7_pri_lvl;
 	u8 queue_id7_bw_weight;
 	u8 unused_1[5];
@@ -3150,7 +3177,7 @@ struct hwrm_cfa_l2_filter_cfg_output {
 };
 
 /* hwrm_cfa_l2_set_rx_mask */
-/* Input (40 bytes) */
+/* Input (56 bytes) */
 struct hwrm_cfa_l2_set_rx_mask_input {
 	__le16 req_type;
 	__le16 cmpl_ring;
@@ -3165,9 +3192,15 @@ struct hwrm_cfa_l2_set_rx_mask_input {
 	#define CFA_L2_SET_RX_MASK_REQ_MASK_BCAST		    0x8UL
 	#define CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS	    0x10UL
 	#define CFA_L2_SET_RX_MASK_REQ_MASK_OUTERMOST		    0x20UL
+	#define CFA_L2_SET_RX_MASK_REQ_MASK_VLANONLY		    0x40UL
+	#define CFA_L2_SET_RX_MASK_REQ_MASK_VLAN_NONVLAN	    0x80UL
+	#define CFA_L2_SET_RX_MASK_REQ_MASK_ANYVLAN_NONVLAN	    0x100UL
 	__le64 mc_tbl_addr;
 	__le32 num_mc_entries;
 	__le32 unused_0;
+	__le64 vlan_tag_tbl_addr;
+	__le32 num_vlan_tags;
+	__le32 unused_1;
 };
 
 /* Output (16 bytes) */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
index 40a7b0e09612..73f2249555b5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
@@ -13,6 +13,7 @@
 enum bnxt_nvm_directory_type {
 	BNX_DIR_TYPE_UNUSED = 0,
 	BNX_DIR_TYPE_PKG_LOG = 1,
+	BNX_DIR_TYPE_UPDATE = 2,
 	BNX_DIR_TYPE_CHIMP_PATCH = 3,
 	BNX_DIR_TYPE_BOOTCODE = 4,
 	BNX_DIR_TYPE_VPD = 5,
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 541456398dfb..8d4f8495dbb3 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -453,29 +453,25 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
 static int bcmgenet_get_settings(struct net_device *dev,
 				 struct ethtool_cmd *cmd)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_gset(priv->phydev, cmd);
+	return phy_ethtool_gset(dev->phydev, cmd);
 }
 
 static int bcmgenet_set_settings(struct net_device *dev,
 				 struct ethtool_cmd *cmd)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_sset(priv->phydev, cmd);
+	return phy_ethtool_sset(dev->phydev, cmd);
 }
 
 static int bcmgenet_set_rx_csum(struct net_device *dev,
@@ -941,7 +937,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
 	e->eee_active = p->eee_active;
 	e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
 
-	return phy_ethtool_get_eee(priv->phydev, e);
+	return phy_ethtool_get_eee(dev->phydev, e);
 }
 
 static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -958,7 +954,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (!p->eee_enabled) {
 		bcmgenet_eee_enable_set(dev, false);
 	} else {
-		ret = phy_init_eee(priv->phydev, 0);
+		ret = phy_init_eee(dev->phydev, 0);
 		if (ret) {
 			netif_err(priv, hw, dev, "EEE initialization failed\n");
 			return ret;
@@ -968,14 +964,12 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 		bcmgenet_eee_enable_set(dev, true);
 	}
 
-	return phy_ethtool_set_eee(priv->phydev, e);
+	return phy_ethtool_set_eee(dev->phydev, e);
 }
 
 static int bcmgenet_nway_reset(struct net_device *dev)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
-	return genphy_restart_aneg(priv->phydev);
+	return genphy_restart_aneg(dev->phydev);
 }
 
 /* standard ethtool support functions. */
@@ -1002,12 +996,13 @@ static struct ethtool_ops bcmgenet_ethtool_ops = {
 static int bcmgenet_power_down(struct bcmgenet_priv *priv,
 				enum bcmgenet_power_mode mode)
 {
+	struct net_device *ndev = priv->dev;
 	int ret = 0;
 	u32 reg;
 
 	switch (mode) {
 	case GENET_POWER_CABLE_SENSE:
-		phy_detach(priv->phydev);
+		phy_detach(ndev->phydev);
 		break;
 
 	case GENET_POWER_WOL_MAGIC:
@@ -1068,7 +1063,6 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 /* ioctl handle special commands that are not present in ethtool. */
 static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
 	int val = 0;
 
 	if (!netif_running(dev))
@@ -1078,10 +1072,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		if (!priv->phydev)
+		if (!dev->phydev)
 			val = -ENODEV;
 		else
-			val = phy_mii_ioctl(priv->phydev, rq, cmd);
+			val = phy_mii_ioctl(dev->phydev, rq, cmd);
 		break;
 
 	default:
@@ -2464,6 +2458,7 @@ static void bcmgenet_irq_task(struct work_struct *work)
 {
 	struct bcmgenet_priv *priv = container_of(
 			work, struct bcmgenet_priv, bcmgenet_irq_work);
+	struct net_device *ndev = priv->dev;
 
 	netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
 
@@ -2476,7 +2471,7 @@ static void bcmgenet_irq_task(struct work_struct *work)
 
 	/* Link UP/DOWN event */
 	if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
-		phy_mac_interrupt(priv->phydev,
+		phy_mac_interrupt(ndev->phydev,
 				  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
 		priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
 	}
@@ -2838,7 +2833,7 @@ static void bcmgenet_netif_start(struct net_device *dev)
 	/* Monitor link interrupts now */
 	bcmgenet_link_intr_enable(priv);
 
-	phy_start(priv->phydev);
+	phy_start(dev->phydev);
 }
 
 static int bcmgenet_open(struct net_device *dev)
@@ -2937,7 +2932,7 @@ static void bcmgenet_netif_stop(struct net_device *dev)
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
 	netif_tx_stop_all_queues(dev);
-	phy_stop(priv->phydev);
+	phy_stop(dev->phydev);
 	bcmgenet_intr_disable(priv);
 	bcmgenet_disable_rx_napi(priv);
 	bcmgenet_disable_tx_napi(priv);
@@ -2963,7 +2958,7 @@ static int bcmgenet_close(struct net_device *dev)
 	bcmgenet_netif_stop(dev);
 
 	/* Really kill the PHY state machine and disconnect from it */
-	phy_disconnect(priv->phydev);
+	phy_disconnect(dev->phydev);
 
 	/* Disable MAC receive */
 	umac_enable_set(priv, CMD_RX_EN, false);
@@ -3522,7 +3517,7 @@ static int bcmgenet_suspend(struct device *d)
 
 	bcmgenet_netif_stop(dev);
 
-	phy_suspend(priv->phydev);
+	phy_suspend(dev->phydev);
 
 	netif_device_detach(dev);
 
@@ -3586,7 +3581,7 @@ static int bcmgenet_resume(struct device *d)
 	if (priv->wolopts)
 		clk_disable_unprepare(priv->clk_wol);
 
-	phy_init_hw(priv->phydev);
+	phy_init_hw(dev->phydev);
 	/* Speed settings must be restored */
 	bcmgenet_mii_config(priv->dev);
 
@@ -3619,7 +3614,7 @@ static int bcmgenet_resume(struct device *d)
 
 	netif_device_attach(dev);
 
-	phy_resume(priv->phydev);
+	phy_resume(dev->phydev);
 
 	if (priv->eee.eee_enabled)
 		bcmgenet_eee_enable_set(dev, true);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 1e2dc34d331a..0f0868c56f05 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -597,7 +597,6 @@ struct bcmgenet_priv {
 
 	/* MDIO bus variables */
 	wait_queue_head_t wq;
-	struct phy_device *phydev;
 	bool internal_phy;
 	struct device_node *phy_dn;
 	struct device_node *mdio_dn;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 457c3bc8cfff..e907acd81da9 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id,
 void bcmgenet_mii_setup(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = dev->phydev;
 	u32 reg, cmd_bits = 0;
 	bool status_changed = false;
 
@@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev)
 	if (GENET_IS_V4(priv))
 		return;
 
-	if (priv->phydev) {
-		phy_init_hw(priv->phydev);
-		phy_start_aneg(priv->phydev);
+	if (dev->phydev) {
+		phy_init_hw(dev->phydev);
+		phy_start_aneg(dev->phydev);
 	}
 }
 
@@ -236,6 +236,7 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev)
 
 static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 {
+	struct net_device *ndev = priv->dev;
 	u32 reg;
 
 	/* Speed settings are set in bcmgenet_mii_setup() */
@@ -244,14 +245,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 	bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
 
 	if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
-		fixed_phy_set_link_update(priv->phydev,
+		fixed_phy_set_link_update(ndev->phydev,
 					  bcmgenet_fixed_phy_link_update);
 }
 
 int bcmgenet_mii_config(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = dev->phydev;
 	struct device *kdev = &priv->pdev->dev;
 	const char *phy_name = NULL;
 	u32 id_mode_dis = 0;
@@ -302,7 +303,7 @@ int bcmgenet_mii_config(struct net_device *dev)
 		 * capabilities, use that knowledge to also configure the
 		 * Reverse MII interface correctly.
 		 */
-		if ((priv->phydev->supported & PHY_BASIC_FEATURES) ==
+		if ((phydev->supported & PHY_BASIC_FEATURES) ==
 				PHY_BASIC_FEATURES)
 			port_ctrl = PORT_MODE_EXT_RVMII_25;
 		else
@@ -371,7 +372,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 			return -ENODEV;
 		}
 	} else {
-		phydev = priv->phydev;
+		phydev = dev->phydev;
 		phydev->dev_flags = phy_flags;
 
 		ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
@@ -382,8 +383,6 @@ int bcmgenet_mii_probe(struct net_device *dev)
 		}
 	}
 
-	priv->phydev = phydev;
-
 	/* Configure port multiplexer based on what the probed PHY device since
 	 * reading the 'max-speed' property determines the maximum supported
 	 * PHY speed which is needed for bcmgenet_mii_config() to configure
@@ -391,7 +390,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 */
 	ret = bcmgenet_mii_config(dev);
 	if (ret) {
-		phy_disconnect(priv->phydev);
+		phy_disconnect(phydev);
 		return ret;
 	}
 
@@ -401,7 +400,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 * Ethernet MAC ISRs
 	 */
 	if (priv->internal_phy)
-		priv->phydev->irq = PHY_IGNORE_INTERRUPT;
+		phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	return 0;
 }
@@ -606,7 +605,6 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
 
 	}
 
-	priv->phydev = phydev;
 	priv->phy_interface = pd->phy_interface;
 
 	return 0;
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
index d35864ada9a3..c03d37016a48 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
@@ -19,26 +19,16 @@
 * This file may also be available under a different license from Cavium.
 * Contact Cavium, Inc. for more information
 **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
-#include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
 
 int lio_cn6xxx_soft_reset(struct octeon_device *oct)
 {
@@ -74,9 +64,9 @@ void lio_cn6xxx_enable_error_reporting(struct octeon_device *oct)
 	u32 val;
 
 	pci_read_config_dword(oct->pci_dev, CN6XXX_PCIE_DEVCTL, &val);
-	if (val & 0x000f0000) {
+	if (val & 0x000c0000) {
 		dev_err(&oct->pci_dev->dev, "PCI-E Link error detected: 0x%08x\n",
-			val & 0x000f0000);
+			val & 0x000c0000);
 	}
 
 	val |= 0xf;          /* Enable Link error reporting */
@@ -229,7 +219,7 @@ void lio_cn6xxx_setup_global_output_regs(struct octeon_device *oct)
 	/* / Select Packet count instead of bytes for SLI_PKTi_CNTS[CNT] */
 	octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_BMODE, 0);
 
-	/* / Select ES,RO,NS setting from register for Output Queue Packet
+	/* Select ES, RO, NS setting from register for Output Queue Packet
 	 * Address
 	 */
 	octeon_write_csr(oct, CN6XXX_SLI_PKT_DPADDR, 0xFFFFFFFF);
@@ -547,14 +537,14 @@ static void lio_cn6xxx_get_pcie_qlmport(struct octeon_device *oct)
 	dev_dbg(&oct->pci_dev->dev, "Using PCIE Port %d\n", oct->pcie_port);
 }
 
-void
+static void
 lio_cn6xxx_process_pcie_error_intr(struct octeon_device *oct, u64 intr64)
 {
 	dev_err(&oct->pci_dev->dev, "Error Intr: 0x%016llx\n",
 		CVM_CAST64(intr64));
 }
 
-int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct)
+static int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct)
 {
 	struct octeon_droq *droq;
 	int oq_no;
@@ -579,7 +569,7 @@ int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct)
 			continue;
 
 		droq = oct->droq[oq_no];
-		pkt_count = octeon_droq_check_hw_for_pkts(oct, droq);
+		pkt_count = octeon_droq_check_hw_for_pkts(droq);
 		if (pkt_count) {
 			oct->droq_intr |= (1ULL << oq_no);
 			if (droq->ops.poll_mode) {
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
index fe2932cb7ed8..28c47224221a 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
@@ -82,8 +82,6 @@ void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no);
 void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no);
 void lio_cn6xxx_enable_io_queues(struct octeon_device *oct);
 void lio_cn6xxx_disable_io_queues(struct octeon_device *oct);
-void lio_cn6xxx_process_pcie_error_intr(struct octeon_device *oct, u64 intr64);
-int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct);
 irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev);
 void lio_cn6xxx_reinit_regs(struct octeon_device *oct);
 void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr,
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
index 8e830d0c0754..29755bc68f12 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
@@ -19,28 +19,17 @@
 * This file may also be available under a different license from Cavium.
 * Contact Cavium, Inc. for more information
 **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
-#include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
 
 static void lio_cn68xx_set_dpi_regs(struct octeon_device *oct)
 {
@@ -129,7 +118,7 @@ static inline void lio_cn68xx_vendor_message_fix(struct octeon_device *oct)
 	pci_write_config_dword(oct->pci_dev, CN6XXX_PCIE_FLTMSK, val);
 }
 
-int lio_is_210nv(struct octeon_device *oct)
+static int lio_is_210nv(struct octeon_device *oct)
 {
 	u64 mio_qlm4_cfg = lio_pci_readq(oct, CN6XXX_MIO_QLM4_CFG);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
index d4e1c9fb0bf2..ea7bdcce6044 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
@@ -28,6 +28,5 @@
 #define  __CN68XX_DEVICE_H__
 
 int lio_setup_cn68xx_octeon_device(struct octeon_device *oct);
-int lio_is_210nv(struct octeon_device *oct);
 
 #endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
index 38cddbd107b6..d45a0f4aaf1f 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
@@ -29,7 +29,6 @@
 
 #ifndef __CN68XX_REGS_H__
 #define __CN68XX_REGS_H__
-#include "cn66xx_regs.h"
 
 /*###################### REQUEST QUEUE #########################*/
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 03bfa9771e4d..289eb8907922 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -19,13 +19,9 @@
 * This file may also be available under a different license from Cavium.
 * Contact Cavium, Inc. for more information
 **********************************************************************/
-#include <linux/version.h>
 #include <linux/netdevice.h>
 #include <linux/net_tstamp.h>
-#include <linux/ethtool.h>
-#include <linux/dma-mapping.h>
 #include <linux/pci.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -36,9 +32,6 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
 
 static int octnet_get_link_stats(struct net_device *netdev);
 
@@ -106,6 +99,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"tx_tso",
 	"tx_tso_packets",
 	"tx_tso_err",
+	"tx_vxlan",
 
 	"mac_tx_total_pkts",
 	"mac_tx_total_bytes",
@@ -129,6 +123,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"rx_err_link",
 	"rx_err_drop",
 
+	"rx_vxlan",
+	"rx_vxlan_err",
+
 	"rx_lro_pkts",
 	"rx_lro_bytes",
 	"rx_total_lro",
@@ -167,6 +164,7 @@ static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = {
 	"fw_bytes_sent",
 
 	"tso",
+	"vxlan",
 	"txq_restart",
 };
 
@@ -186,6 +184,7 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = {
 	"fw_bytes_received",
 	"fw_dropped_nodispatch",
 
+	"vxlan",
 	"buffer_alloc_failure",
 };
 
@@ -340,20 +339,18 @@ static void octnet_mdio_resp_callback(struct octeon_device *oct,
 				      u32 status,
 				      void *buf)
 {
-	struct oct_mdio_cmd_resp *mdio_cmd_rsp;
 	struct oct_mdio_cmd_context *mdio_cmd_ctx;
 	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
 
-	mdio_cmd_rsp = (struct oct_mdio_cmd_resp *)sc->virtrptr;
 	mdio_cmd_ctx = (struct oct_mdio_cmd_context *)sc->ctxptr;
 
 	oct = lio_get_device(mdio_cmd_ctx->octeon_id);
 	if (status) {
 		dev_err(&oct->pci_dev->dev, "MIDO instruction failed. Status: %llx\n",
 			CVM_CAST64(status));
-		ACCESS_ONCE(mdio_cmd_ctx->cond) = -1;
+		WRITE_ONCE(mdio_cmd_ctx->cond, -1);
 	} else {
-		ACCESS_ONCE(mdio_cmd_ctx->cond) = 1;
+		WRITE_ONCE(mdio_cmd_ctx->cond, 1);
 	}
 	wake_up_interruptible(&mdio_cmd_ctx->wc);
 }
@@ -384,7 +381,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value)
 	mdio_cmd_rsp = (struct oct_mdio_cmd_resp *)sc->virtrptr;
 	mdio_cmd = (struct oct_mdio_cmd *)sc->virtdptr;
 
-	ACCESS_ONCE(mdio_cmd_ctx->cond) = 0;
+	WRITE_ONCE(mdio_cmd_ctx->cond, 0);
 	mdio_cmd_ctx->octeon_id = lio_get_device_id(oct_dev);
 	mdio_cmd->op = op;
 	mdio_cmd->mdio_addr = loc;
@@ -423,7 +420,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value)
 			octeon_swap_8B_data((u64 *)(&mdio_cmd_rsp->resp),
 					    sizeof(struct oct_mdio_cmd) / 8);
 
-			if (ACCESS_ONCE(mdio_cmd_ctx->cond) == 1) {
+			if (READ_ONCE(mdio_cmd_ctx->cond) == 1) {
 				if (!op)
 					*value = mdio_cmd_rsp->resp.value1;
 			} else {
@@ -467,18 +464,16 @@ static int lio_set_phys_id(struct net_device *netdev,
 
 			/* Configure Beacon values */
 			value = LIO68XX_LED_BEACON_CFGON;
-			ret =
-				octnet_mdio45_access(lio, 1,
-						     LIO68XX_LED_BEACON_ADDR,
-						     &value);
+			ret = octnet_mdio45_access(lio, 1,
+						   LIO68XX_LED_BEACON_ADDR,
+						   &value);
 			if (ret)
 				return ret;
 
 			value = LIO68XX_LED_CTRL_CFGON;
-			ret =
-				octnet_mdio45_access(lio, 1,
-						     LIO68XX_LED_CTRL_ADDR,
-						     &value);
+			ret = octnet_mdio45_access(lio, 1,
+						   LIO68XX_LED_CTRL_ADDR,
+						   &value);
 			if (ret)
 				return ret;
 		} else {
@@ -557,7 +552,7 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
 		tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
 	}
 
-	if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE) {
+	if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) {
 		ering->rx_pending = 0;
 		ering->rx_max_pending = 0;
 		ering->rx_mini_pending = 0;
@@ -617,7 +612,8 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 
 static void
 lio_get_ethtool_stats(struct net_device *netdev,
-		      struct ethtool_stats *stats, u64 *data)
+		      struct ethtool_stats *stats  __attribute__((unused)),
+		      u64 *data)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct_dev = lio->oct_dev;
@@ -675,6 +671,10 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	 *fw_err_tso
 	 */
 	data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_tso);
+	/*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost.
+	 *fw_tx_vxlan
+	 */
+	data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_tx_vxlan);
 
 	/* mac tx statistics */
 	/*CVMX_BGXX_CMRX_TX_STAT5 */
@@ -729,6 +729,15 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	 */
 	data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_err_drop);
 
+	/*per_core_stats[cvmx_get_core_num()].link_stats[lro_ctx->ifidx].
+	 *fromwire.fw_rx_vxlan
+	 */
+	data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_rx_vxlan);
+	/*per_core_stats[cvmx_get_core_num()].link_stats[lro_ctx->ifidx].
+	 *fromwire.fw_rx_vxlan_err
+	 */
+	data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_rx_vxlan_err);
+
 	/* LRO */
 	/*per_core_stats[cvmx_get_core_num()].link_stats[ifidx].fromwire.
 	 *fw_lro_pkts
@@ -822,6 +831,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
 
 		/*tso request*/
 		data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_gso);
+		/*vxlan request*/
+		data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_vxlan);
 		/*txq restart*/
 		data[i++] =
 			CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_restart);
@@ -858,6 +869,9 @@ lio_get_ethtool_stats(struct net_device *netdev,
 			CVM_CAST64(oct_dev->droq[j]->stats.bytes_received);
 		data[i++] =
 			CVM_CAST64(oct_dev->droq[j]->stats.dropped_nodispatch);
+
+		data[i++] =
+			CVM_CAST64(oct_dev->droq[j]->stats.rx_vxlan);
 		data[i++] =
 			CVM_CAST64(oct_dev->droq[j]->stats.rx_alloc_failure);
 	}
@@ -945,7 +959,6 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
 			intr_coal->rx_max_coalesced_frames =
 				CFG_GET_OQ_INTR_PKT(cn6xxx->conf);
 		}
-
 		iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no];
 		intr_coal->tx_max_coalesced_frames = iq->fill_threshold;
 		break;
@@ -1043,7 +1056,7 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
 	return 0;
 }
 
-void
+static void
 octnet_nic_stats_callback(struct octeon_device *oct_dev,
 			  u32 status, void *ptr)
 {
@@ -1083,6 +1096,9 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
 		rstats->fw_err_pko = rsp_rstats->fw_err_pko;
 		rstats->fw_err_link = rsp_rstats->fw_err_link;
 		rstats->fw_err_drop = rsp_rstats->fw_err_drop;
+		rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan;
+		rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err;
+
 		/* Number of packets that are LROed      */
 		rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts;
 		/* Number of octets that are LROed       */
@@ -1127,6 +1143,8 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
 		tstats->fw_tso = rsp_tstats->fw_tso;
 		tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd;
 		tstats->fw_err_tso = rsp_tstats->fw_err_tso;
+		tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan;
+
 		resp->status = 1;
 	} else {
 		resp->status = -1;
@@ -1523,7 +1541,7 @@ static int lio_nway_reset(struct net_device *netdev)
 }
 
 /* Return register dump len. */
-static int lio_get_regs_len(struct net_device *dev)
+static int lio_get_regs_len(struct net_device *dev __attribute__((unused)))
 {
 	return OCT_ETHTOOL_REGDUMP_LEN;
 }
@@ -1667,13 +1685,12 @@ static void lio_get_regs(struct net_device *dev,
 	int len = 0;
 	struct octeon_device *oct = lio->oct_dev;
 
-	memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN);
 	regs->version = OCT_ETHTOOL_REGSVER;
 
 	switch (oct->chip_id) {
-	/* case OCTEON_CN73XX: Todo */
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX:
+		memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN);
 		len += cn6xxx_read_csr_reg(regbuf + len, oct);
 		len += cn6xxx_read_config_reg(regbuf + len, oct);
 		break;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 1a584ebde42c..20d6942edf40 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -20,24 +20,12 @@
 * Contact Cavium, Inc. for more information
 **********************************************************************/
 #include <linux/version.h>
-#include <linux/module.h>
-#include <linux/crc32.h>
-#include <linux/dma-mapping.h>
 #include <linux/pci.h>
-#include <linux/pci_ids.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/ipv6.h>
 #include <linux/net_tstamp.h>
 #include <linux/if_vlan.h>
 #include <linux/firmware.h>
-#include <linux/ethtool.h>
 #include <linux/ptp_clock_kernel.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-#include "octeon_config.h"
+#include <net/vxlan.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -48,7 +36,6 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "cn68xx_regs.h"
 #include "cn68xx_device.h"
 #include "liquidio_image.h"
 
@@ -251,8 +238,7 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct)
 		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
 			if (!(oct->io_qmask.oq & (1ULL << i)))
 				continue;
-			pkt_cnt += octeon_droq_check_hw_for_pkts(oct,
-								 oct->droq[i]);
+			pkt_cnt += octeon_droq_check_hw_for_pkts(oct->droq[i]);
 		}
 		if (pkt_cnt > 0) {
 			pending_pkts += pkt_cnt;
@@ -507,7 +493,8 @@ static pci_ers_result_t liquidio_pcie_error_detected(struct pci_dev *pdev,
  * \brief mmio handler
  * @param pdev Pointer to PCI device
  */
-static pci_ers_result_t liquidio_pcie_mmio_enabled(struct pci_dev *pdev)
+static pci_ers_result_t liquidio_pcie_mmio_enabled(
+				struct pci_dev *pdev __attribute__((unused)))
 {
 	/* We should never hit this since we never ask for a reset for a Fatal
 	 * Error. We always return DISCONNECT in io_error above.
@@ -523,7 +510,8 @@ static pci_ers_result_t liquidio_pcie_mmio_enabled(struct pci_dev *pdev)
  * Restart the card from scratch, as if from a cold-boot. Implementation
  * resembles the first-half of the octeon_resume routine.
  */
-static pci_ers_result_t liquidio_pcie_slot_reset(struct pci_dev *pdev)
+static pci_ers_result_t liquidio_pcie_slot_reset(
+				struct pci_dev *pdev __attribute__((unused)))
 {
 	/* We should never hit this since we never ask for a reset for a Fatal
 	 * Error. We always return DISCONNECT in io_error above.
@@ -540,7 +528,7 @@ static pci_ers_result_t liquidio_pcie_slot_reset(struct pci_dev *pdev)
  * its OK to resume normal operation. Implementation resembles the
  * second-half of the octeon_resume routine.
  */
-static void liquidio_pcie_resume(struct pci_dev *pdev)
+static void liquidio_pcie_resume(struct pci_dev *pdev __attribute__((unused)))
 {
 	/* Nothing to be done here. */
 }
@@ -551,7 +539,8 @@ static void liquidio_pcie_resume(struct pci_dev *pdev)
  * @param pdev Pointer to PCI device
  * @param state state to suspend to
  */
-static int liquidio_suspend(struct pci_dev *pdev, pm_message_t state)
+static int liquidio_suspend(struct pci_dev *pdev __attribute__((unused)),
+			    pm_message_t state __attribute__((unused)))
 {
 	return 0;
 }
@@ -560,7 +549,7 @@ static int liquidio_suspend(struct pci_dev *pdev, pm_message_t state)
  * \brief called when resuming
  * @param pdev Pointer to PCI device
  */
-static int liquidio_resume(struct pci_dev *pdev)
+static int liquidio_resume(struct pci_dev *pdev __attribute__((unused)))
 {
 	return 0;
 }
@@ -1104,7 +1093,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
  * @param pdev PCI device structure
  * @param ent unused
  */
-static int liquidio_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int
+liquidio_probe(struct pci_dev *pdev,
+	       const struct pci_device_id *ent __attribute__((unused)))
 {
 	struct octeon_device *oct_dev = NULL;
 	struct handshake *hs;
@@ -1267,7 +1258,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 
 		/* Nothing to be done here either */
 		break;
-	}                       /* end switch(oct->status) */
+	}                       /* end switch (oct->status) */
 
 	tasklet_kill(&oct_priv->droq_tasklet);
 }
@@ -1724,8 +1715,10 @@ static int liquidio_ptp_settime(struct ptp_clock_info *ptp,
  * @param rq request
  * @param on is it on
  */
-static int liquidio_ptp_enable(struct ptp_clock_info *ptp,
-			       struct ptp_clock_request *rq, int on)
+static int
+liquidio_ptp_enable(struct ptp_clock_info *ptp __attribute__((unused)),
+		    struct ptp_clock_request *rq __attribute__((unused)),
+		    int on __attribute__((unused)))
 {
 	return -EOPNOTSUPP;
 }
@@ -1866,7 +1859,7 @@ static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
  * @param buf pointer to resp structure
  */
 static void if_cfg_callback(struct octeon_device *oct,
-			    u32 status,
+			    u32 status __attribute__((unused)),
 			    void *buf)
 {
 	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
@@ -1880,7 +1873,7 @@ static void if_cfg_callback(struct octeon_device *oct,
 	if (resp->status)
 		dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
 			CVM_CAST64(resp->status));
-	ACCESS_ONCE(ctx->cond) = 1;
+	WRITE_ONCE(ctx->cond, 1);
 
 	snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
 		 resp->cfg_info.liquidio_firmware_version);
@@ -1900,7 +1893,8 @@ static void if_cfg_callback(struct octeon_device *oct,
  * @returns selected queue number
  */
 static u16 select_q(struct net_device *dev, struct sk_buff *skb,
-		    void *accel_priv, select_queue_fallback_t fallback)
+		    void *accel_priv __attribute__((unused)),
+		    select_queue_fallback_t fallback __attribute__((unused)))
 {
 	u32 qindex = 0;
 	struct lio *lio;
@@ -1920,7 +1914,7 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
  * @param arg	   - farg registered in droq_ops
  */
 static void
-liquidio_push_packet(u32 octeon_id,
+liquidio_push_packet(u32 octeon_id __attribute__((unused)),
 		     void *skbuff,
 		     u32 len,
 		     union octeon_rh *rh,
@@ -2000,14 +1994,25 @@ liquidio_push_packet(u32 octeon_id,
 		}
 
 		skb->protocol = eth_type_trans(skb, skb->dev);
-
 		if ((netdev->features & NETIF_F_RXCSUM) &&
-		    (rh->r_dh.csum_verified == CNNIC_CSUM_VERIFIED))
+		    (((rh->r_dh.encap_on) &&
+		      (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) ||
+		     (!(rh->r_dh.encap_on) &&
+		      (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))))
 			/* checksum has already been verified */
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		else
 			skb->ip_summed = CHECKSUM_NONE;
 
+		/* Setting Encapsulation field on basis of status received
+		 * from the firmware
+		 */
+		if (rh->r_dh.encap_on) {
+			skb->encapsulation = 1;
+			skb->csum_level = 1;
+			droq->stats.rx_vxlan++;
+		}
+
 		/* inbound VLAN tag */
 		if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
 		    (rh->r_dh.vlan != 0)) {
@@ -2120,7 +2125,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
 /**
  * \brief Setup input and output queues
  * @param octeon_dev octeon device
- * @param net_device Net device
+ * @param ifidx  Interface Index
  *
  * Note: Queues are with respect to the octeon device. Thus
  * an input queue is for egress packets, and output queues
@@ -2331,7 +2336,6 @@ static int liquidio_stop(struct net_device *netdev)
 	}
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
-	module_put(THIS_MODULE);
 
 	return 0;
 }
@@ -2342,6 +2346,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 	struct net_device *netdev = (struct net_device *)nctrl->netpndev;
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	u8 *mac;
 
 	switch (nctrl->ncmd.s.cmd) {
 	case OCTNET_CMD_CHANGE_DEVFLAGS:
@@ -2349,22 +2354,24 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 		break;
 
 	case OCTNET_CMD_CHANGE_MACADDR:
-		/* If command is successful, change the MACADDR. */
-		netif_info(lio, probe, lio->netdev, " MACAddr changed to 0x%llx\n",
-			   CVM_CAST64(nctrl->udd[0]));
-		dev_info(&oct->pci_dev->dev, "%s MACAddr changed to 0x%llx\n",
-			 netdev->name, CVM_CAST64(nctrl->udd[0]));
-		memcpy(netdev->dev_addr, ((u8 *)&nctrl->udd[0]) + 2, ETH_ALEN);
+		mac = ((u8 *)&nctrl->udd[0]) + 2;
+		netif_info(lio, probe, lio->netdev,
+			   "%s %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
+			   "MACAddr changed to", mac[0], mac[1],
+			   mac[2], mac[3], mac[4], mac[5]);
 		break;
 
 	case OCTNET_CMD_CHANGE_MTU:
 		/* If command is successful, change the MTU. */
 		netif_info(lio, probe, lio->netdev, " MTU Changed from %d to %d\n",
-			   netdev->mtu, nctrl->ncmd.s.param2);
+			   netdev->mtu, nctrl->ncmd.s.param1);
 		dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
 			 netdev->name, netdev->mtu,
-			 nctrl->ncmd.s.param2);
-		netdev->mtu = nctrl->ncmd.s.param2;
+			 nctrl->ncmd.s.param1);
+		rtnl_lock();
+		netdev->mtu = nctrl->ncmd.s.param1;
+		call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev);
+		rtnl_unlock();
 		break;
 
 	case OCTNET_CMD_GPIO_ACCESS:
@@ -2410,6 +2417,55 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 			 netdev->name);
 
 		break;
+		/* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL"
+		 * Command passed by NIC driver
+		 */
+	case OCTNET_CMD_TNL_RX_CSUM_CTL:
+		if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "%s RX Checksum Offload Enabled\n",
+				   netdev->name);
+		} else if (nctrl->ncmd.s.param1 ==
+			   OCTNET_CMD_RXCSUM_DISABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "%s RX Checksum Offload Disabled\n",
+				   netdev->name);
+		}
+		break;
+
+		/* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL"
+		 * Command passed by NIC driver
+		 */
+	case OCTNET_CMD_TNL_TX_CSUM_CTL:
+		if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "%s TX Checksum Offload Enabled\n",
+				   netdev->name);
+		} else if (nctrl->ncmd.s.param1 ==
+			   OCTNET_CMD_TXCSUM_DISABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "%s TX Checksum Offload Disabled\n",
+				   netdev->name);
+		}
+		break;
+
+		/* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG"
+		 * Command passed by NIC driver
+		 */
+	case OCTNET_CMD_VXLAN_PORT_CONFIG:
+		if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) {
+			netif_info(lio, probe, lio->netdev,
+				   "%s VxLAN Destination UDP PORT:%d ADDED\n",
+				   netdev->name,
+				   nctrl->ncmd.s.param1);
+		} else if (nctrl->ncmd.s.more ==
+			   OCTNET_CMD_VXLAN_PORT_DEL) {
+			netif_info(lio, probe, lio->netdev,
+				   "%s VxLAN Destination UDP PORT:%d DELETED\n",
+				   netdev->name,
+				   nctrl->ncmd.s.param1);
+		}
+		break;
 
 	case OCTNET_CMD_SET_FLOW_CTL:
 		netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n");
@@ -2465,7 +2521,7 @@ static void liquidio_set_mcast_list(struct net_device *netdev)
 	struct octnic_ctrl_pkt nctrl;
 	struct netdev_hw_addr *ha;
 	u64 *mc;
-	int ret, i;
+	int ret;
 	int mc_count = min(netdev_mc_count(netdev), MAX_OCTEON_MULTICAST_ADDR);
 
 	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
@@ -2481,7 +2537,6 @@ static void liquidio_set_mcast_list(struct net_device *netdev)
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	/* copy all the addresses into the udd */
-	i = 0;
 	mc = &nctrl.udd[0];
 	netdev_for_each_mc_addr(ha, netdev) {
 		*mc = 0;
@@ -2604,18 +2659,16 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
 	struct octnic_ctrl_pkt nctrl;
-	int max_frm_size = new_mtu + OCTNET_FRM_HEADER_SIZE;
 	int ret = 0;
 
-	/* Limit the MTU to make sure the ethernet packets are between 64 bytes
-	 * and 65535 bytes
+	/* Limit the MTU to make sure the ethernet packets are between 68 bytes
+	 * and 16000 bytes
 	 */
-	if ((max_frm_size < OCTNET_MIN_FRM_SIZE) ||
-	    (max_frm_size > OCTNET_MAX_FRM_SIZE)) {
+	if ((new_mtu < LIO_MIN_MTU_SIZE) ||
+	    (new_mtu > LIO_MAX_MTU_SIZE)) {
 		dev_err(&oct->pci_dev->dev, "Invalid MTU: %d\n", new_mtu);
 		dev_err(&oct->pci_dev->dev, "Valid range %d and %d\n",
-			(OCTNET_MIN_FRM_SIZE - OCTNET_FRM_HEADER_SIZE),
-			(OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE));
+			LIO_MIN_MTU_SIZE, LIO_MAX_MTU_SIZE);
 		return -EINVAL;
 	}
 
@@ -2646,7 +2699,7 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
  * @param ifr interface request
  * @param cmd command
  */
-static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr)
 {
 	struct hwtstamp_config conf;
 	struct lio *lio = GET_LIO(netdev);
@@ -2707,7 +2760,7 @@ static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 {
 	switch (cmd) {
 	case SIOCSHWTSTAMP:
-		return hwtstamp_ioctl(netdev, ifr, cmd);
+		return hwtstamp_ioctl(netdev, ifr);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2886,12 +2939,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 			/* defer sending if queue is full */
 			stats->tx_iq_busy++;
 			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-				   ndata.q_no);
+				   lio->txq);
 			return NETDEV_TX_BUSY;
 		}
 	}
 	/* pr_info(" XMIT - valid Qs: %d, 1st Q no: %d, cpu:  %d, q_no:%d\n",
-	 *	lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no );
+	 *	lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no);
 	 */
 
 	ndata.datasize = skb->len;
@@ -2899,9 +2952,14 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	cmdsetup.u64 = 0;
 	cmdsetup.s.iq_no = iq_no;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		cmdsetup.s.transport_csum = 1;
-
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb->encapsulation) {
+			cmdsetup.s.tnl_csum = 1;
+			stats->tx_vxlan++;
+		} else {
+			cmdsetup.s.transport_csum = 1;
+		}
+	}
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		cmdsetup.s.timestamp = 1;
@@ -2910,6 +2968,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if (skb_shinfo(skb)->nr_frags == 0) {
 		cmdsetup.s.u.datasize = skb->len;
 		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+
 		/* Offload checksum calculation for TCP/UDP packets */
 		dptr = dma_map_single(&oct->pci_dev->dev,
 				      skb->data,
@@ -3124,6 +3183,72 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev,
 	return ret;
 }
 
+/** Sending command to enable/disable RX checksum offload
+ * @param netdev                pointer to network device
+ * @param command               OCTNET_CMD_TNL_RX_CSUM_CTL
+ * @param rx_cmd_bit            OCTNET_CMD_RXCSUM_ENABLE/
+ *                              OCTNET_CMD_RXCSUM_DISABLE
+ * @returns                     SUCCESS or FAILURE
+ */
+int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
+				u8 rx_cmd)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = command;
+	nctrl.ncmd.s.param1 = rx_cmd;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev,
+			"DEVFLAGS RXCSUM change failed in core(ret:0x%x)\n",
+			ret);
+	}
+	return ret;
+}
+
+/** Sending command to add/delete VxLAN UDP port to firmware
+ * @param netdev                pointer to network device
+ * @param command               OCTNET_CMD_VXLAN_PORT_CONFIG
+ * @param vxlan_port            VxLAN port to be added or deleted
+ * @param vxlan_cmd_bit         OCTNET_CMD_VXLAN_PORT_ADD,
+ *                              OCTNET_CMD_VXLAN_PORT_DEL
+ * @returns                     SUCCESS or FAILURE
+ */
+static int liquidio_vxlan_port_command(struct net_device *netdev, int command,
+				       u16 vxlan_port, u8 vxlan_cmd_bit)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = command;
+	nctrl.ncmd.s.more = vxlan_cmd_bit;
+	nctrl.ncmd.s.param1 = vxlan_port;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev,
+			"VxLAN port add/delete failed in core (ret:0x%x)\n",
+			ret);
+	}
+	return ret;
+}
+
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
 	struct lio *lio = GET_LIO(netdev);
@@ -3204,9 +3329,48 @@ static int liquidio_set_features(struct net_device *netdev,
 		liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE,
 				     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
 
+	/* Sending command to firmware to enable/disable RX checksum
+	 * offload settings using ethtool
+	 */
+	if (!(netdev->features & NETIF_F_RXCSUM) &&
+	    (lio->enc_dev_capability & NETIF_F_RXCSUM) &&
+	    (features & NETIF_F_RXCSUM))
+		liquidio_set_rxcsum_command(netdev,
+					    OCTNET_CMD_TNL_RX_CSUM_CTL,
+					    OCTNET_CMD_RXCSUM_ENABLE);
+	else if ((netdev->features & NETIF_F_RXCSUM) &&
+		 (lio->enc_dev_capability & NETIF_F_RXCSUM) &&
+		 !(features & NETIF_F_RXCSUM))
+		liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+					    OCTNET_CMD_RXCSUM_DISABLE);
+
 	return 0;
 }
 
+static void liquidio_add_vxlan_port(struct net_device *netdev,
+				    struct udp_tunnel_info *ti)
+{
+	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+		return;
+
+	liquidio_vxlan_port_command(netdev,
+				    OCTNET_CMD_VXLAN_PORT_CONFIG,
+				    htons(ti->port),
+				    OCTNET_CMD_VXLAN_PORT_ADD);
+}
+
+static void liquidio_del_vxlan_port(struct net_device *netdev,
+				    struct udp_tunnel_info *ti)
+{
+	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+		return;
+
+	liquidio_vxlan_port_command(netdev,
+				    OCTNET_CMD_VXLAN_PORT_CONFIG,
+				    htons(ti->port),
+				    OCTNET_CMD_VXLAN_PORT_DEL);
+}
+
 static struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
@@ -3222,6 +3386,8 @@ static struct net_device_ops lionetdevops = {
 	.ndo_do_ioctl		= liquidio_ioctl,
 	.ndo_fix_features	= liquidio_fix_features,
 	.ndo_set_features	= liquidio_set_features,
+	.ndo_udp_tunnel_add	= liquidio_add_vxlan_port,
+	.ndo_udp_tunnel_del	= liquidio_del_vxlan_port,
 };
 
 /** \brief Entry point for the liquidio module
@@ -3323,7 +3489,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 	struct liquidio_if_cfg_resp *resp;
 	struct octdev_props *props;
 	int retval, num_iqueues, num_oqueues;
-	int num_cpus = num_online_cpus();
 	union oct_nic_if_cfg if_cfg;
 	unsigned int base_queue;
 	unsigned int gmx_port_id;
@@ -3365,14 +3530,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		gmx_port_id =
 			CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i);
 		ifidx_or_pfnum = i;
-		if (num_iqueues > num_cpus)
-			num_iqueues = num_cpus;
-		if (num_oqueues > num_cpus)
-			num_oqueues = num_cpus;
+
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"requesting config for interface %d, iqs %d, oqs %d\n",
 			ifidx_or_pfnum, num_iqueues, num_oqueues);
-		ACCESS_ONCE(ctx->cond) = 0;
+		WRITE_ONCE(ctx->cond, 0);
 		ctx->octeon_id = lio_get_device_id(octeon_dev);
 		init_waitqueue_head(&ctx->wc);
 
@@ -3390,7 +3552,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		sc->callback = if_cfg_callback;
 		sc->callback_arg = sc;
-		sc->wait_time = 1000;
+		sc->wait_time = 3000;
 
 		retval = octeon_send_soft_command(octeon_dev, sc);
 		if (retval == IQ_SEND_FAILED) {
@@ -3479,6 +3641,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 				| NETIF_F_LRO;
 		netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
 
+		/*  Copy of transmit encapsulation capabilities:
+		 *  TSO, TSO6, Checksums for this device
+		 */
+		lio->enc_dev_capability = NETIF_F_IP_CSUM
+					  | NETIF_F_IPV6_CSUM
+					  | NETIF_F_GSO_UDP_TUNNEL
+					  | NETIF_F_HW_CSUM | NETIF_F_SG
+					  | NETIF_F_RXCSUM
+					  | NETIF_F_TSO | NETIF_F_TSO6
+					  | NETIF_F_LRO;
+
+		netdev->hw_enc_features = (lio->enc_dev_capability &
+					   ~NETIF_F_LRO);
+
+		lio->dev_capability |= NETIF_F_GSO_UDP_TUNNEL;
+
 		netdev->vlan_features = lio->dev_capability;
 		/* Add any unchangeable hw features */
 		lio->dev_capability |=  NETIF_F_HW_VLAN_CTAG_FILTER |
@@ -3538,8 +3716,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		octeon_dev->priv_flags = 0x0;
 
 		if (netdev->features & NETIF_F_LRO)
-		liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
-				     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+			liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
 
 		liquidio_set_feature(netdev, OCTNET_CMD_ENABLE_VLAN_FILTER, 0);
 
@@ -3561,6 +3739,15 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		ifstate_set(lio, LIO_IFSTATE_REGISTERED);
 
+		/* Sending command to firmware to enable Rx checksum offload
+		 * by default at the time of setup of Liquidio driver for
+		 * this device
+		 */
+		liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+					    OCTNET_CMD_RXCSUM_ENABLE);
+		liquidio_set_feature(netdev, OCTNET_CMD_TNL_TX_CSUM_CTL,
+				     OCTNET_CMD_TXCSUM_ENABLE);
+
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"NIC ifidx:%d Setup successful\n", i);
 
@@ -3771,6 +3958,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 		/* Release any previously allocated queues */
 		for (j = 0; j < octeon_dev->num_oqs; j++)
 			octeon_delete_droq(octeon_dev, j);
+		return 1;
 	}
 
 	atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE);
@@ -3793,7 +3981,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
 	/* Setup the interrupt handler and record the INT SUM register address
 	 */
-	octeon_setup_interrupt(octeon_dev);
+	if (octeon_setup_interrupt(octeon_dev))
+		return 1;
 
 	/* Enable Octeon device interrupts */
 	octeon_dev->fn_list.enable_interrupt(octeon_dev->chip);
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 5aa01f427d4a..199a8b9c7dc5 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -34,6 +34,7 @@
 #define LIQUIDIO_MICRO_VERSION  ".1"
 #define LIQUIDIO_PACKAGE ""
 #define LIQUIDIO_VERSION  "1.4.1"
+
 #define CONTROL_IQ 0
 /** Tag types used by Octeon cores in its work. */
 enum octeon_tag_type {
@@ -216,6 +217,13 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 #define   OCTNET_CMD_ENABLE_VLAN_FILTER 0x16
 #define   OCTNET_CMD_ADD_VLAN_FILTER  0x17
 #define   OCTNET_CMD_DEL_VLAN_FILTER  0x18
+#define   OCTNET_CMD_VXLAN_PORT_CONFIG 0x19
+#define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
+#define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
+#define   OCTNET_CMD_RXCSUM_ENABLE     0x0
+#define   OCTNET_CMD_RXCSUM_DISABLE    0x1
+#define   OCTNET_CMD_TXCSUM_ENABLE     0x0
+#define   OCTNET_CMD_TXCSUM_DISABLE    0x1
 
 /* RX(packets coming from wire) Checksum verification flags */
 /* TCP/UDP csum */
@@ -288,7 +296,7 @@ union octnet_cmd {
 
 #define   OCTNET_CMD_SIZE     (sizeof(union octnet_cmd))
 
-/* Instruction Header (DPI - CN23xx) - for OCTEON-III models */
+/* Instruction Header(DPI) - for OCTEON-III models */
 struct  octeon_instr_ih3 {
 #ifdef __BIG_ENDIAN_BITFIELD
 
@@ -338,7 +346,7 @@ struct  octeon_instr_ih3 {
 #endif
 };
 
-/* Optional PKI Instruction Header(PKI IH) - for OCTEON CN23XX models */
+/* Optional PKI Instruction Header(PKI IH) - for OCTEON-III models */
 /** BIG ENDIAN format.   */
 struct  octeon_instr_pki_ih3 {
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -533,6 +541,8 @@ union octeon_rh {
 		u64 priority:3;
 		u64 csum_verified:3;     /** checksum verified. */
 		u64 has_hwtstamp:1;      /** Has hardware timestamp. 1 = yes. */
+		u64 encap_on:1;
+		u64 has_hash:1;          /** Has hash (rth or rss). 1 = yes. */
 	} r_dh;
 	struct {
 		u64 opcode:4;
@@ -542,7 +552,8 @@ union octeon_rh {
 		u64 num_gmx_ports:8;
 		u64 max_nic_ports:10;
 		u64 app_cap_flags:4;
-		u64 app_mode:16;
+		u64 app_mode:8;
+		u64 pkind:8;
 	} r_core_drv_init;
 	struct {
 		u64 opcode:4;
@@ -562,6 +573,8 @@ union octeon_rh {
 		u64 opcode:4;
 	} r;
 	struct {
+		u64 has_hash:1;          /** Has hash (rth or rss). 1 = yes. */
+		u64 encap_on:1;
 		u64 has_hwtstamp:1;      /** 1 = has hwtstamp */
 		u64 csum_verified:3;     /** checksum verified. */
 		u64 priority:3;
@@ -572,7 +585,8 @@ union octeon_rh {
 		u64 opcode:4;
 	} r_dh;
 	struct {
-		u64 app_mode:16;
+		u64 pkind:8;
+		u64 app_mode:8;
 		u64 app_cap_flags:4;
 		u64 max_nic_ports:10;
 		u64 num_gmx_ports:8;
@@ -630,9 +644,11 @@ union oct_link_status {
 		u64 autoneg:1;
 		u64 if_mode:5;
 		u64 pause:1;
-		u64 reserved:16;
+		u64 flashing:1;
+		u64 reserved:15;
 #else
-		u64 reserved:16;
+		u64 reserved:15;
+		u64 flashing:1;
 		u64 pause:1;
 		u64 if_mode:5;
 		u64 autoneg:1;
@@ -736,6 +752,8 @@ struct nic_rx_stats {
 	u64 fw_err_pko;
 	u64 fw_err_link;
 	u64 fw_err_drop;
+	u64 fw_rx_vxlan;
+	u64 fw_rx_vxlan_err;
 
 	/* LRO */
 	u64 fw_lro_pkts;   /* Number of packets that are LROed      */
@@ -776,6 +794,7 @@ struct nic_tx_stats {
 	u64 fw_err_tso;
 	u64 fw_tso;		/* number of tso requests */
 	u64 fw_tso_fwd;		/* number of packets segmented in tso */
+	u64 fw_tx_vxlan;
 };
 
 struct oct_link_stats {
@@ -856,9 +875,9 @@ union oct_nic_if_cfg {
 		u64 num_iqueues:16;
 		u64 num_oqueues:16;
 		u64 gmx_port_id:8;
-		u64 reserved:8;
+		u64 vf_id:8;
 #else
-		u64 reserved:8;
+		u64 vf_id:8;
 		u64 gmx_port_id:8;
 		u64 num_oqueues:16;
 		u64 num_iqueues:16;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index 4b8c948400be..b3396e3a8bab 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -226,7 +226,7 @@ struct octeon_oq_config {
 	 */
 	u64 refill_threshold:16;
 
-	/** If set, the Output queue uses info-pointer mode. (Default: 1 ) */
+	/** If set, the Output queue uses info-pointer mode. (Default: 1) */
 	u64 info_ptr:32;
 
 	/* Max number of OQs available */
@@ -236,7 +236,7 @@ struct octeon_oq_config {
 	/* Max number of OQs available */
 	u64 max_oqs:8;
 
-	/** If set, the Output queue uses info-pointer mode. (Default: 1 ) */
+	/** If set, the Output queue uses info-pointer mode. (Default: 1) */
 	u64 info_ptr:32;
 
 	/** The number of buffers that were consumed during packet processing by
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index 466147e409c9..bbb50ea66f16 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -23,27 +23,14 @@
 /**
  * @file octeon_console.c
  */
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
-#include "octeon_network.h"
-#include "cn66xx_regs.h"
-#include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
 #include "octeon_mem_ops.h"
 
 static void octeon_remote_lock(void);
@@ -51,6 +38,8 @@ static void octeon_remote_unlock(void);
 static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
 					     const char *name,
 					     u32 flags);
+static int octeon_console_read(struct octeon_device *oct, u32 console_num,
+			       char *buffer, u32 buf_size);
 
 #define MIN(a, b) min((a), (b))
 #define CAST_ULL(v) ((u64)(v))
@@ -170,8 +159,8 @@ struct octeon_pci_console_desc {
 				offsetof(struct cvmx_bootmem_desc, field),   \
 				SIZEOF_FIELD(struct cvmx_bootmem_desc, field))
 
-#define __cvmx_bootmem_lock(flags)
-#define __cvmx_bootmem_unlock(flags)
+#define __cvmx_bootmem_lock(flags)	(flags = flags)
+#define __cvmx_bootmem_unlock(flags)	(flags = flags)
 
 /**
  * This macro returns a member of the
@@ -234,7 +223,7 @@ static void CVMX_BOOTMEM_NAMED_GET_NAME(struct octeon_device *oct,
 					u32 len)
 {
 	addr += offsetof(struct cvmx_bootmem_named_block_desc, name);
-	octeon_pci_read_core_mem(oct, addr, str, len);
+	octeon_pci_read_core_mem(oct, addr, (u8 *)str, len);
 	str[len] = 0;
 }
 
@@ -323,6 +312,9 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
 			if (name && named_size) {
 				char *name_tmp =
 					kmalloc(name_length + 1, GFP_KERNEL);
+				if (!name_tmp)
+					break;
+
 				CVMX_BOOTMEM_NAMED_GET_NAME(oct, named_addr,
 							    name_tmp,
 							    name_length);
@@ -383,7 +375,7 @@ static void octeon_remote_unlock(void)
 int octeon_console_send_cmd(struct octeon_device *oct, char *cmd_str,
 			    u32 wait_hundredths)
 {
-	u32 len = strlen(cmd_str);
+	u32 len = (u32)strlen(cmd_str);
 
 	dev_dbg(&oct->pci_dev->dev, "sending \"%s\" to bootloader\n", cmd_str);
 
@@ -440,8 +432,7 @@ int octeon_wait_for_bootloader(struct octeon_device *oct,
 }
 
 static void octeon_console_handle_result(struct octeon_device *oct,
-					 size_t console_num,
-					 char *buffer, s32 bytes_read)
+					 size_t console_num)
 {
 	struct octeon_console *console;
 
@@ -492,7 +483,7 @@ static void check_console(struct work_struct *work)
 	struct octeon_console *console;
 	struct cavium_wk *wk = (struct cavium_wk *)work;
 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
-	size_t console_num = wk->ctxul;
+	u32 console_num = (u32)wk->ctxul;
 	u32 delay;
 
 	console = &oct->console[console_num];
@@ -505,20 +496,17 @@ static void check_console(struct work_struct *work)
 		 */
 		bytes_read =
 			octeon_console_read(oct, console_num, console_buffer,
-					    sizeof(console_buffer) - 1, 0);
+					    sizeof(console_buffer) - 1);
 		if (bytes_read > 0) {
 			total_read += bytes_read;
-			if (console->waiting) {
-				octeon_console_handle_result(oct, console_num,
-							     console_buffer,
-							     bytes_read);
-			}
+			if (console->waiting)
+				octeon_console_handle_result(oct, console_num);
 			if (octeon_console_debug_enabled(console_num)) {
 				output_console_line(oct, console, console_num,
 						    console_buffer, bytes_read);
 			}
 		} else if (bytes_read < 0) {
-			dev_err(&oct->pci_dev->dev, "Error reading console %lu, ret=%d\n",
+			dev_err(&oct->pci_dev->dev, "Error reading console %u, ret=%d\n",
 				console_num, bytes_read);
 		}
 
@@ -530,7 +518,7 @@ static void check_console(struct work_struct *work)
 	 */
 	if (octeon_console_debug_enabled(console_num) &&
 	    (total_read == 0) && (console->leftover[0])) {
-		dev_info(&oct->pci_dev->dev, "%lu: %s\n",
+		dev_info(&oct->pci_dev->dev, "%u: %s\n",
 			 console_num, console->leftover);
 		console->leftover[0] = '\0';
 	}
@@ -675,8 +663,8 @@ static inline int octeon_console_avail_bytes(u32 buffer_size,
 	       octeon_console_free_bytes(buffer_size, wr_idx, rd_idx);
 }
 
-int octeon_console_read(struct octeon_device *oct, u32 console_num,
-			char *buffer, u32 buf_size, u32 flags)
+static int octeon_console_read(struct octeon_device *oct, u32 console_num,
+			       char *buffer, u32 buf_size)
 {
 	int bytes_to_read;
 	u32 rd_idx, wr_idx;
@@ -712,7 +700,7 @@ int octeon_console_read(struct octeon_device *oct, u32 console_num,
 		bytes_to_read = console->buffer_size - rd_idx;
 
 	octeon_pci_read_core_mem(oct, console->output_base_addr + rd_idx,
-				 buffer, bytes_to_read);
+				 (u8 *)buffer, bytes_to_read);
 	octeon_write_device_mem32(oct, console->addr +
 				  offsetof(struct octeon_pci_console,
 					   output_read_index),
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 337220721632..0eb504a4379a 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -19,27 +19,19 @@
 * This file may also be available under a different license from Cavium.
 * Contact Cavium, Inc. for more information
 **********************************************************************/
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/crc32.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
 #include "liquidio_image.h"
 #include "octeon_mem_ops.h"
 
@@ -448,10 +440,10 @@ static struct octeon_config_ptr {
 };
 
 static char oct_dev_state_str[OCT_DEV_STATES + 1][32] = {
-	"BEGIN",	"PCI-MAP-DONE",	      "DISPATCH-INIT-DONE",
+	"BEGIN", "PCI-MAP-DONE", "DISPATCH-INIT-DONE",
 	"IQ-INIT-DONE", "SCBUFF-POOL-INIT-DONE", "RESPLIST-INIT-DONE",
 	"DROQ-INIT-DONE", "IO-QUEUES-INIT-DONE", "CONSOLE-INIT-DONE",
-	"HOST-READY",	"CORE-READY",	      "RUNNING",	   "IN-RESET",
+	"HOST-READY", "CORE-READY", "RUNNING", "IN-RESET",
 	"INVALID"
 };
 
@@ -652,16 +644,16 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
 
 void octeon_free_device_mem(struct octeon_device *oct)
 {
-	u32 i;
+	int i;
 
 	for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-		/* could check  mask as well */
-		vfree(oct->droq[i]);
+		if (oct->io_qmask.oq & (1ULL << i))
+			vfree(oct->droq[i]);
 	}
 
 	for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-		/* could check mask as well */
-		vfree(oct->instr_queue[i]);
+		if (oct->io_qmask.iq & (1ULL << i))
+			vfree(oct->instr_queue[i]);
 	}
 
 	i = oct->octeon_id;
@@ -752,13 +744,11 @@ struct octeon_device *octeon_allocate_device(u32 pci_id,
 /* this function is only for setting up the first queue */
 int octeon_setup_instr_queues(struct octeon_device *oct)
 {
-	u32 num_iqs = 0;
 	u32 num_descs = 0;
 	u32 iq_no = 0;
 	union oct_txpciq txpciq;
 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
 
-	num_iqs = 1;
 	/* this causes queue 0 to be default queue */
 	if (OCTEON_CN6XXX(oct))
 		num_descs =
@@ -793,13 +783,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
 
 int octeon_setup_output_queues(struct octeon_device *oct)
 {
-	u32 num_oqs = 0;
 	u32 num_descs = 0;
 	u32 desc_size = 0;
 	u32 oq_no = 0;
 	int numa_node = cpu_to_node(oq_no % num_online_cpus());
 
-	num_oqs = 1;
 	/* this causes queue 0 to be default queue */
 	if (OCTEON_CN6XXX(oct)) {
 		num_descs =
@@ -1019,79 +1007,6 @@ octeon_register_dispatch_fn(struct octeon_device *oct,
 	return 0;
 }
 
-/* octeon_unregister_dispatch_fn
- * Parameters:
- *   oct       - octeon device
- *   opcode    - driver should unregister the function for this opcode
- *   subcode   - driver should unregister the function for this subcode
- * Description:
- *   Unregister the function set for this opcode+subcode.
- * Returns:
- *   Success: 0
- *   Failure: 1
- * Locks:
- *   No locks are held.
- */
-int
-octeon_unregister_dispatch_fn(struct octeon_device *oct, u16 opcode,
-			      u16 subcode)
-{
-	int retval = 0;
-	u32 idx;
-	struct list_head *dispatch, *dfree = NULL, *tmp2;
-	u16 combined_opcode = OPCODE_SUBCODE(opcode, subcode);
-
-	idx = combined_opcode & OCTEON_OPCODE_MASK;
-
-	spin_lock_bh(&oct->dispatch.lock);
-
-	if (oct->dispatch.count == 0) {
-		spin_unlock_bh(&oct->dispatch.lock);
-		dev_err(&oct->pci_dev->dev,
-			"No dispatch functions registered for this device\n");
-		return 1;
-	}
-
-	if (oct->dispatch.dlist[idx].opcode == combined_opcode) {
-		dispatch = &oct->dispatch.dlist[idx].list;
-		if (dispatch->next != dispatch) {
-			dispatch = dispatch->next;
-			oct->dispatch.dlist[idx].opcode =
-				((struct octeon_dispatch *)dispatch)->opcode;
-			oct->dispatch.dlist[idx].dispatch_fn =
-				((struct octeon_dispatch *)
-				 dispatch)->dispatch_fn;
-			oct->dispatch.dlist[idx].arg =
-				((struct octeon_dispatch *)dispatch)->arg;
-			list_del(dispatch);
-			dfree = dispatch;
-		} else {
-			oct->dispatch.dlist[idx].opcode = 0;
-			oct->dispatch.dlist[idx].dispatch_fn = NULL;
-			oct->dispatch.dlist[idx].arg = NULL;
-		}
-	} else {
-		retval = 1;
-		list_for_each_safe(dispatch, tmp2,
-				   &(oct->dispatch.dlist[idx].
-				     list)) {
-			if (((struct octeon_dispatch *)dispatch)->opcode ==
-			    combined_opcode) {
-				list_del(dispatch);
-				dfree = dispatch;
-				retval = 0;
-			}
-		}
-	}
-
-	if (!retval)
-		oct->dispatch.count--;
-
-	spin_unlock_bh(&oct->dispatch.lock);
-	vfree(dfree);
-	return retval;
-}
-
 int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
 {
 	u32 i;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index b4e566dea008..01edfb404346 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -221,7 +221,7 @@ struct octeon_fn_list {
 
 /* Structure for named memory blocks
  * Number of descriptors
- * available can be changed without affecting compatiblity,
+ * available can be changed without affecting compatibility,
  * but name length changes require a bump in the bootmem
  * descriptor version
  * Note: This structure must be naturally 64 bit aligned, as a single
@@ -254,7 +254,7 @@ struct oct_fw_info {
 struct cavium_wk {
 	struct delayed_work work;
 	void *ctxptr;
-	size_t ctxul;
+	u64 ctxul;
 };
 
 struct cavium_wq {
@@ -585,8 +585,7 @@ int octeon_add_console(struct octeon_device *oct, u32 console_num);
 int octeon_console_write(struct octeon_device *oct, u32 console_num,
 			 char *buffer, u32 write_request_size, u32 flags);
 int octeon_console_write_avail(struct octeon_device *oct, u32 console_num);
-int octeon_console_read(struct octeon_device *oct, u32 console_num,
-			char *buffer, u32 buf_size, u32 flags);
+
 int octeon_console_read_avail(struct octeon_device *oct, u32 console_num);
 
 /** Removes all attached consoles. */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index d9bb2f7e0836..e0afe4c1fd01 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -19,30 +19,18 @@
 * This file may also be available under a different license from Cavium.
 * Contact Cavium, Inc. for more information
 **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
-
-/* #define CAVIUM_ONLY_PERF_MODE */
 
 #define     CVM_MIN(d1, d2)           (((d1) < (d2)) ? (d1) : (d2))
 #define     CVM_MAX(d1, d2)           (((d1) > (d2)) ? (d1) : (d2))
@@ -104,8 +92,12 @@ static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev,
 	return fn_arg;
 }
 
-u32 octeon_droq_check_hw_for_pkts(struct octeon_device *oct,
-				  struct octeon_droq *droq)
+/** Check for packets on Droq. This function should be called with
+ * lock held.
+ *  @param  droq - Droq on which count is checked.
+ *  @return Returns packet count.
+ */
+u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq)
 {
 	u32 pkt_count = 0;
 
@@ -196,7 +188,6 @@ octeon_droq_setup_ring_buffers(struct octeon_device *oct,
 
 		droq->recv_buf_list[i].buffer = buf;
 		droq->recv_buf_list[i].data = get_rbd(buf);
-
 		droq->info_list[i].length = 0;
 
 		/* map ring buffers into memory */
@@ -569,7 +560,9 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct,
 			droq->stats.dropped_nomem++;
 		}
 	} else {
-		dev_err(&oct->pci_dev->dev, "DROQ: No dispatch function\n");
+		dev_err(&oct->pci_dev->dev, "DROQ: No dispatch function (opcode %u/%u)\n",
+			(unsigned int)rh->r.opcode,
+			(unsigned int)rh->r.subcode);
 		droq->stats.dropped_nodispatch++;
 	}                       /* else (dispatch_fn ... */
 
@@ -654,6 +647,7 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
 					pg_info->page = NULL;
 				droq->recv_buf_list[droq->read_idx].buffer =
 					NULL;
+
 				INCR_INDEX_BY1(droq->read_idx, droq->max_count);
 				droq->refill_count++;
 			} else {
@@ -748,7 +742,7 @@ octeon_droq_process_packets(struct octeon_device *oct,
 	if (pkt_count > budget)
 		pkt_count = budget;
 
-	/* Grab the lock */
+	/* Grab the droq lock */
 	spin_lock(&droq->lock);
 
 	pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count);
@@ -810,7 +804,7 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
 
 		total_pkts_processed += pkts_processed;
 
-		octeon_droq_check_hw_for_pkts(oct, droq);
+		octeon_droq_check_hw_for_pkts(droq);
 	}
 
 	spin_unlock(&droq->lock);
@@ -834,18 +828,6 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
 			     u32 arg)
 {
 	struct octeon_droq *droq;
-	struct octeon_config *oct_cfg = NULL;
-
-	oct_cfg = octeon_get_conf(oct);
-
-	if (!oct_cfg)
-		return -EINVAL;
-
-	if (q_no >= CFG_GET_OQ_MAX_Q(oct_cfg)) {
-		dev_err(&oct->pci_dev->dev, "%s: droq id (%d) exceeds MAX (%d)\n",
-			__func__, q_no, (oct->num_oqs - 1));
-		return -EINVAL;
-	}
 
 	droq = oct->droq[q_no];
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 1ca9c4f05702..5a6fb9113bbd 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -121,6 +121,9 @@ struct oct_droq_stats {
 	/** Num of Packets dropped due to receive path failures. */
 	u64 rx_dropped;
 
+	/** Num of vxlan packets received; */
+	u64 rx_vxlan;
+
 	/** Num of failures of recv_buffer_alloc() */
 	u64 rx_alloc_failure;
 
@@ -413,24 +416,9 @@ int octeon_register_dispatch_fn(struct octeon_device *oct,
 				u16 subcode,
 				octeon_dispatch_fn_t fn, void *fn_arg);
 
-/**  Remove registration for an opcode/subcode. This will delete the mapping for
- *   an opcode/subcode. The dispatch function will be unregistered and will no
- *   longer be called if a packet with the opcode/subcode arrives in the driver
- *   output queues.
- *   @param  oct        -  the octeon device to unregister from.
- *   @param  opcode     -  the opcode to be unregistered.
- *   @param  subcode    -  the subcode to be unregistered.
- *
- *   @return Success: 0; Failure: 1
- */
-int octeon_unregister_dispatch_fn(struct octeon_device *oct,
-				  u16 opcode,
-				  u16 subcode);
-
 void octeon_droq_print_stats(void);
 
-u32 octeon_droq_check_hw_for_pkts(struct octeon_device *oct,
-				  struct octeon_droq *droq);
+u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq);
 
 int octeon_create_droq(struct octeon_device *oct, u32 q_no,
 		       u32 num_descs, u32 desc_size, void *app_ctx);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index caa2b4f30717..ff4b1d6f007b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -66,6 +66,7 @@ struct oct_iq_stats {
 	u64 tx_dropped;/**< Numof pkts dropped dueto xmitpath errors. */
 	u64 tx_tot_bytes;/**< Total count of bytes sento to network. */
 	u64 tx_gso;  /* count of tso */
+	u64 tx_vxlan; /* tunnel */
 	u64 tx_dmamap_fail;
 	u64 tx_restart;
 	/*u64 tx_timeout_count;*/
@@ -98,7 +99,7 @@ struct octeon_instr_queue {
 
 	u32 rsvd:17;
 
-	/* Controls the periodic flushing of iq */
+	/* Controls whether extra flushing of IQ is done on Tx */
 	u32 do_auto_flush:1;
 
 	u32 status:8;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index 0ff3efc67b84..bc14e4c27332 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -174,7 +174,7 @@ sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
-	while (!(ACCESS_ONCE(*condition))) {
+	while (!(READ_ONCE(*condition))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (signal_pending(current))
 			goto out;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
index 5aecef870377..95a4bbedf557 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
@@ -19,43 +19,29 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
-#include "octeon_main.h"
-#include "octeon_network.h"
-#include "cn66xx_regs.h"
-#include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
 
 #define MEMOPS_IDX   MAX_BAR1_MAP_INDEX
 
+#ifdef __BIG_ENDIAN_BITFIELD
 static inline void
-octeon_toggle_bar1_swapmode(struct octeon_device *oct __attribute__((unused)),
-			    u32 idx __attribute__((unused)))
+octeon_toggle_bar1_swapmode(struct octeon_device *oct, u32 idx)
 {
-#ifdef __BIG_ENDIAN_BITFIELD
 	u32 mask;
 
 	mask = oct->fn_list.bar1_idx_read(oct, idx);
 	mask = (mask & 0x2) ? (mask & ~2) : (mask | 2);
 	oct->fn_list.bar1_idx_write(oct, idx, mask);
-#endif
 }
+#else
+#define octeon_toggle_bar1_swapmode(oct, idx) (oct = oct)
+#endif
 
 static void
 octeon_pci_fastwrite(struct octeon_device *oct, u8 __iomem *mapped_addr,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index b481edc56c6e..fb820dc7fcb7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -30,6 +30,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/ptp_clock_kernel.h>
 
+#define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE)
+#define LIO_MIN_MTU_SIZE 68
+
 struct oct_nic_stats_resp {
 	u64     rh;
 	struct oct_link_stats stats;
@@ -96,6 +99,12 @@ struct lio {
 	/** Copy of Interface capabilities: TSO, TSO6, LRO, Chescksums . */
 	u64 dev_capability;
 
+	/* Copy of transmit encapsulation capabilities:
+	 * TSO, TSO6, Checksums for this device for Kernel
+	 * 3.10.0 onwards
+	 */
+	u64 enc_dev_capability;
+
 	/** Copy of beacaon reg in phy */
 	u32 phy_beacon_val;
 
@@ -115,7 +124,6 @@ struct lio {
 
 	/* work queue for  txq status */
 	struct cavium_wq	txq_status_wq;
-
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
@@ -351,7 +359,7 @@ lio_map_ring_info(struct octeon_droq *droq, u32 i)
 	dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
 				  OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
 
-	BUG_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
+	WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
 
 	return (u64)dma_addr;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 36f1970a860e..166727be928f 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -19,14 +19,9 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -34,13 +29,6 @@
 #include "octeon_device.h"
 #include "octeon_nic.h"
 #include "octeon_main.h"
-#include "octeon_network.h"
-#include "cn66xx_regs.h"
-#include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
 
 void *
 octeon_alloc_soft_command_resp(struct octeon_device    *oct,
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 7eafa75ac095..d32492f185ff 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -19,28 +19,17 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
 #include "octeon_network.h"
-#include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
 
 #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
 	(octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
@@ -301,40 +290,8 @@ static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
 	memcpy(iqptr, cmd, cmdsize);
 }
 
-static inline int
-__post_command(struct octeon_device *octeon_dev __attribute__((unused)),
-	       struct octeon_instr_queue *iq,
-	       u32 force_db __attribute__((unused)), u8 *cmd)
-{
-	u32 index = -1;
-
-	/* This ensures that the read index does not wrap around to the same
-	 * position if queue gets full before Octeon could fetch any instr.
-	 */
-	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1))
-		return -1;
-
-	__copy_cmd_into_iq(iq, cmd);
-
-	/* "index" is returned, host_write_index is modified. */
-	index = iq->host_write_index;
-	INCR_INDEX_BY1(iq->host_write_index, iq->max_count);
-	iq->fill_cnt++;
-
-	/* Flush the command into memory. We need to be sure the data is in
-	 * memory before indicating that the instruction is pending.
-	 */
-	wmb();
-
-	atomic_inc(&iq->instr_pending);
-
-	return index;
-}
-
 static inline struct iq_post_status
-__post_command2(struct octeon_device *octeon_dev __attribute__((unused)),
-		struct octeon_instr_queue *iq,
-		u32 force_db __attribute__((unused)), u8 *cmd)
+__post_command2(struct octeon_instr_queue *iq, u8 *cmd)
 {
 	struct iq_post_status st;
 
@@ -392,6 +349,7 @@ __add_to_request_list(struct octeon_instr_queue *iq,
 	iq->request_list[idx].reqtype = reqtype;
 }
 
+/* Can only run in process context */
 int
 lio_process_iq_request_list(struct octeon_device *oct,
 			    struct octeon_instr_queue *iq, u32 napi_budget)
@@ -403,6 +361,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 	struct octeon_soft_command *sc;
 	struct octeon_instr_irh *irh;
+	unsigned long flags;
 
 	while (old != iq->octeon_read_index) {
 		reqtype = iq->request_list[old].reqtype;
@@ -432,17 +391,22 @@ lio_process_iq_request_list(struct octeon_device *oct,
 				 * command response list because we expect
 				 * a response from Octeon.
 				 */
-				spin_lock_bh(&oct->response_list
-					[OCTEON_ORDERED_SC_LIST].lock);
+				spin_lock_irqsave
+					(&oct->response_list
+					 [OCTEON_ORDERED_SC_LIST].lock,
+					 flags);
 				atomic_inc(&oct->response_list
 					[OCTEON_ORDERED_SC_LIST].
 					pending_req_count);
 				list_add_tail(&sc->node, &oct->response_list
 					[OCTEON_ORDERED_SC_LIST].head);
-				spin_unlock_bh(&oct->response_list
-					[OCTEON_ORDERED_SC_LIST].lock);
+				spin_unlock_irqrestore
+					(&oct->response_list
+					 [OCTEON_ORDERED_SC_LIST].lock,
+					 flags);
 			} else {
 				if (sc->callback) {
+					/* This callback must not sleep */
 					sc->callback(oct, OCTEON_REQUEST_DONE,
 						     sc->callback_arg);
 				}
@@ -559,11 +523,12 @@ static void check_db_timeout(struct work_struct *work)
 {
 	struct cavium_wk *wk = (struct cavium_wk *)work;
 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
-	unsigned long iq_no = wk->ctxul;
+	u64 iq_no = wk->ctxul;
 	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
+	u32 delay = 10;
 
 	__check_db_timeout(oct, iq_no);
-	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));
+	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
 }
 
 int
@@ -579,7 +544,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 	 */
 	spin_lock_bh(&iq->post_lock);
 
-	st = __post_command2(oct, iq, force_db, cmd);
+	st = __post_command2(iq, cmd);
 
 	if (st.status != IQ_SEND_FAILED) {
 		octeon_report_sent_bytes_to_bql(buf, reqtype);
@@ -587,7 +552,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
 
-		if (iq->fill_cnt >= iq->fill_threshold || force_db)
+		if (force_db)
 			ring_doorbell(oct, iq);
 	} else {
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
@@ -618,8 +583,8 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 	struct octeon_instr_irh *irh;
 	struct octeon_instr_rdp *rdp;
 
-	BUG_ON(opcode > 15);
-	BUG_ON(subcode > 127);
+	WARN_ON(opcode > 15);
+	WARN_ON(subcode > 127);
 
 	oct_cfg = octeon_get_conf(oct);
 
@@ -661,7 +626,6 @@ int octeon_send_soft_command(struct octeon_device *oct,
 {
 	struct octeon_instr_ih2 *ih2;
 	struct octeon_instr_irh *irh;
-	struct octeon_instr_rdp *rdp;
 	u32 len;
 
 	ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
@@ -671,12 +635,10 @@ int octeon_send_soft_command(struct octeon_device *oct,
 	}
 	irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
 	if (irh->rflag) {
-		BUG_ON(!sc->dmarptr);
-		BUG_ON(!sc->status_word);
+		WARN_ON(!sc->dmarptr);
+		WARN_ON(!sc->status_word);
 		*sc->status_word = COMPLETION_WORD_INIT;
 
-		rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-
 		sc->cmd.cmd2.rptr = sc->dmarptr;
 	}
 	len = (u32)ih2->dlengsz;
@@ -720,7 +682,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct)
 	struct list_head *tmp, *tmp2;
 	struct octeon_soft_command *sc;
 
-	spin_lock(&oct->sc_buf_pool.lock);
+	spin_lock_bh(&oct->sc_buf_pool.lock);
 
 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
 		list_del(tmp);
@@ -732,7 +694,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct)
 
 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
 
-	spin_unlock(&oct->sc_buf_pool.lock);
+	spin_unlock_bh(&oct->sc_buf_pool.lock);
 
 	return 0;
 }
@@ -748,13 +710,13 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
 	struct octeon_soft_command *sc = NULL;
 	struct list_head *tmp;
 
-	BUG_ON((offset + datasize + rdatasize + ctxsize) >
+	WARN_ON((offset + datasize + rdatasize + ctxsize) >
 	       SOFT_COMMAND_BUFFER_SIZE);
 
-	spin_lock(&oct->sc_buf_pool.lock);
+	spin_lock_bh(&oct->sc_buf_pool.lock);
 
 	if (list_empty(&oct->sc_buf_pool.head)) {
-		spin_unlock(&oct->sc_buf_pool.lock);
+		spin_unlock_bh(&oct->sc_buf_pool.lock);
 		return NULL;
 	}
 
@@ -765,7 +727,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
 
 	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);
 
-	spin_unlock(&oct->sc_buf_pool.lock);
+	spin_unlock_bh(&oct->sc_buf_pool.lock);
 
 	sc = (struct octeon_soft_command *)tmp;
 
@@ -795,7 +757,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
 	offset = (offset + datasize + 127) & 0xffffff80;
 
 	if (rdatasize) {
-		BUG_ON(rdatasize < 16);
+		WARN_ON(rdatasize < 16);
 		sc->virtrptr = (u8 *)sc + offset;
 		sc->dmarptr = dma_addr + offset;
 		sc->rdatasize = rdatasize;
@@ -808,11 +770,11 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
 void octeon_free_soft_command(struct octeon_device *oct,
 			      struct octeon_soft_command *sc)
 {
-	spin_lock(&oct->sc_buf_pool.lock);
+	spin_lock_bh(&oct->sc_buf_pool.lock);
 
 	list_add_tail(&sc->node, &oct->sc_buf_pool.head);
 
 	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);
 
-	spin_unlock(&oct->sc_buf_pool.lock);
+	spin_unlock_bh(&oct->sc_buf_pool.lock);
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index c93210f99dda..709049e36627 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -19,28 +19,14 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/version.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/netdevice.h>
-#include "octeon_config.h"
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_nic.h"
 #include "octeon_main.h"
-#include "octeon_network.h"
-#include "cn66xx_regs.h"
-#include "cn66xx_device.h"
-#include "cn68xx_regs.h"
-#include "cn68xx_device.h"
-#include "liquidio_image.h"
 
 static void oct_poll_req_completion(struct work_struct *work);
 
@@ -66,7 +52,7 @@ int octeon_setup_response_list(struct octeon_device *oct)
 	INIT_DELAYED_WORK(&cwq->wk.work, oct_poll_req_completion);
 	cwq->wk.ctxptr = oct;
 	oct->cmd_resp_state = OCT_DRV_ONLINE;
-	queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(100));
+	queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
 
 	return ret;
 }
@@ -176,6 +162,5 @@ static void oct_poll_req_completion(struct work_struct *work)
 	struct cavium_wq *cwq = &oct->dma_comp_wq;
 
 	lio_process_ordered_list(oct, 0);
-
-	queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(100));
+	queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
 }
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 388cd799d9ed..e8bc15bcde70 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -146,7 +146,6 @@ struct octeon_mgmt {
 	struct device *dev;
 	struct napi_struct napi;
 	struct tasklet_struct tx_clean_tasklet;
-	struct phy_device *phydev;
 	struct device_node *phy_np;
 	resource_size_t mix_phys;
 	resource_size_t mix_size;
@@ -787,14 +786,12 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev,
 static int octeon_mgmt_ioctl(struct net_device *netdev,
 			     struct ifreq *rq, int cmd)
 {
-	struct octeon_mgmt *p = netdev_priv(netdev);
-
 	switch (cmd) {
 	case SIOCSHWTSTAMP:
 		return octeon_mgmt_ioctl_hwtstamp(netdev, rq, cmd);
 	default:
-		if (p->phydev)
-			return phy_mii_ioctl(p->phydev, rq, cmd);
+		if (netdev->phydev)
+			return phy_mii_ioctl(netdev->phydev, rq, cmd);
 		return -EINVAL;
 	}
 }
@@ -836,16 +833,18 @@ static void octeon_mgmt_enable_link(struct octeon_mgmt *p)
 
 static void octeon_mgmt_update_link(struct octeon_mgmt *p)
 {
+	struct net_device *ndev = p->netdev;
+	struct phy_device *phydev = ndev->phydev;
 	union cvmx_agl_gmx_prtx_cfg prtx_cfg;
 
 	prtx_cfg.u64 = cvmx_read_csr(p->agl + AGL_GMX_PRT_CFG);
 
-	if (!p->phydev->link)
+	if (!phydev->link)
 		prtx_cfg.s.duplex = 1;
 	else
-		prtx_cfg.s.duplex = p->phydev->duplex;
+		prtx_cfg.s.duplex = phydev->duplex;
 
-	switch (p->phydev->speed) {
+	switch (phydev->speed) {
 	case 10:
 		prtx_cfg.s.speed = 0;
 		prtx_cfg.s.slottime = 0;
@@ -871,7 +870,7 @@ static void octeon_mgmt_update_link(struct octeon_mgmt *p)
 			prtx_cfg.s.speed_msb = 0;
 			/* Only matters for half-duplex */
 			prtx_cfg.s.slottime = 1;
-			prtx_cfg.s.burst = p->phydev->duplex;
+			prtx_cfg.s.burst = phydev->duplex;
 		}
 		break;
 	case 0:  /* No link */
@@ -894,9 +893,9 @@ static void octeon_mgmt_update_link(struct octeon_mgmt *p)
 		/* MII (both speeds) and RGMII 1000 speed. */
 		agl_clk.s.clk_cnt = 1;
 		if (prtx_ctl.s.mode == 0) { /* RGMII mode */
-			if (p->phydev->speed == 10)
+			if (phydev->speed == 10)
 				agl_clk.s.clk_cnt = 50;
-			else if (p->phydev->speed == 100)
+			else if (phydev->speed == 100)
 				agl_clk.s.clk_cnt = 5;
 		}
 		cvmx_write_csr(p->agl + AGL_GMX_TX_CLK, agl_clk.u64);
@@ -906,39 +905,40 @@ static void octeon_mgmt_update_link(struct octeon_mgmt *p)
 static void octeon_mgmt_adjust_link(struct net_device *netdev)
 {
 	struct octeon_mgmt *p = netdev_priv(netdev);
+	struct phy_device *phydev = netdev->phydev;
 	unsigned long flags;
 	int link_changed = 0;
 
-	if (!p->phydev)
+	if (!phydev)
 		return;
 
 	spin_lock_irqsave(&p->lock, flags);
 
 
-	if (!p->phydev->link && p->last_link)
+	if (!phydev->link && p->last_link)
 		link_changed = -1;
 
-	if (p->phydev->link
-	    && (p->last_duplex != p->phydev->duplex
-		|| p->last_link != p->phydev->link
-		|| p->last_speed != p->phydev->speed)) {
+	if (phydev->link &&
+	    (p->last_duplex != phydev->duplex ||
+	     p->last_link != phydev->link ||
+	     p->last_speed != phydev->speed)) {
 		octeon_mgmt_disable_link(p);
 		link_changed = 1;
 		octeon_mgmt_update_link(p);
 		octeon_mgmt_enable_link(p);
 	}
 
-	p->last_link = p->phydev->link;
-	p->last_speed = p->phydev->speed;
-	p->last_duplex = p->phydev->duplex;
+	p->last_link = phydev->link;
+	p->last_speed = phydev->speed;
+	p->last_duplex = phydev->duplex;
 
 	spin_unlock_irqrestore(&p->lock, flags);
 
 	if (link_changed != 0) {
 		if (link_changed > 0) {
 			pr_info("%s: Link is up - %d/%s\n", netdev->name,
-				p->phydev->speed,
-				DUPLEX_FULL == p->phydev->duplex ?
+				phydev->speed,
+				phydev->duplex == DUPLEX_FULL ?
 				"Full" : "Half");
 		} else {
 			pr_info("%s: Link is down\n", netdev->name);
@@ -949,6 +949,7 @@ static void octeon_mgmt_adjust_link(struct net_device *netdev)
 static int octeon_mgmt_init_phy(struct net_device *netdev)
 {
 	struct octeon_mgmt *p = netdev_priv(netdev);
+	struct phy_device *phydev = NULL;
 
 	if (octeon_is_simulation() || p->phy_np == NULL) {
 		/* No PHYs in the simulator. */
@@ -956,11 +957,11 @@ static int octeon_mgmt_init_phy(struct net_device *netdev)
 		return 0;
 	}
 
-	p->phydev = of_phy_connect(netdev, p->phy_np,
-				   octeon_mgmt_adjust_link, 0,
-				   PHY_INTERFACE_MODE_MII);
+	phydev = of_phy_connect(netdev, p->phy_np,
+				octeon_mgmt_adjust_link, 0,
+				PHY_INTERFACE_MODE_MII);
 
-	if (!p->phydev)
+	if (!phydev)
 		return -ENODEV;
 
 	return 0;
@@ -1080,9 +1081,9 @@ static int octeon_mgmt_open(struct net_device *netdev)
 	}
 
 	/* Set the mode of the interface, RGMII/MII. */
-	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && p->phydev) {
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && netdev->phydev) {
 		union cvmx_agl_prtx_ctl agl_prtx_ctl;
-		int rgmii_mode = (p->phydev->supported &
+		int rgmii_mode = (netdev->phydev->supported &
 				  (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) != 0;
 
 		agl_prtx_ctl.u64 = cvmx_read_csr(p->agl_prt_ctl);
@@ -1205,7 +1206,7 @@ static int octeon_mgmt_open(struct net_device *netdev)
 
 	/* Configure the port duplex, speed and enables */
 	octeon_mgmt_disable_link(p);
-	if (p->phydev)
+	if (netdev->phydev)
 		octeon_mgmt_update_link(p);
 	octeon_mgmt_enable_link(p);
 
@@ -1214,9 +1215,9 @@ static int octeon_mgmt_open(struct net_device *netdev)
 	/* PHY is not present in simulator. The carrier is enabled
 	 * while initializing the phy for simulator, leave it enabled.
 	 */
-	if (p->phydev) {
+	if (netdev->phydev) {
 		netif_carrier_off(netdev);
-		phy_start_aneg(p->phydev);
+		phy_start_aneg(netdev->phydev);
 	}
 
 	netif_wake_queue(netdev);
@@ -1244,9 +1245,8 @@ static int octeon_mgmt_stop(struct net_device *netdev)
 	napi_disable(&p->napi);
 	netif_stop_queue(netdev);
 
-	if (p->phydev)
-		phy_disconnect(p->phydev);
-	p->phydev = NULL;
+	if (netdev->phydev)
+		phy_disconnect(netdev->phydev);
 
 	netif_carrier_off(netdev);
 
@@ -1346,50 +1346,23 @@ static void octeon_mgmt_get_drvinfo(struct net_device *netdev,
 	strlcpy(info->bus_info, "N/A", sizeof(info->bus_info));
 }
 
-static int octeon_mgmt_get_settings(struct net_device *netdev,
-				    struct ethtool_cmd *cmd)
-{
-	struct octeon_mgmt *p = netdev_priv(netdev);
-
-	if (p->phydev)
-		return phy_ethtool_gset(p->phydev, cmd);
-
-	return -EOPNOTSUPP;
-}
-
-static int octeon_mgmt_set_settings(struct net_device *netdev,
-				    struct ethtool_cmd *cmd)
-{
-	struct octeon_mgmt *p = netdev_priv(netdev);
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (p->phydev)
-		return phy_ethtool_sset(p->phydev, cmd);
-
-	return -EOPNOTSUPP;
-}
-
 static int octeon_mgmt_nway_reset(struct net_device *dev)
 {
-	struct octeon_mgmt *p = netdev_priv(dev);
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (p->phydev)
-		return phy_start_aneg(p->phydev);
+	if (dev->phydev)
+		return phy_start_aneg(dev->phydev);
 
 	return -EOPNOTSUPP;
 }
 
 static const struct ethtool_ops octeon_mgmt_ethtool_ops = {
 	.get_drvinfo = octeon_mgmt_get_drvinfo,
-	.get_settings = octeon_mgmt_get_settings,
-	.set_settings = octeon_mgmt_set_settings,
 	.nway_reset = octeon_mgmt_nway_reset,
 	.get_link = ethtool_op_get_link,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 static const struct net_device_ops octeon_mgmt_ops = {
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 95f17f8cadac..16ed20357c5c 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -499,6 +499,7 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 	u32 rr_quantum;
 	u8 sq_idx = sq->sq_num;
 	u8 pqs_vnic;
+	int svf;
 
 	if (sq->sqs_mode)
 		pqs_vnic = nic->pqs_vf[vnic];
@@ -511,10 +512,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 	/* 24 bytes for FCS, IPG and preamble */
 	rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
 
-	tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+	if (!sq->sqs_mode) {
+		tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+	} else {
+		for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+			if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+				break;
+		}
+		tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
+		tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
+		tl4 += (svf * NIC_TL4_PER_LMAC);
+		tl4 += (bgx * NIC_TL4_PER_BGX);
+	}
 	tl4 += sq_idx;
-	if (sq->sqs_mode)
-		tl4 += vnic * 8;
 
 	tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
 	nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 3ed21988626b..63a39ac97d53 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -551,7 +551,9 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 	}
 
 	/* Clear rcvflt bit (latching high) and read it back */
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
+	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT)
+		bgx_reg_modify(bgx, lmacid,
+			       BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
 	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
 		dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
 		if (bgx->use_training) {
@@ -570,13 +572,6 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 		return -1;
 	}
 
-	/* Wait for MAC RX to be ready */
-	if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL,
-			 SMU_RX_CTL_STATUS, true)) {
-		dev_err(&bgx->pdev->dev, "SMU RX link not okay\n");
-		return -1;
-	}
-
 	/* Wait for BGX RX to be idle */
 	if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) {
 		dev_err(&bgx->pdev->dev, "SMU RX not idle\n");
@@ -589,29 +584,30 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 		return -1;
 	}
 
-	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
-		dev_err(&bgx->pdev->dev, "Receive fault\n");
-		return -1;
-	}
-
-	/* Receive link is latching low. Force it high and verify it */
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK);
-	if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1,
-			 SPU_STATUS1_RCV_LNK, false)) {
-		dev_err(&bgx->pdev->dev, "SPU receive link down\n");
-		return -1;
-	}
-
+	/* Clear receive packet disable */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
 	cfg &= ~SPU_MISC_CTL_RX_DIS;
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
-	return 0;
+
+	/* Check for MAC RX faults */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
+	/* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
+	cfg &= SMU_RX_CTL_STATUS;
+	if (!cfg)
+		return 0;
+
+	/* Rx local/remote fault seen.
+	 * Do lmac reinit to see if condition recovers
+	 */
+	bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type);
+
+	return -1;
 }
 
 static void bgx_poll_for_link(struct work_struct *work)
 {
 	struct lmac *lmac;
-	u64 link;
+	u64 spu_link, smu_link;
 
 	lmac = container_of(work, struct lmac, dwork.work);
 
@@ -621,8 +617,11 @@ static void bgx_poll_for_link(struct work_struct *work)
 	bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1,
 		     SPU_STATUS1_RCV_LNK, false);
 
-	link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
-	if (link & SPU_STATUS1_RCV_LNK) {
+	spu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
+	smu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SMUX_RX_CTL);
+
+	if ((spu_link & SPU_STATUS1_RCV_LNK) &&
+	    !(smu_link & SMU_RX_CTL_STATUS)) {
 		lmac->link_up = 1;
 		if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
 			lmac->last_speed = 40000;
@@ -636,9 +635,15 @@ static void bgx_poll_for_link(struct work_struct *work)
 	}
 
 	if (lmac->last_link != lmac->link_up) {
+		if (lmac->link_up) {
+			if (bgx_xaui_check_link(lmac)) {
+				/* Errors, clear link_up state */
+				lmac->link_up = 0;
+				lmac->last_speed = SPEED_UNKNOWN;
+				lmac->last_duplex = DUPLEX_UNKNOWN;
+			}
+		}
 		lmac->last_link = lmac->link_up;
-		if (lmac->link_up)
-			bgx_xaui_check_link(lmac);
 	}
 
 	queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
@@ -710,7 +715,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 {
 	struct lmac *lmac;
-	u64 cmrx_cfg;
+	u64 cfg;
 
 	lmac = &bgx->lmac[lmacid];
 	if (lmac->check_link) {
@@ -719,9 +724,33 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 		destroy_workqueue(lmac->check_link);
 	}
 
-	cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
-	cmrx_cfg &= ~(1 << 15);
-	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg);
+	/* Disable packet reception */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_PKT_RX_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	/* Give chance for Rx/Tx FIFO to get drained */
+	bgx_poll_reg(bgx, lmacid, BGX_CMRX_RX_FIFO_LEN, (u64)0x1FFF, true);
+	bgx_poll_reg(bgx, lmacid, BGX_CMRX_TX_FIFO_LEN, (u64)0x3FFF, true);
+
+	/* Disable packet transmission */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_PKT_TX_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	/* Disable serdes lanes */
+        if (!lmac->is_sgmii)
+                bgx_reg_modify(bgx, lmacid,
+                               BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
+        else
+                bgx_reg_modify(bgx, lmacid,
+                               BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_PWR_DN);
+
+	/* Disable LMAC */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
 	bgx_flush_dmac_addrs(bgx, lmacid);
 
 	if ((bgx->lmac_type != BGX_MODE_XFI) &&
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 149e179363a1..42010d2e5ddf 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -41,6 +41,7 @@
 #define BGX_CMRX_RX_STAT10		0xC0
 #define BGX_CMRX_RX_BP_DROP		0xC8
 #define BGX_CMRX_RX_DMAC_CTL		0x0E8
+#define BGX_CMRX_RX_FIFO_LEN		0x108
 #define BGX_CMR_RX_DMACX_CAM		0x200
 #define  RX_DMACX_CAM_EN			BIT_ULL(48)
 #define  RX_DMACX_CAM_LMACID(x)			(x << 49)
@@ -50,6 +51,7 @@
 #define BGX_CMR_CHAN_MSK_AND		0x450
 #define BGX_CMR_BIST_STATUS		0x460
 #define BGX_CMR_RX_LMACS		0x468
+#define BGX_CMRX_TX_FIFO_LEN		0x518
 #define BGX_CMRX_TX_STAT0		0x600
 #define BGX_CMRX_TX_STAT1		0x608
 #define BGX_CMRX_TX_STAT2		0x610
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index c4b262ca7d43..2accab386323 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -36,8 +36,8 @@
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0E
-#define T4FW_VERSION_MICRO 0x04
+#define T4FW_VERSION_MINOR 0x0F
+#define T4FW_VERSION_MICRO 0x25
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
 #define T4FW_MIN_VERSION_MICRO 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x0E
-#define T5FW_VERSION_MICRO 0x04
+#define T5FW_VERSION_MINOR 0x0F
+#define T5FW_VERSION_MICRO 0x25
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
 #define T5FW_MIN_VERSION_MICRO 0x00
 
 #define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x0E
-#define T6FW_VERSION_MICRO 0x04
+#define T6FW_VERSION_MINOR 0x0F
+#define T6FW_VERSION_MICRO 0x25
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 1873c74638cd..1f16e73f6d0c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3251,8 +3251,9 @@ static void be_msix_disable(struct be_adapter *adapter)
 
 static int be_msix_enable(struct be_adapter *adapter)
 {
-	unsigned int i, num_vec, max_roce_eqs;
+	unsigned int i, max_roce_eqs;
 	struct device *dev = &adapter->pdev->dev;
+	int num_vec;
 
 	/* If RoCE is supported, program the max number of vectors that
 	 * could be used for NIC and RoCE, else, just program the number
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 06f031715b57..25faa3dca9c0 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -24,6 +24,14 @@
 
 #define DRV_NAME			"nps_mgt_enet"
 
+static inline bool nps_enet_is_tx_pending(struct nps_enet_priv *priv)
+{
+	u32 tx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL);
+	u32 tx_ctrl_ct = (tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT;
+
+	return (!tx_ctrl_ct && priv->tx_skb);
+}
+
 static void nps_enet_clean_rx_fifo(struct net_device *ndev, u32 frame_len)
 {
 	struct nps_enet_priv *priv = netdev_priv(ndev);
@@ -46,16 +54,17 @@ static void nps_enet_read_rx_fifo(struct net_device *ndev,
 	if (dst_is_aligned) {
 		ioread32_rep(priv->regs_base + NPS_ENET_REG_RX_BUF, reg, len);
 		reg += len;
-	}
-	else { /* !dst_is_aligned */
+	} else { /* !dst_is_aligned */
 		for (i = 0; i < len; i++, reg++) {
 			u32 buf = nps_enet_reg_get(priv, NPS_ENET_REG_RX_BUF);
+
 			put_unaligned_be32(buf, reg);
 		}
 	}
 	/* copy last bytes (if any) */
 	if (last) {
 		u32 buf;
+
 		ioread32_rep(priv->regs_base + NPS_ENET_REG_RX_BUF, &buf, 1);
 		memcpy((u8 *)reg, &buf, last);
 	}
@@ -140,12 +149,11 @@ static void nps_enet_tx_handler(struct net_device *ndev)
 {
 	struct nps_enet_priv *priv = netdev_priv(ndev);
 	u32 tx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL);
-	u32 tx_ctrl_ct = (tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT;
 	u32 tx_ctrl_et = (tx_ctrl_value & TX_CTL_ET_MASK) >> TX_CTL_ET_SHIFT;
 	u32 tx_ctrl_nt = (tx_ctrl_value & TX_CTL_NT_MASK) >> TX_CTL_NT_SHIFT;
 
 	/* Check if we got TX */
-	if (!priv->tx_skb || tx_ctrl_ct)
+	if (!nps_enet_is_tx_pending(priv))
 		return;
 
 	/* Ack Tx ctrl register */
@@ -183,9 +191,6 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
 	work_done = nps_enet_rx_handler(ndev);
 	if (work_done < budget) {
 		u32 buf_int_enable_value = 0;
-		u32 tx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL);
-		u32 tx_ctrl_ct =
-			(tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT;
 
 		napi_complete(napi);
 
@@ -204,8 +209,7 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
 		 * the two code lines below will solve this situation by
 		 * re-adding ourselves to the poll list.
 		 */
-
-		if (priv->tx_skb && !tx_ctrl_ct) {
+		if (nps_enet_is_tx_pending(priv)) {
 			nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0);
 			napi_reschedule(napi);
 		}
@@ -230,11 +234,9 @@ static irqreturn_t nps_enet_irq_handler(s32 irq, void *dev_instance)
 	struct net_device *ndev = dev_instance;
 	struct nps_enet_priv *priv = netdev_priv(ndev);
 	u32 rx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_RX_CTL);
-	u32 tx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL);
-	u32 tx_ctrl_ct = (tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT;
 	u32 rx_ctrl_cr = (rx_ctrl_value & RX_CTL_CR_MASK) >> RX_CTL_CR_SHIFT;
 
-	if ((!tx_ctrl_ct && priv->tx_skb) || rx_ctrl_cr)
+	if (nps_enet_is_tx_pending(priv) || rx_ctrl_cr)
 		if (likely(napi_schedule_prep(&priv->napi))) {
 			nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0);
 			__napi_schedule(&priv->napi);
@@ -459,7 +461,6 @@ static void nps_enet_set_rx_mode(struct net_device *ndev)
 			 | NPS_ENET_ENABLE << CFG_2_DISK_DA_SHIFT;
 		ge_mac_cfg_2_value = (ge_mac_cfg_2_value & ~CFG_2_DISK_MC_MASK)
 			 | NPS_ENET_ENABLE << CFG_2_DISK_MC_SHIFT;
-
 	}
 
 	nps_enet_reg_set(priv, NPS_ENET_REG_GE_MAC_CFG_2, ge_mac_cfg_2_value);
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 3869322690ac..e093cbf26c8c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -363,6 +363,14 @@ enum hnae_port_type {
 	HNAE_PORT_DEBUG
 };
 
+/* mac media type */
+enum hnae_media_type {
+	HNAE_MEDIA_TYPE_UNKNOWN = 0,
+	HNAE_MEDIA_TYPE_FIBER,
+	HNAE_MEDIA_TYPE_COPPER,
+	HNAE_MEDIA_TYPE_BACKPLANE,
+};
+
 /* This struct defines the operation on the handle.
  *
  * get_handle(): (mandatory)
@@ -525,6 +533,7 @@ struct hnae_handle {
 	u32 eport_id;
 	u32 dport_id;	/* v2 tx bd should fill the dport_id */
 	enum hnae_port_type port_type;
+	enum hnae_media_type media_type;
 	struct list_head node;    /* list to hnae_ae_dev->handle_list */
 	struct hnae_buf_ops *bops; /* operation for the buffer */
 	struct hnae_queue **qs;  /* array base of all queues */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 835521bf1bbc..e28d960997af 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -134,6 +134,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
 	ae_handle->phy_dev = vf_cb->mac_cb->phy_dev;
 	ae_handle->if_support = vf_cb->mac_cb->if_support;
 	ae_handle->port_type = vf_cb->mac_cb->mac_type;
+	ae_handle->media_type = vf_cb->mac_cb->media_type;
 	ae_handle->dport_id = port_id;
 
 	return ae_handle;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index c526558e6367..3fb87e233c49 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -56,20 +56,6 @@ static const enum mac_mode g_mac_mode_1000[] = {
 	[PHY_INTERFACE_MODE_RTBI]   = MAC_MODE_RTBI_1000
 };
 
-static enum mac_mode hns_mac_dev_to_enet_if(const struct hns_mac_cb *mac_cb)
-{
-	switch (mac_cb->max_speed) {
-	case MAC_SPEED_100:
-		return g_mac_mode_100[mac_cb->phy_if];
-	case MAC_SPEED_1000:
-		return g_mac_mode_1000[mac_cb->phy_if];
-	case MAC_SPEED_10000:
-		return MAC_MODE_XGMII_10000;
-	default:
-		return MAC_MODE_MII_100;
-	}
-}
-
 static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb)
 {
 	switch (mac_cb->max_speed) {
@@ -134,7 +120,6 @@ void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex)
 
 	mac_cb->speed = speed;
 	mac_cb->half_duplex = !duplex;
-	mac_ctrl_drv->mac_mode = hns_mac_dev_to_enet_if(mac_cb);
 
 	if (mac_ctrl_drv->adjust_link) {
 		ret = mac_ctrl_drv->adjust_link(mac_ctrl_drv,
@@ -748,6 +733,18 @@ static void hns_mac_register_phy(struct hns_mac_cb *mac_cb)
 			mac_cb->mac_id, addr);
 }
 
+#define MAC_MEDIA_TYPE_MAX_LEN		16
+
+static const struct {
+	enum hnae_media_type value;
+	const char *name;
+} media_type_defs[] = {
+	{HNAE_MEDIA_TYPE_UNKNOWN,	"unknown" },
+	{HNAE_MEDIA_TYPE_FIBER,		"fiber" },
+	{HNAE_MEDIA_TYPE_COPPER,	"copper" },
+	{HNAE_MEDIA_TYPE_BACKPLANE,	"backplane" },
+};
+
 /**
  *hns_mac_get_info  - get mac information from device node
  *@mac_cb: mac device
@@ -759,10 +756,13 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 	struct device_node *np;
 	struct regmap *syscon;
 	struct of_phandle_args cpld_args;
+	const char *media_type;
+	u32 i;
 	u32 ret;
 
 	mac_cb->link = false;
 	mac_cb->half_duplex = false;
+	mac_cb->media_type = HNAE_MEDIA_TYPE_UNKNOWN;
 	mac_cb->speed = mac_phy_to_speed[mac_cb->phy_if];
 	mac_cb->max_speed = mac_cb->speed;
 
@@ -864,6 +864,17 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 			mac_cb->mac_id);
 	}
 
+	if (!fwnode_property_read_string(mac_cb->fw_port, "media-type",
+					 &media_type)) {
+		for (i = 0; i < ARRAY_SIZE(media_type_defs); i++) {
+			if (!strncmp(media_type_defs[i].name, media_type,
+				     MAC_MEDIA_TYPE_MAX_LEN)) {
+				mac_cb->media_type = media_type_defs[i].value;
+				break;
+			}
+		}
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 05a6e8f7a419..4cbdf14f5c16 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -335,6 +335,7 @@ struct hns_mac_cb {
 	u64 txpkt_for_led;
 	u64 rxpkt_for_led;
 	enum hnae_port_type mac_type;
+	enum hnae_media_type media_type;
 	phy_interface_t phy_if;
 	enum hnae_loop loop_mode;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 67e8e1323205..2ef4277d00b3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -114,9 +114,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 
 			dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev,
 								  res);
-			if (!dsaf_dev->sc_base) {
+			if (IS_ERR(dsaf_dev->sc_base)) {
 				dev_err(dsaf_dev->dev, "subctrl can not map!\n");
-				return -ENOMEM;
+				return PTR_ERR(dsaf_dev->sc_base);
 			}
 
 			res = platform_get_resource(pdev, IORESOURCE_MEM,
@@ -128,9 +128,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 
 			dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev,
 								   res);
-			if (!dsaf_dev->sds_base) {
+			if (IS_ERR(dsaf_dev->sds_base)) {
 				dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n");
-				return -ENOMEM;
+				return PTR_ERR(dsaf_dev->sds_base);
 			}
 		} else {
 			dsaf_dev->sub_ctrl = syscon;
@@ -146,9 +146,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 		}
 	}
 	dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res);
-	if (!dsaf_dev->ppe_base) {
+	if (IS_ERR(dsaf_dev->ppe_base)) {
 		dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n");
-		return -ENOMEM;
+		return PTR_ERR(dsaf_dev->ppe_base);
 	}
 	dsaf_dev->ppe_paddr = res->start;
 
@@ -165,9 +165,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 			}
 		}
 		dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res);
-		if (!dsaf_dev->io_base) {
+		if (IS_ERR(dsaf_dev->io_base)) {
 			dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n");
-			return -ENOMEM;
+			return PTR_ERR(dsaf_dev->io_base);
 		}
 	}
 
@@ -2540,45 +2540,45 @@ static char *hns_dsaf_get_node_stats_strings(char *data, int node,
 	bool is_ver1 = AE_IS_VER1(dsaf_dev->dsaf_ver);
 
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_pad_drop_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_manage_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_rx_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_rx_pkt_id", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_rx_pause_frame", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_release_buf_num", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_sbm_drop_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_crc_false_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_bp_drop_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_lookup_rslt_drop_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_local_rslt_fail_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_vlan_drop_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 	snprintf(buff, ETH_GSTRING_LEN, "innod%d_stp_drop_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
-	if ((node < DSAF_SERVICE_NW_NUM) && (!is_ver1)) {
-		for (i = 0; i < DSAF_PRIO_NR; i++) {
-			snprintf(buff, ETH_GSTRING_LEN,
-				 "inod%d_pfc_prio%d_pkts", node, i);
-			buff = buff + ETH_GSTRING_LEN;
-		}
+	buff += ETH_GSTRING_LEN;
+	if (node < DSAF_SERVICE_NW_NUM && !is_ver1) {
 		for (i = 0; i < DSAF_PRIO_NR; i++) {
-			snprintf(buff, ETH_GSTRING_LEN,
-				 "onod%d_pfc_prio%d_pkts", node, i);
-			buff = buff + ETH_GSTRING_LEN;
+			snprintf(buff + 0 * ETH_GSTRING_LEN * DSAF_PRIO_NR,
+				 ETH_GSTRING_LEN, "inod%d_pfc_prio%d_pkts",
+				 node, i);
+			snprintf(buff + 1 * ETH_GSTRING_LEN * DSAF_PRIO_NR,
+				 ETH_GSTRING_LEN, "onod%d_pfc_prio%d_pkts",
+				 node, i);
+			buff += ETH_GSTRING_LEN;
 		}
+		buff += 1 * DSAF_PRIO_NR * ETH_GSTRING_LEN;
 	}
 	snprintf(buff, ETH_GSTRING_LEN, "onnod%d_tx_pkts", node);
-	buff = buff + ETH_GSTRING_LEN;
+	buff += ETH_GSTRING_LEN;
 
 	return buff;
 }
@@ -2604,10 +2604,10 @@ static u64 *hns_dsaf_get_node_stats(struct dsaf_device *ddev, u64 *data,
 	p[10] = hw_stats->local_addr_false;
 	p[11] = hw_stats->vlan_drop;
 	p[12] = hw_stats->stp_drop;
-	if ((node_num < DSAF_SERVICE_NW_NUM) && (!is_ver1)) {
+	if (node_num < DSAF_SERVICE_NW_NUM && !is_ver1) {
 		for (i = 0; i < DSAF_PRIO_NR; i++) {
-			p[13 + i] = hw_stats->rx_pfc[i];
-			p[13 + i + DSAF_PRIO_NR] = hw_stats->tx_pfc[i];
+			p[13 + i + 0 * DSAF_PRIO_NR] = hw_stats->rx_pfc[i];
+			p[13 + i + 1 * DSAF_PRIO_NR] = hw_stats->tx_pfc[i];
 		}
 		p[29] = hw_stats->tx_pkts;
 		return &p[30];
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 8473287d4c8b..611b67b6f450 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -253,10 +253,9 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port,
 		reg_val_1  = 0x1 << port;
 		port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off;
 		/* there is difference between V1 and V2 in register.*/
-		if (AE_IS_VER1(dsaf_dev->dsaf_ver))
-			reg_val_2  = 0x1041041 << port_rst_off;
-		else
-			reg_val_2  = 0x2082082 << port_rst_off;
+		reg_val_2 = AE_IS_VER1(dsaf_dev->dsaf_ver) ?
+				0x1041041 : 0x2082082;
+		reg_val_2 <<= port_rst_off;
 
 		if (!dereset) {
 			dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG,
@@ -272,12 +271,11 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port,
 				       reg_val_1);
 		}
 	} else {
-		reg_val_1 = 0x15540 << dsaf_dev->reset_offset;
+		reg_val_1 = 0x15540;
+		reg_val_2 = AE_IS_VER1(dsaf_dev->dsaf_ver) ? 0x100 : 0x40;
 
-		if (AE_IS_VER1(dsaf_dev->dsaf_ver))
-			reg_val_2 = 0x100 << dsaf_dev->reset_offset;
-		else
-			reg_val_2 = 0x40 << dsaf_dev->reset_offset;
+		reg_val_1 <<= dsaf_dev->reset_offset;
+		reg_val_2 <<= dsaf_dev->reset_offset;
 
 		if (!dereset) {
 			dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index d5297ecfe4a5..d7e1f8c7ae92 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -762,13 +762,13 @@ static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data,
 	recv_pkts = 0, recv_bds = 0, clean_count = 0;
 recv:
 	while (recv_pkts < budget && recv_bds < num) {
-		/* reuse or realloc buffers*/
+		/* reuse or realloc buffers */
 		if (clean_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
 			hns_nic_alloc_rx_buffers(ring_data, clean_count);
 			clean_count = 0;
 		}
 
-		/* poll one pkt*/
+		/* poll one pkt */
 		err = hns_nic_poll_rx_skb(ring_data, &skb, &bnum);
 		if (unlikely(!skb)) /* this fault cannot be repaired */
 			goto out;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index a395ca1405c3..ab33487a5321 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -165,13 +165,21 @@ static int hns_nic_get_settings(struct net_device *net_dev,
 		cmd->advertising |= ADVERTISED_10000baseKR_Full;
 	}
 
-	if (h->port_type == HNAE_PORT_SERVICE) {
+	switch (h->media_type) {
+	case HNAE_MEDIA_TYPE_FIBER:
 		cmd->port = PORT_FIBRE;
-		cmd->supported |= SUPPORTED_Pause;
-	} else {
+		break;
+	case HNAE_MEDIA_TYPE_COPPER:
 		cmd->port = PORT_TP;
+		break;
+	case HNAE_MEDIA_TYPE_UNKNOWN:
+	default:
+		break;
 	}
 
+	if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG))
+		cmd->supported |= SUPPORTED_Pause;
+
 	cmd->transceiver = XCVR_EXTERNAL;
 	cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22);
 	hns_get_mdix_mode(net_dev, cmd);
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 761a32fceceb..33f4c483af0f 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -37,9 +37,19 @@
 
 #define MDIO_TIMEOUT			1000000
 
+struct hns_mdio_sc_reg {
+	u16 mdio_clk_en;
+	u16 mdio_clk_dis;
+	u16 mdio_reset_req;
+	u16 mdio_reset_dreq;
+	u16 mdio_clk_st;
+	u16 mdio_reset_st;
+};
+
 struct hns_mdio_device {
 	void *vbase;		/* mdio reg base address */
 	struct regmap *subctrl_vbase;
+	struct hns_mdio_sc_reg sc_reg;
 };
 
 /* mdio reg */
@@ -93,7 +103,6 @@ enum mdio_c45_op_seq {
 #define MDIO_SC_CLK_DIS		0x33C
 #define MDIO_SC_RESET_REQ	0xA38
 #define MDIO_SC_RESET_DREQ	0xA3C
-#define MDIO_SC_CTRL		0x2010
 #define MDIO_SC_CLK_ST		0x531C
 #define MDIO_SC_RESET_ST	0x5A1C
 
@@ -353,6 +362,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 static int hns_mdio_reset(struct mii_bus *bus)
 {
 	struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv;
+	const struct hns_mdio_sc_reg *sc_reg;
 	int ret;
 
 	if (dev_of_node(bus->parent)) {
@@ -361,9 +371,10 @@ static int hns_mdio_reset(struct mii_bus *bus)
 			return -ENODEV;
 		}
 
+		sc_reg = &mdio_dev->sc_reg;
 		/* 1. reset req, and read reset st check */
-		ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_RESET_REQ, 0x1,
-					    MDIO_SC_RESET_ST, 0x1,
+		ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_reset_req,
+					    0x1, sc_reg->mdio_reset_st, 0x1,
 					    MDIO_CHECK_SET_ST);
 		if (ret) {
 			dev_err(&bus->dev, "MDIO reset fail\n");
@@ -371,8 +382,8 @@ static int hns_mdio_reset(struct mii_bus *bus)
 		}
 
 		/* 2. dis clk, and read clk st check */
-		ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_CLK_DIS,
-					    0x1, MDIO_SC_CLK_ST, 0x1,
+		ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_clk_dis,
+					    0x1, sc_reg->mdio_clk_st, 0x1,
 					    MDIO_CHECK_CLR_ST);
 		if (ret) {
 			dev_err(&bus->dev, "MDIO dis clk fail\n");
@@ -380,8 +391,8 @@ static int hns_mdio_reset(struct mii_bus *bus)
 		}
 
 		/* 3. reset dreq, and read reset st check */
-		ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_RESET_DREQ, 0x1,
-					    MDIO_SC_RESET_ST, 0x1,
+		ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_reset_dreq,
+					    0x1, sc_reg->mdio_reset_st, 0x1,
 					    MDIO_CHECK_CLR_ST);
 		if (ret) {
 			dev_err(&bus->dev, "MDIO dis clk fail\n");
@@ -389,8 +400,8 @@ static int hns_mdio_reset(struct mii_bus *bus)
 		}
 
 		/* 4. en clk, and read clk st check */
-		ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_CLK_EN,
-					    0x1, MDIO_SC_CLK_ST, 0x1,
+		ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_clk_en,
+					    0x1, sc_reg->mdio_clk_st, 0x1,
 					    MDIO_CHECK_SET_ST);
 		if (ret)
 			dev_err(&bus->dev, "MDIO en clk fail\n");
@@ -458,13 +469,54 @@ static int hns_mdio_probe(struct platform_device *pdev)
 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%s", "Mii",
 		 dev_name(&pdev->dev));
 	if (dev_of_node(&pdev->dev)) {
-		mdio_dev->subctrl_vbase = syscon_node_to_regmap(
-			of_parse_phandle(pdev->dev.of_node,
-					 "subctrl-vbase", 0));
-		if (IS_ERR(mdio_dev->subctrl_vbase)) {
-			dev_warn(&pdev->dev, "no syscon hisilicon,peri-c-subctrl\n");
+		struct of_phandle_args reg_args;
+
+		ret = of_parse_phandle_with_fixed_args(pdev->dev.of_node,
+						       "subctrl-vbase",
+						       4,
+						       0,
+						       &reg_args);
+		if (!ret) {
+			mdio_dev->subctrl_vbase =
+				syscon_node_to_regmap(reg_args.np);
+			if (IS_ERR(mdio_dev->subctrl_vbase)) {
+				dev_warn(&pdev->dev, "syscon_node_to_regmap error\n");
+				mdio_dev->subctrl_vbase = NULL;
+			} else {
+				if (reg_args.args_count == 4) {
+					mdio_dev->sc_reg.mdio_clk_en =
+						(u16)reg_args.args[0];
+					mdio_dev->sc_reg.mdio_clk_dis =
+						(u16)reg_args.args[0] + 4;
+					mdio_dev->sc_reg.mdio_reset_req =
+						(u16)reg_args.args[1];
+					mdio_dev->sc_reg.mdio_reset_dreq =
+						(u16)reg_args.args[1] + 4;
+					mdio_dev->sc_reg.mdio_clk_st =
+						(u16)reg_args.args[2];
+					mdio_dev->sc_reg.mdio_reset_st =
+						(u16)reg_args.args[3];
+				} else {
+					/* for compatible */
+					mdio_dev->sc_reg.mdio_clk_en =
+						MDIO_SC_CLK_EN;
+					mdio_dev->sc_reg.mdio_clk_dis =
+						MDIO_SC_CLK_DIS;
+					mdio_dev->sc_reg.mdio_reset_req =
+						MDIO_SC_RESET_REQ;
+					mdio_dev->sc_reg.mdio_reset_dreq =
+						MDIO_SC_RESET_DREQ;
+					mdio_dev->sc_reg.mdio_clk_st =
+						MDIO_SC_CLK_ST;
+					mdio_dev->sc_reg.mdio_reset_st =
+						MDIO_SC_RESET_ST;
+				}
+			}
+		} else {
+			dev_warn(&pdev->dev, "find syscon ret = %#x\n", ret);
 			mdio_dev->subctrl_vbase = NULL;
 		}
+
 		ret = of_mdiobus_register(new_bus, pdev->dev.of_node);
 	} else if (is_acpi_node(pdev->dev.fwnode)) {
 		/* Clear all the IRQ properties */
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 864cb21351a4..ecdb6854a898 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2121,7 +2121,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
 				  struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_error_buff *error_buff;
+	struct ibmvnic_error_buff *error_buff, *tmp;
 	unsigned long flags;
 	bool found = false;
 	int i;
@@ -2133,7 +2133,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
 	}
 
 	spin_lock_irqsave(&adapter->error_list_lock, flags);
-	list_for_each_entry(error_buff, &adapter->errors, list)
+	list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list)
 		if (error_buff->error_id == crq->request_error_rsp.error_id) {
 			found = true;
 			list_del(&error_buff->list);
@@ -3141,14 +3141,14 @@ static void handle_request_ras_comp_num_rsp(union ibmvnic_crq *crq,
 
 static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_inflight_cmd *inflight_cmd;
+	struct ibmvnic_inflight_cmd *inflight_cmd, *tmp1;
 	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_error_buff *error_buff;
+	struct ibmvnic_error_buff *error_buff, *tmp2;
 	unsigned long flags;
 	unsigned long flags2;
 
 	spin_lock_irqsave(&adapter->inflight_lock, flags);
-	list_for_each_entry(inflight_cmd, &adapter->inflight, list) {
+	list_for_each_entry_safe(inflight_cmd, tmp1, &adapter->inflight, list) {
 		switch (inflight_cmd->crq.generic.cmd) {
 		case LOGIN:
 			dma_unmap_single(dev, adapter->login_buf_token,
@@ -3165,8 +3165,8 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 			break;
 		case REQUEST_ERROR_INFO:
 			spin_lock_irqsave(&adapter->error_list_lock, flags2);
-			list_for_each_entry(error_buff, &adapter->errors,
-					    list) {
+			list_for_each_entry_safe(error_buff, tmp2,
+						 &adapter->errors, list) {
 				dma_unmap_single(dev, error_buff->dma,
 						 error_buff->len,
 						 DMA_FROM_DEVICE);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 75e60897b7e7..41f32c0b341e 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -2789,7 +2789,7 @@ static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter)
 }
 
 /**
- * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping
+ * e1000e_vlan_strip_disable - helper to disable HW VLAN stripping
  * @adapter: board private structure to initialize
  **/
 static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter)
@@ -4352,7 +4352,8 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
 
 			time_delta = systim_next - systim;
 			temp = time_delta;
-			rem = do_div(temp, incvalue);
+			/* VMWare users have seen incvalue of zero, don't div / 0 */
+			rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
 
 			systim = systim_next;
 
@@ -6915,6 +6916,14 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev,
 	if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN))
 		features &= ~NETIF_F_RXFCS;
 
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable make sure Tx flag is always in same state as Rx.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
 	return features;
 }
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index fcf106e545c5..e98b86bf0ca1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -406,7 +406,7 @@ static inline u16 fm10k_desc_unused(struct fm10k_ring *ring)
 	 (&(((union fm10k_rx_desc *)((R)->desc))[i]))
 
 #define FM10K_MAX_TXD_PWR	14
-#define FM10K_MAX_DATA_PER_TXD	BIT(FM10K_MAX_TXD_PWR)
+#define FM10K_MAX_DATA_PER_TXD	(1u << FM10K_MAX_TXD_PWR)
 
 /* Tx Descriptors needed, worst case */
 #define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), FM10K_MAX_DATA_PER_TXD)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index 9c0d87503977..9b5195435c87 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -983,9 +983,10 @@ void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir)
 		/* generate a new table if we weren't given one */
 		for (j = 0; j < 4; j++) {
 			if (indir)
-				n = indir[i + j];
+				n = indir[4 * i + j];
 			else
-				n = ethtool_rxfh_indir_default(i + j, rss_i);
+				n = ethtool_rxfh_indir_default(4 * i + j,
+							       rss_i);
 
 			table[j] = n;
 		}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 0e166e9c90c8..a9ccc1eb3ea4 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -56,7 +56,7 @@ static int __init fm10k_init_module(void)
 	pr_info("%s\n", fm10k_copyright);
 
 	/* create driver workqueue */
-	fm10k_workqueue = create_workqueue("fm10k");
+	fm10k_workqueue = alloc_workqueue("fm10k", WQ_MEM_RECLAIM, 0);
 
 	fm10k_dbg_init();
 
@@ -77,7 +77,6 @@ static void __exit fm10k_exit_module(void)
 	fm10k_dbg_exit();
 
 	/* destroy driver workqueue */
-	flush_workqueue(fm10k_workqueue);
 	destroy_workqueue(fm10k_workqueue);
 }
 module_exit(fm10k_exit_module);
@@ -272,7 +271,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = FM10K_RX_BUFSZ;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = ALIGN(size, 512);
 #endif
 	unsigned int pull_len;
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index d00cb193da9a..20a5bbe3f536 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -432,9 +432,7 @@ static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface)
 /**
  * fm10k_add_vxlan_port
  * @netdev: network interface device structure
- * @sa_family: Address family of new port
- * @port: port number used for VXLAN
- * @type: Enumerated value specifying udp encapsulation type
+ * @ti: Tunnel endpoint information
  *
  * This function is called when a new VXLAN interface has added a new port
  * number to the range that is currently in use for VXLAN.  The new port
@@ -480,9 +478,7 @@ insert_tail:
 /**
  * fm10k_del_vxlan_port
  * @netdev: network interface device structure
- * @sa_family: Address family of freed port
- * @port: port number used for VXLAN
- * @type: Enumerated value specifying udp encapsulation type
+ * @ti: Tunnel endpoint information
  *
  * This function is called when a new VXLAN interface has freed a port
  * number from the range that is currently in use for VXLAN.  The freed
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 9c44739da5e2..e83fc8afb30f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -283,6 +283,7 @@ struct i40e_pf {
 #endif /* I40E_FCOE */
 	u16 num_lan_qps;           /* num lan queues this PF has set up */
 	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
+	u16 num_fdsb_msix;         /* num queue vectors for sideband Fdir */
 	u16 num_iwarp_msix;        /* num of iwarp vectors for this PF */
 	int iwarp_base_vector;
 	int queues_left;           /* queues left unclaimed */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 422b41d61c9a..e447dc435464 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1967,6 +1967,62 @@ aq_add_vsi_exit:
 }
 
 /**
+ * i40e_aq_set_default_vsi
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw,
+				    u16 seid,
+				    struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)
+		&desc.params.raw;
+	i40e_status status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	cmd->promiscuous_flags = cpu_to_le16(I40E_AQC_SET_VSI_DEFAULT);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_DEFAULT);
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_clear_default_vsi
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_clear_default_vsi(struct i40e_hw *hw,
+				      u16 seid,
+				      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)
+		&desc.params.raw;
+	i40e_status status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	cmd->promiscuous_flags = cpu_to_le16(0);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_DEFAULT);
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
  * i40e_aq_set_vsi_unicast_promiscuous
  * @hw: pointer to the hw struct
  * @seid: vsi number
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5e8d84ff7d5f..4962e855fbd3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -313,8 +313,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
 		*advertising |= ADVERTISED_Autoneg |
 				ADVERTISED_40000baseCR4_Full;
 	}
-	if ((phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) &&
-	    !(phy_types & I40E_CAP_PHY_TYPE_1000BASE_T)) {
+	if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
 		*supported |= SUPPORTED_Autoneg |
 			      SUPPORTED_100baseT_Full;
 		*advertising |= ADVERTISED_Autoneg |
@@ -663,6 +662,7 @@ static int i40e_set_settings(struct net_device *netdev,
 	if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
 	    hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
 	    hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
+	    hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
 	    hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
 		return -EOPNOTSUPP;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 734cba693d16..2b1140563a64 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -40,8 +40,8 @@ static const char i40e_driver_string[] =
 #define DRV_KERN "-k"
 
 #define DRV_VERSION_MAJOR 1
-#define DRV_VERSION_MINOR 5
-#define DRV_VERSION_BUILD 16
+#define DRV_VERSION_MINOR 6
+#define DRV_VERSION_BUILD 4
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -1579,14 +1579,8 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 	vsi->tc_config.numtc = numtc;
 	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
 	/* Number of queues per enabled TC */
-	/* In MFP case we can have a much lower count of MSIx
-	 * vectors available and so we need to lower the used
-	 * q count.
-	 */
-	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
-		qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix);
-	else
-		qcount = vsi->alloc_queue_pairs;
+	qcount = vsi->alloc_queue_pairs;
+
 	num_tc_qps = qcount / numtc;
 	num_tc_qps = min_t(int, num_tc_qps, i40e_pf_get_max_q_per_tc(pf));
 
@@ -1840,8 +1834,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 {
 	struct list_head tmp_del_list, tmp_add_list;
 	struct i40e_mac_filter *f, *ftmp, *fclone;
+	struct i40e_hw *hw = &vsi->back->hw;
 	bool promisc_forced_on = false;
 	bool add_happened = false;
+	char vsi_name[16] = "PF";
 	int filter_list_len = 0;
 	u32 changed_flags = 0;
 	i40e_status aq_ret = 0;
@@ -1869,6 +1865,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 	INIT_LIST_HEAD(&tmp_del_list);
 	INIT_LIST_HEAD(&tmp_add_list);
 
+	if (vsi->type == I40E_VSI_SRIOV)
+		snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id);
+	else if (vsi->type != I40E_VSI_MAIN)
+		snprintf(vsi_name, sizeof(vsi_name) - 1, "vsi %d", vsi->seid);
+
 	if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
 		vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
 
@@ -1920,7 +1921,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 	if (!list_empty(&tmp_del_list)) {
 		int del_list_size;
 
-		filter_list_len = pf->hw.aq.asq_buf_size /
+		filter_list_len = hw->aq.asq_buf_size /
 			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
 		del_list_size = filter_list_len *
 			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
@@ -1952,21 +1953,21 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
 			/* flush a full buffer */
 			if (num_del == filter_list_len) {
-				aq_ret = i40e_aq_remove_macvlan(&pf->hw,
-								vsi->seid,
-								del_list,
-								num_del,
-								NULL);
-				aq_err = pf->hw.aq.asq_last_status;
+				aq_ret =
+					i40e_aq_remove_macvlan(hw, vsi->seid,
+							       del_list,
+							       num_del, NULL);
+				aq_err = hw->aq.asq_last_status;
 				num_del = 0;
 				memset(del_list, 0, del_list_size);
 
 				if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) {
 					retval = -EIO;
 					dev_err(&pf->pdev->dev,
-						"ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n",
-						i40e_stat_str(&pf->hw, aq_ret),
-						i40e_aq_str(&pf->hw, aq_err));
+						 "ignoring delete macvlan error on %s, err %s, aq_err %s while flushing a full buffer\n",
+						 vsi_name,
+						 i40e_stat_str(hw, aq_ret),
+						 i40e_aq_str(hw, aq_err));
 				}
 			}
 			/* Release memory for MAC filter entries which were
@@ -1977,17 +1978,17 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		}
 
 		if (num_del) {
-			aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
-							del_list, num_del,
-							NULL);
-			aq_err = pf->hw.aq.asq_last_status;
+			aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, del_list,
+							num_del, NULL);
+			aq_err = hw->aq.asq_last_status;
 			num_del = 0;
 
 			if (aq_ret && aq_err != I40E_AQ_RC_ENOENT)
 				dev_info(&pf->pdev->dev,
-					 "ignoring delete macvlan error, err %s aq_err %s\n",
-					 i40e_stat_str(&pf->hw, aq_ret),
-					 i40e_aq_str(&pf->hw, aq_err));
+					 "ignoring delete macvlan error on %s, err %s aq_err %s\n",
+					 vsi_name,
+					 i40e_stat_str(hw, aq_ret),
+					 i40e_aq_str(hw, aq_err));
 		}
 
 		kfree(del_list);
@@ -1998,7 +1999,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		int add_list_size;
 
 		/* do all the adds now */
-		filter_list_len = pf->hw.aq.asq_buf_size /
+		filter_list_len = hw->aq.asq_buf_size /
 			       sizeof(struct i40e_aqc_add_macvlan_element_data),
 		add_list_size = filter_list_len *
 			       sizeof(struct i40e_aqc_add_macvlan_element_data);
@@ -2033,10 +2034,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
 			/* flush a full buffer */
 			if (num_add == filter_list_len) {
-				aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+				aq_ret = i40e_aq_add_macvlan(hw, vsi->seid,
 							     add_list, num_add,
 							     NULL);
-				aq_err = pf->hw.aq.asq_last_status;
+				aq_err = hw->aq.asq_last_status;
 				num_add = 0;
 
 				if (aq_ret)
@@ -2051,9 +2052,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		}
 
 		if (num_add) {
-			aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+			aq_ret = i40e_aq_add_macvlan(hw, vsi->seid,
 						     add_list, num_add, NULL);
-			aq_err = pf->hw.aq.asq_last_status;
+			aq_err = hw->aq.asq_last_status;
 			num_add = 0;
 		}
 		kfree(add_list);
@@ -2062,16 +2063,18 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		if (add_happened && aq_ret && aq_err != I40E_AQ_RC_EINVAL) {
 			retval = i40e_aq_rc_to_posix(aq_ret, aq_err);
 			dev_info(&pf->pdev->dev,
-				 "add filter failed, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, aq_ret),
-				 i40e_aq_str(&pf->hw, aq_err));
-			if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) &&
+				 "add filter failed on %s, err %s aq_err %s\n",
+				 vsi_name,
+				 i40e_stat_str(hw, aq_ret),
+				 i40e_aq_str(hw, aq_err));
+			if ((hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) &&
 			    !test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
 				      &vsi->state)) {
 				promisc_forced_on = true;
 				set_bit(__I40E_FILTER_OVERFLOW_PROMISC,
 					&vsi->state);
-				dev_info(&pf->pdev->dev, "promiscuous mode forced on\n");
+				dev_info(&pf->pdev->dev, "promiscuous mode forced on %s\n",
+					 vsi_name);
 			}
 		}
 	}
@@ -2093,12 +2096,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 							       NULL);
 		if (aq_ret) {
 			retval = i40e_aq_rc_to_posix(aq_ret,
-						     pf->hw.aq.asq_last_status);
+						     hw->aq.asq_last_status);
 			dev_info(&pf->pdev->dev,
-				 "set multi promisc failed, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, aq_ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
+				 "set multi promisc failed on %s, err %s aq_err %s\n",
+				 vsi_name,
+				 i40e_stat_str(hw, aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
 		}
 	}
 	if ((changed_flags & IFF_PROMISC) || promisc_forced_on) {
@@ -2117,33 +2120,58 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 			 */
 			if (pf->cur_promisc != cur_promisc) {
 				pf->cur_promisc = cur_promisc;
-				set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+				if (cur_promisc)
+					aq_ret =
+					      i40e_aq_set_default_vsi(hw,
+								      vsi->seid,
+								      NULL);
+				else
+					aq_ret =
+					    i40e_aq_clear_default_vsi(hw,
+								      vsi->seid,
+								      NULL);
+				if (aq_ret) {
+					retval = i40e_aq_rc_to_posix(aq_ret,
+							hw->aq.asq_last_status);
+					dev_info(&pf->pdev->dev,
+						 "Set default VSI failed on %s, err %s, aq_err %s\n",
+						 vsi_name,
+						 i40e_stat_str(hw, aq_ret),
+						 i40e_aq_str(hw,
+						     hw->aq.asq_last_status));
+				}
 			}
 		} else {
 			aq_ret = i40e_aq_set_vsi_unicast_promiscuous(
-							  &vsi->back->hw,
+							  hw,
 							  vsi->seid,
 							  cur_promisc, NULL,
 							  true);
 			if (aq_ret) {
 				retval =
 				i40e_aq_rc_to_posix(aq_ret,
-						    pf->hw.aq.asq_last_status);
+						    hw->aq.asq_last_status);
 				dev_info(&pf->pdev->dev,
-					 "set unicast promisc failed, err %d, aq_err %d\n",
-					 aq_ret, pf->hw.aq.asq_last_status);
+					 "set unicast promisc failed on %s, err %s, aq_err %s\n",
+					 vsi_name,
+					 i40e_stat_str(hw, aq_ret),
+					 i40e_aq_str(hw,
+						     hw->aq.asq_last_status));
 			}
 			aq_ret = i40e_aq_set_vsi_multicast_promiscuous(
-							  &vsi->back->hw,
+							  hw,
 							  vsi->seid,
 							  cur_promisc, NULL);
 			if (aq_ret) {
 				retval =
 				i40e_aq_rc_to_posix(aq_ret,
-						    pf->hw.aq.asq_last_status);
+						    hw->aq.asq_last_status);
 				dev_info(&pf->pdev->dev,
-					 "set multicast promisc failed, err %d, aq_err %d\n",
-					 aq_ret, pf->hw.aq.asq_last_status);
+					 "set multicast promisc failed on %s, err %s, aq_err %s\n",
+					 vsi_name,
+					 i40e_stat_str(hw, aq_ret),
+					 i40e_aq_str(hw,
+						     hw->aq.asq_last_status));
 			}
 		}
 		aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw,
@@ -2154,9 +2182,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 						     pf->hw.aq.asq_last_status);
 			dev_info(&pf->pdev->dev,
 				 "set brdcast promisc failed, err %s, aq_err %s\n",
-				 i40e_stat_str(&pf->hw, aq_ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
+					 i40e_stat_str(hw, aq_ret),
+					 i40e_aq_str(hw,
+						     hw->aq.asq_last_status));
 		}
 	}
 out:
@@ -3947,6 +3975,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
 			/* clear the affinity_mask in the IRQ descriptor */
 			irq_set_affinity_hint(pf->msix_entries[vector].vector,
 					      NULL);
+			synchronize_irq(pf->msix_entries[vector].vector);
 			free_irq(pf->msix_entries[vector].vector,
 				 vsi->q_vectors[i]);
 
@@ -4953,7 +4982,6 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
 			if (pf->vsi[v]->netdev)
 				i40e_dcbnl_set_all(pf->vsi[v]);
 		}
-		i40e_notify_client_of_l2_param_changes(pf->vsi[v]);
 	}
 }
 
@@ -5178,12 +5206,6 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
 		usleep_range(1000, 2000);
 	i40e_down(vsi);
 
-	/* Give a VF some time to respond to the reset.  The
-	 * two second wait is based upon the watchdog cycle in
-	 * the VF driver.
-	 */
-	if (vsi->type == I40E_VSI_SRIOV)
-		msleep(2000);
 	i40e_up(vsi);
 	clear_bit(__I40E_CONFIG_BUSY, &pf->state);
 }
@@ -5226,6 +5248,9 @@ void i40e_down(struct i40e_vsi *vsi)
 		i40e_clean_tx_ring(vsi->tx_rings[i]);
 		i40e_clean_rx_ring(vsi->rx_rings[i]);
 	}
+
+	i40e_notify_client_of_netdev_close(vsi, false);
+
 }
 
 /**
@@ -5704,6 +5729,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
 		i40e_service_event_schedule(pf);
 	} else {
 		i40e_pf_unquiesce_all_vsi(pf);
+		/* Notify the client for the DCB changes */
+		i40e_notify_client_of_l2_param_changes(pf->vsi[pf->lan_vsi]);
 	}
 
 exit:
@@ -5928,7 +5955,6 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
 	}
-
 }
 
 /**
@@ -7160,7 +7186,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
 		vsi->alloc_queue_pairs = 1;
 		vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT,
 				      I40E_REQ_DESCRIPTOR_MULTIPLE);
-		vsi->num_q_vectors = 1;
+		vsi->num_q_vectors = pf->num_fdsb_msix;
 		break;
 
 	case I40E_VSI_VMDQ2:
@@ -7544,9 +7570,11 @@ static int i40e_init_msix(struct i40e_pf *pf)
 	/* reserve one vector for sideband flow director */
 	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
 		if (vectors_left) {
+			pf->num_fdsb_msix = 1;
 			v_budget++;
 			vectors_left--;
 		} else {
+			pf->num_fdsb_msix = 0;
 			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
 		}
 	}
@@ -8565,7 +8593,9 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
 		/* Enable filters and mark for reset */
 		if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
 			need_reset = true;
-		pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+		/* enable FD_SB only if there is MSI-X vector */
+		if (pf->num_fdsb_msix > 0)
+			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
 	} else {
 		/* turn off filters, mark for reset and clear SW filter list */
 		if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
@@ -10053,14 +10083,14 @@ void i40e_veb_release(struct i40e_veb *veb)
 static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = veb->pf;
-	bool is_default = veb->pf->cur_promisc;
 	bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
 	int ret;
 
-	/* get a VEB from the hardware */
 	ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
-			      veb->enabled_tc, is_default,
+			      veb->enabled_tc, false,
 			      &veb->seid, enable_stats, NULL);
+
+	/* get a VEB from the hardware */
 	if (ret) {
 		dev_info(&pf->pdev->dev,
 			 "couldn't add VEB, err %s aq_err %s\n",
@@ -11441,6 +11471,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 	struct i40e_hw *hw = &pf->hw;
+	int retval = 0;
 
 	set_bit(__I40E_SUSPENDED, &pf->state);
 	set_bit(__I40E_DOWN, &pf->state);
@@ -11452,10 +11483,16 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
 	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
 	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
 
+	i40e_stop_misc_vector(pf);
+
+	retval = pci_save_state(pdev);
+	if (retval)
+		return retval;
+
 	pci_wake_from_d3(pdev, pf->wol_en);
 	pci_set_power_state(pdev, PCI_D3hot);
 
-	return 0;
+	return retval;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 80403c6ee7f0..4660c5abc855 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -98,6 +98,8 @@ i40e_status i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
 				struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+				      struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
 			bool qualified_modules, bool report_init,
 			struct i40e_aq_get_phy_abilities_resp *abilities,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 1fcafcfa8f14..6fcbf764f32b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -665,6 +665,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 		goto error_alloc_vsi_res;
 	}
 	if (type == I40E_VSI_SRIOV) {
+		u64 hena = i40e_pf_get_default_rss_hena(pf);
+
 		vf->lan_vsi_idx = vsi->idx;
 		vf->lan_vsi_id = vsi->id;
 		/* If the port VLAN has been configured and then the
@@ -687,6 +689,10 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 					vf->default_lan_addr.addr, vf->vf_id);
 		}
 		spin_unlock_bh(&vsi->mac_filter_list_lock);
+		i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id),
+				  (u32)hena);
+		i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id),
+				  (u32)(hena >> 32));
 	}
 
 	/* program mac filter */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 16c552952860..eac057b88055 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -37,8 +37,8 @@ static const char i40evf_driver_string[] =
 #define DRV_KERN "-k"
 
 #define DRV_VERSION_MAJOR 1
-#define DRV_VERSION_MINOR 5
-#define DRV_VERSION_BUILD 10
+#define DRV_VERSION_MINOR 6
+#define DRV_VERSION_BUILD 4
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD) \
@@ -825,7 +825,7 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
 
 		ether_addr_copy(f->macaddr, macaddr);
 
-		list_add(&f->list, &adapter->mac_filter_list);
+		list_add_tail(&f->list, &adapter->mac_filter_list);
 		f->add = true;
 		adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
 	}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index f13445691507..d76c221d4c8a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -434,6 +434,8 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 			ether_addr_copy(veal->list[i].addr, f->macaddr);
 			i++;
 			f->add = false;
+			if (i == count)
+				break;
 		}
 	}
 	if (!more)
@@ -497,6 +499,8 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 			i++;
 			list_del(&f->list);
 			kfree(f);
+			if (i == count)
+				break;
 		}
 	}
 	if (!more)
@@ -560,6 +564,8 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 			vvfl->vlan_id[i] = f->vlan;
 			i++;
 			f->add = false;
+			if (i == count)
+				break;
 		}
 	}
 	if (!more)
@@ -623,6 +629,8 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 			i++;
 			list_del(&f->list);
 			kfree(f);
+			if (i == count)
+				break;
 		}
 	}
 	if (!more)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index b9609afa5ca3..5387b3a96489 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -445,6 +445,7 @@ struct igb_adapter {
 	unsigned long ptp_tx_start;
 	unsigned long last_rx_ptp_check;
 	unsigned long last_rx_timestamp;
+	unsigned int ptp_flags;
 	spinlock_t tmreg_lock;
 	struct cyclecounter cc;
 	struct timecounter tc;
@@ -474,12 +475,15 @@ struct igb_adapter {
 	u16 eee_advert;
 };
 
+/* flags controlling PTP/1588 function */
+#define IGB_PTP_ENABLED		BIT(0)
+#define IGB_PTP_OVERFLOW_CHECK	BIT(1)
+
 #define IGB_FLAG_HAS_MSI		BIT(0)
 #define IGB_FLAG_DCA_ENABLED		BIT(1)
 #define IGB_FLAG_QUAD_PORT_A		BIT(2)
 #define IGB_FLAG_QUEUE_PAIRS		BIT(3)
 #define IGB_FLAG_DMAC			BIT(4)
-#define IGB_FLAG_PTP			BIT(5)
 #define IGB_FLAG_RSS_FIELD_IPV4_UDP	BIT(6)
 #define IGB_FLAG_RSS_FIELD_IPV6_UDP	BIT(7)
 #define IGB_FLAG_WOL_SUPPORTED		BIT(8)
@@ -546,6 +550,7 @@ void igb_set_fw_version(struct igb_adapter *);
 void igb_ptp_init(struct igb_adapter *adapter);
 void igb_ptp_stop(struct igb_adapter *adapter);
 void igb_ptp_reset(struct igb_adapter *adapter);
+void igb_ptp_suspend(struct igb_adapter *adapter);
 void igb_ptp_rx_hang(struct igb_adapter *adapter);
 void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
 void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va,
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ef3d642f5ff2..9bcba42abb91 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2027,7 +2027,8 @@ void igb_reset(struct igb_adapter *adapter)
 	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
 
 	/* Re-enable PTP, where applicable. */
-	igb_ptp_reset(adapter);
+	if (adapter->ptp_flags & IGB_PTP_ENABLED)
+		igb_ptp_reset(adapter);
 
 	igb_get_phy_info(hw);
 }
@@ -6855,12 +6856,12 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
  **/
 static bool igb_add_rx_frag(struct igb_ring *rx_ring,
 			    struct igb_rx_buffer *rx_buffer,
+			    unsigned int size,
 			    union e1000_adv_rx_desc *rx_desc,
 			    struct sk_buff *skb)
 {
 	struct page *page = rx_buffer->page;
 	unsigned char *va = page_address(page) + rx_buffer->page_offset;
-	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = IGB_RX_BUFSZ;
 #else
@@ -6912,6 +6913,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 					   union e1000_adv_rx_desc *rx_desc,
 					   struct sk_buff *skb)
 {
+	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
 	struct igb_rx_buffer *rx_buffer;
 	struct page *page;
 
@@ -6947,11 +6949,11 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 	dma_sync_single_range_for_cpu(rx_ring->dev,
 				      rx_buffer->dma,
 				      rx_buffer->page_offset,
-				      IGB_RX_BUFSZ,
+				      size,
 				      DMA_FROM_DEVICE);
 
 	/* pull page into skb */
-	if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+	if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
 		/* hand second half of page back to the ring */
 		igb_reuse_rx_page(rx_ring, rx_buffer);
 	} else {
@@ -7527,6 +7529,8 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
 	if (netif_running(netdev))
 		__igb_close(netdev, true);
 
+	igb_ptp_suspend(adapter);
+
 	igb_clear_interrupt_scheme(adapter);
 
 #ifdef CONFIG_PM
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index f097c5a8ab93..e61b647f5f2a 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -684,6 +684,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter)
 	u32 tsyncrxctl = rd32(E1000_TSYNCRXCTL);
 	unsigned long rx_event;
 
+	/* Other hardware uses per-packet timestamps */
 	if (hw->mac.type != e1000_82576)
 		return;
 
@@ -1042,6 +1043,13 @@ int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
 		-EFAULT : 0;
 }
 
+/**
+ * igb_ptp_init - Initialize PTP functionality
+ * @adapter: Board private structure
+ *
+ * This function is called at device probe to initialize the PTP
+ * functionality.
+ */
 void igb_ptp_init(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
@@ -1064,8 +1072,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		adapter->cc.mask = CYCLECOUNTER_MASK(64);
 		adapter->cc.mult = 1;
 		adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
-		/* Dial the nominal frequency. */
-		wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
+		adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
 		break;
 	case e1000_82580:
 	case e1000_i354:
@@ -1084,8 +1091,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		adapter->cc.mask = CYCLECOUNTER_MASK(IGB_NBITS_82580);
 		adapter->cc.mult = 1;
 		adapter->cc.shift = 0;
-		/* Enable the timer functions by clearing bit 31. */
-		wr32(E1000_TSAUXC, 0x0);
+		adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
 		break;
 	case e1000_i210:
 	case e1000_i211:
@@ -1110,44 +1116,24 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
 		adapter->ptp_caps.enable = igb_ptp_feature_enable_i210;
 		adapter->ptp_caps.verify = igb_ptp_verify_pin;
-		/* Enable the timer functions by clearing bit 31. */
-		wr32(E1000_TSAUXC, 0x0);
 		break;
 	default:
 		adapter->ptp_clock = NULL;
 		return;
 	}
 
-	wrfl();
-
 	spin_lock_init(&adapter->tmreg_lock);
 	INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
 
-	/* Initialize the clock and overflow work for devices that need it. */
-	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
-		struct timespec64 ts = ktime_to_timespec64(ktime_get_real());
-
-		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
-	} else {
-		timecounter_init(&adapter->tc, &adapter->cc,
-				 ktime_to_ns(ktime_get_real()));
-
+	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
 		INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
 				  igb_ptp_overflow_check);
 
-		schedule_delayed_work(&adapter->ptp_overflow_work,
-				      IGB_SYSTIM_OVERFLOW_PERIOD);
-	}
-
-	/* Initialize the time sync interrupts for devices that support it. */
-	if (hw->mac.type >= e1000_82580) {
-		wr32(E1000_TSIM, TSYNC_INTERRUPTS);
-		wr32(E1000_IMS, E1000_IMS_TS);
-	}
-
 	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
 	adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
 
+	igb_ptp_reset(adapter);
+
 	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
 						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
@@ -1156,32 +1142,24 @@ void igb_ptp_init(struct igb_adapter *adapter)
 	} else {
 		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
 			 adapter->netdev->name);
-		adapter->flags |= IGB_FLAG_PTP;
+		adapter->ptp_flags |= IGB_PTP_ENABLED;
 	}
 }
 
 /**
- * igb_ptp_stop - Disable PTP device and stop the overflow check.
- * @adapter: Board private structure.
+ * igb_ptp_suspend - Disable PTP work items and prepare for suspend
+ * @adapter: Board private structure
  *
- * This function stops the PTP support and cancels the delayed work.
- **/
-void igb_ptp_stop(struct igb_adapter *adapter)
+ * This function stops the overflow check work and PTP Tx timestamp work, and
+ * will prepare the device for OS suspend.
+ */
+void igb_ptp_suspend(struct igb_adapter *adapter)
 {
-	switch (adapter->hw.mac.type) {
-	case e1000_82576:
-	case e1000_82580:
-	case e1000_i354:
-	case e1000_i350:
-		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
-		break;
-	case e1000_i210:
-	case e1000_i211:
-		/* No delayed work to cancel. */
-		break;
-	default:
+	if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
 		return;
-	}
+
+	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
 
 	cancel_work_sync(&adapter->ptp_tx_work);
 	if (adapter->ptp_tx_skb) {
@@ -1189,12 +1167,23 @@ void igb_ptp_stop(struct igb_adapter *adapter)
 		adapter->ptp_tx_skb = NULL;
 		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
 	}
+}
+
+/**
+ * igb_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igb_ptp_stop(struct igb_adapter *adapter)
+{
+	igb_ptp_suspend(adapter);
 
 	if (adapter->ptp_clock) {
 		ptp_clock_unregister(adapter->ptp_clock);
 		dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
 			 adapter->netdev->name);
-		adapter->flags &= ~IGB_FLAG_PTP;
+		adapter->ptp_flags &= ~IGB_PTP_ENABLED;
 	}
 }
 
@@ -1209,9 +1198,6 @@ void igb_ptp_reset(struct igb_adapter *adapter)
 	struct e1000_hw *hw = &adapter->hw;
 	unsigned long flags;
 
-	if (!(adapter->flags & IGB_FLAG_PTP))
-		return;
-
 	/* reset the tstamp_config */
 	igb_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
 
@@ -1248,4 +1234,10 @@ void igb_ptp_reset(struct igb_adapter *adapter)
 	}
 out:
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	wrfl();
+
+	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+		schedule_delayed_work(&adapter->ptp_overflow_work,
+				      IGB_SYSTIM_OVERFLOW_PERIOD);
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 59b771b9b354..8a8450788124 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2991,10 +2991,15 @@ static int ixgbe_get_ts_info(struct net_device *dev,
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 
+	/* we always support timestamping disabled */
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 	case ixgbe_mac_x550em_a:
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
+		/* fallthrough */
 	case ixgbe_mac_X540:
 	case ixgbe_mac_82599EB:
 		info->so_timestamping =
@@ -3014,8 +3019,7 @@ static int ixgbe_get_ts_info(struct net_device *dev,
 			BIT(HWTSTAMP_TX_OFF) |
 			BIT(HWTSTAMP_TX_ON);
 
-		info->rx_filters =
-			BIT(HWTSTAMP_FILTER_NONE) |
+		info->rx_filters |=
 			BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
 			BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
 			BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 468fa9ddfa06..918b94baca81 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8300,14 +8300,53 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
 static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
 			       struct tc_cls_u32_offload *cls)
 {
+	u32 hdl = cls->knode.handle;
 	u32 uhtid = TC_U32_USERHTID(cls->knode.handle);
-	u32 loc;
-	int err;
+	u32 loc = cls->knode.handle & 0xfffff;
+	int err = 0, i, j;
+	struct ixgbe_jump_table *jump = NULL;
+
+	if (loc > IXGBE_MAX_HW_ENTRIES)
+		return -EINVAL;
 
 	if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE))
 		return -EINVAL;
 
-	loc = cls->knode.handle & 0xfffff;
+	/* Clear this filter in the link data it is associated with */
+	if (uhtid != 0x800) {
+		jump = adapter->jump_tables[uhtid];
+		if (!jump)
+			return -EINVAL;
+		if (!test_bit(loc - 1, jump->child_loc_map))
+			return -EINVAL;
+		clear_bit(loc - 1, jump->child_loc_map);
+	}
+
+	/* Check if the filter being deleted is a link */
+	for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
+		jump = adapter->jump_tables[i];
+		if (jump && jump->link_hdl == hdl) {
+			/* Delete filters in the hardware in the child hash
+			 * table associated with this link
+			 */
+			for (j = 0; j < IXGBE_MAX_HW_ENTRIES; j++) {
+				if (!test_bit(j, jump->child_loc_map))
+					continue;
+				spin_lock(&adapter->fdir_perfect_lock);
+				err = ixgbe_update_ethtool_fdir_entry(adapter,
+								      NULL,
+								      j + 1);
+				spin_unlock(&adapter->fdir_perfect_lock);
+				clear_bit(j, jump->child_loc_map);
+			}
+			/* Remove resources for this link */
+			kfree(jump->input);
+			kfree(jump->mask);
+			kfree(jump);
+			adapter->jump_tables[i] = NULL;
+			return err;
+		}
+	}
 
 	spin_lock(&adapter->fdir_perfect_lock);
 	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc);
@@ -8541,6 +8580,18 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
 		if (!test_bit(link_uhtid - 1, &adapter->tables))
 			return err;
 
+		/* Multiple filters as links to the same hash table are not
+		 * supported. To add a new filter with the same next header
+		 * but different match/jump conditions, create a new hash table
+		 * and link to it.
+		 */
+		if (adapter->jump_tables[link_uhtid] &&
+		    (adapter->jump_tables[link_uhtid])->link_hdl) {
+			e_err(drv, "Link filter exists for link: %x\n",
+			      link_uhtid);
+			return err;
+		}
+
 		for (i = 0; nexthdr[i].jump; i++) {
 			if (nexthdr[i].o != cls->knode.sel->offoff ||
 			    nexthdr[i].s != cls->knode.sel->offshift ||
@@ -8562,6 +8613,8 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
 			}
 			jump->input = input;
 			jump->mask = mask;
+			jump->link_hdl = cls->knode.handle;
+
 			err = ixgbe_clsu32_build_input(input, mask, cls,
 						       field_ptr, &nexthdr[i]);
 			if (!err) {
@@ -8589,6 +8642,20 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
 		if ((adapter->jump_tables[uhtid])->mask)
 			memcpy(mask, (adapter->jump_tables[uhtid])->mask,
 			       sizeof(*mask));
+
+		/* Lookup in all child hash tables if this location is already
+		 * filled with a filter
+		 */
+		for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
+			struct ixgbe_jump_table *link = adapter->jump_tables[i];
+
+			if (link && (test_bit(loc - 1, link->child_loc_map))) {
+				e_err(drv, "Filter exists in location: %x\n",
+				      loc);
+				err = -EINVAL;
+				goto err_out;
+			}
+		}
 	}
 	err = ixgbe_clsu32_build_input(input, mask, cls, field_ptr, NULL);
 	if (err)
@@ -8620,6 +8687,9 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
 		ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
 	spin_unlock(&adapter->fdir_perfect_lock);
 
+	if ((uhtid != 0x800) && (adapter->jump_tables[uhtid]))
+		set_bit(loc - 1, (adapter->jump_tables[uhtid])->child_loc_map);
+
 	kfree(mask);
 	return err;
 err_out_w_lock:
@@ -8781,9 +8851,7 @@ static int ixgbe_set_features(struct net_device *netdev,
 /**
  * ixgbe_add_vxlan_port - Get notifications about VXLAN ports that come up
  * @dev: The port's netdev
- * @sa_family: Socket Family that VXLAN is notifiying us about
- * @port: New UDP port number that VXLAN started listening to
- * @type: Enumerated type specifying UDP tunnel type
+ * @ti: Tunnel endpoint information
  **/
 static void ixgbe_add_vxlan_port(struct net_device *dev,
 				 struct udp_tunnel_info *ti)
@@ -8818,9 +8886,7 @@ static void ixgbe_add_vxlan_port(struct net_device *dev,
 /**
  * ixgbe_del_vxlan_port - Get notifications about VXLAN ports that go away
  * @dev: The port's netdev
- * @sa_family: Socket Family that VXLAN is notifying us about
- * @port: UDP port number that VXLAN stopped listening to
- * @type: Enumerated type specifying UDP tunnel type
+ * @ti: Tunnel endpoint information
  **/
 static void ixgbe_del_vxlan_port(struct net_device *dev,
 				 struct udp_tunnel_info *ti)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
index a8bed3d887f7..538a1c5475b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
@@ -42,8 +42,12 @@ struct ixgbe_jump_table {
 	struct ixgbe_mat_field *mat;
 	struct ixgbe_fdir_filter *input;
 	union ixgbe_atr_input *mask;
+	u32 link_hdl;
+	unsigned long child_loc_map[32];
 };
 
+#define IXGBE_MAX_HW_ENTRIES 2045
+
 static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input,
 				     union ixgbe_atr_input *mask,
 				     u32 val, u32 m)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index c5caacdd193d..8618599dfd6f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -954,6 +954,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
 			struct ixgbe_hw *hw = &adapter->hw;
 
 			hw->mac.ops.set_mac_anti_spoofing(hw, false, vf);
+			hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf);
 		}
 	}
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
index 61a80da8b6f0..2819abc454c7 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.c
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -85,7 +85,7 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw)
 static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
-	s32 ret_val = -IXGBE_ERR_MBX;
+	s32 ret_val = IXGBE_ERR_MBX;
 
 	if (!mbx->ops.read)
 		goto out;
@@ -111,7 +111,7 @@ out:
 static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
-	s32 ret_val = -IXGBE_ERR_MBX;
+	s32 ret_val = IXGBE_ERR_MBX;
 
 	/* exit if either we can't write or there isn't a defined timeout */
 	if (!mbx->ops.write || !mbx->timeout)
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 0d2f8e934c59..91e09d68b7e2 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -102,7 +102,6 @@ struct ltq_etop_priv {
 	struct resource *res;
 
 	struct mii_bus *mii_bus;
-	struct phy_device *phydev;
 
 	struct ltq_etop_chan ch[MAX_DMA_CHAN];
 	int tx_free[MAX_DMA_CHAN >> 1];
@@ -305,34 +304,16 @@ ltq_etop_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 }
 
 static int
-ltq_etop_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct ltq_etop_priv *priv = netdev_priv(dev);
-
-	return phy_ethtool_gset(priv->phydev, cmd);
-}
-
-static int
-ltq_etop_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct ltq_etop_priv *priv = netdev_priv(dev);
-
-	return phy_ethtool_sset(priv->phydev, cmd);
-}
-
-static int
 ltq_etop_nway_reset(struct net_device *dev)
 {
-	struct ltq_etop_priv *priv = netdev_priv(dev);
-
-	return phy_start_aneg(priv->phydev);
+	return phy_start_aneg(dev->phydev);
 }
 
 static const struct ethtool_ops ltq_etop_ethtool_ops = {
 	.get_drvinfo = ltq_etop_get_drvinfo,
-	.get_settings = ltq_etop_get_settings,
-	.set_settings = ltq_etop_set_settings,
 	.nway_reset = ltq_etop_nway_reset,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 static int
@@ -401,7 +382,6 @@ ltq_etop_mdio_probe(struct net_device *dev)
 			      | SUPPORTED_TP);
 
 	phydev->advertising = phydev->supported;
-	priv->phydev = phydev;
 	phy_attached_info(phydev);
 
 	return 0;
@@ -450,7 +430,7 @@ ltq_etop_mdio_cleanup(struct net_device *dev)
 {
 	struct ltq_etop_priv *priv = netdev_priv(dev);
 
-	phy_disconnect(priv->phydev);
+	phy_disconnect(dev->phydev);
 	mdiobus_unregister(priv->mii_bus);
 	mdiobus_free(priv->mii_bus);
 }
@@ -469,7 +449,7 @@ ltq_etop_open(struct net_device *dev)
 		ltq_dma_open(&ch->dma);
 		napi_enable(&ch->napi);
 	}
-	phy_start(priv->phydev);
+	phy_start(dev->phydev);
 	netif_tx_start_all_queues(dev);
 	return 0;
 }
@@ -481,7 +461,7 @@ ltq_etop_stop(struct net_device *dev)
 	int i;
 
 	netif_tx_stop_all_queues(dev);
-	phy_stop(priv->phydev);
+	phy_stop(dev->phydev);
 	for (i = 0; i < MAX_DMA_CHAN; i++) {
 		struct ltq_etop_chan *ch = &priv->ch[i];
 
@@ -556,10 +536,8 @@ ltq_etop_change_mtu(struct net_device *dev, int new_mtu)
 static int
 ltq_etop_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	struct ltq_etop_priv *priv = netdev_priv(dev);
-
 	/* TODO: mii-toll reports "No MII transceiver present!." ?!*/
-	return phy_mii_ioctl(priv->phydev, rq, cmd);
+	return phy_mii_ioctl(dev->phydev, rq, cmd);
 }
 
 static int
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index a6d26d351dfc..d5d263bda333 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3458,6 +3458,8 @@ static int mvneta_open(struct net_device *dev)
 	return 0;
 
 err_free_irq:
+	unregister_cpu_notifier(&pp->cpu_notifier);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(pp->dev->irq, pp->ports);
 err_cleanup_txqs:
 	mvneta_cleanup_txqs(pp);
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 868a957f24bb..0b047178cda1 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -699,7 +699,6 @@ struct mvpp2_port {
 	u16 rx_ring_size;
 	struct mvpp2_pcpu_stats __percpu *stats;
 
-	struct phy_device *phy_dev;
 	phy_interface_t phy_interface;
 	struct device_node *phy_node;
 	unsigned int link;
@@ -4850,7 +4849,7 @@ static irqreturn_t mvpp2_isr(int irq, void *dev_id)
 static void mvpp2_link_event(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
-	struct phy_device *phydev = port->phy_dev;
+	struct phy_device *phydev = dev->phydev;
 	int status_change = 0;
 	u32 val;
 
@@ -5416,6 +5415,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 /* Set hw internals when starting port */
 static void mvpp2_start_dev(struct mvpp2_port *port)
 {
+	struct net_device *ndev = port->dev;
+
 	mvpp2_gmac_max_rx_size_set(port);
 	mvpp2_txp_max_tx_size_set(port);
 
@@ -5425,13 +5426,15 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 	mvpp2_interrupts_enable(port);
 
 	mvpp2_port_enable(port);
-	phy_start(port->phy_dev);
+	phy_start(ndev->phydev);
 	netif_tx_start_all_queues(port->dev);
 }
 
 /* Set hw internals when stopping port */
 static void mvpp2_stop_dev(struct mvpp2_port *port)
 {
+	struct net_device *ndev = port->dev;
+
 	/* Stop new packets from arriving to RXQs */
 	mvpp2_ingress_disable(port);
 
@@ -5447,7 +5450,7 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
 
 	mvpp2_egress_disable(port);
 	mvpp2_port_disable(port);
-	phy_stop(port->phy_dev);
+	phy_stop(ndev->phydev);
 }
 
 /* Return positive if MTU is valid */
@@ -5535,7 +5538,6 @@ static int mvpp2_phy_connect(struct mvpp2_port *port)
 	phy_dev->supported &= PHY_GBIT_FEATURES;
 	phy_dev->advertising = phy_dev->supported;
 
-	port->phy_dev = phy_dev;
 	port->link    = 0;
 	port->duplex  = 0;
 	port->speed   = 0;
@@ -5545,8 +5547,9 @@ static int mvpp2_phy_connect(struct mvpp2_port *port)
 
 static void mvpp2_phy_disconnect(struct mvpp2_port *port)
 {
-	phy_disconnect(port->phy_dev);
-	port->phy_dev = NULL;
+	struct net_device *ndev = port->dev;
+
+	phy_disconnect(ndev->phydev);
 }
 
 static int mvpp2_open(struct net_device *dev)
@@ -5796,13 +5799,12 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
 	int ret;
 
-	if (!port->phy_dev)
+	if (!dev->phydev)
 		return -ENOTSUPP;
 
-	ret = phy_mii_ioctl(port->phy_dev, ifr, cmd);
+	ret = phy_mii_ioctl(dev->phydev, ifr, cmd);
 	if (!ret)
 		mvpp2_link_event(dev);
 
@@ -5811,28 +5813,6 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 /* Ethtool methods */
 
-/* Get settings (phy address, speed) for ethtools */
-static int mvpp2_ethtool_get_settings(struct net_device *dev,
-				      struct ethtool_cmd *cmd)
-{
-	struct mvpp2_port *port = netdev_priv(dev);
-
-	if (!port->phy_dev)
-		return -ENODEV;
-	return phy_ethtool_gset(port->phy_dev, cmd);
-}
-
-/* Set settings (phy address, speed) for ethtools */
-static int mvpp2_ethtool_set_settings(struct net_device *dev,
-				      struct ethtool_cmd *cmd)
-{
-	struct mvpp2_port *port = netdev_priv(dev);
-
-	if (!port->phy_dev)
-		return -ENODEV;
-	return phy_ethtool_sset(port->phy_dev, cmd);
-}
-
 /* Set interrupt coalescing for ethtools */
 static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
 				      struct ethtool_coalesce *c)
@@ -5967,13 +5947,13 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.get_link	= ethtool_op_get_link,
-	.get_settings	= mvpp2_ethtool_get_settings,
-	.set_settings	= mvpp2_ethtool_set_settings,
 	.set_coalesce	= mvpp2_ethtool_set_coalesce,
 	.get_coalesce	= mvpp2_ethtool_get_coalesce,
 	.get_drvinfo	= mvpp2_ethtool_get_drvinfo,
 	.get_ringparam	= mvpp2_ethtool_get_ringparam,
 	.set_ringparam	= mvpp2_ethtool_set_ringparam,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 /* Driver initialization */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4763252bbf85..b57ae3afb994 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -76,8 +76,8 @@ static int mtk_mdio_busy_wait(struct mtk_eth *eth)
 	return -1;
 }
 
-u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
-		    u32 phy_register, u32 write_data)
+static u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
+			   u32 phy_register, u32 write_data)
 {
 	if (mtk_mdio_busy_wait(eth))
 		return -1;
@@ -95,7 +95,7 @@ u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
 	return 0;
 }
 
-u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
+static u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
 {
 	u32 d;
 
@@ -328,22 +328,24 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth)
 
 static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
 {
+	unsigned long flags;
 	u32 val;
 
+	spin_lock_irqsave(&eth->irq_lock, flags);
 	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
 	mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
-	/* flush write */
-	mtk_r32(eth, MTK_QDMA_INT_MASK);
+	spin_unlock_irqrestore(&eth->irq_lock, flags);
 }
 
 static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
 {
+	unsigned long flags;
 	u32 val;
 
+	spin_lock_irqsave(&eth->irq_lock, flags);
 	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
 	mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
-	/* flush write */
-	mtk_r32(eth, MTK_QDMA_INT_MASK);
+	spin_unlock_irqrestore(&eth->irq_lock, flags);
 }
 
 static int mtk_set_mac_address(struct net_device *dev, void *p)
@@ -481,20 +483,23 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
 /* the qdma core needs scratch memory to be setup */
 static int mtk_init_fq_dma(struct mtk_eth *eth)
 {
-	dma_addr_t phy_ring_head, phy_ring_tail;
+	dma_addr_t phy_ring_tail;
 	int cnt = MTK_DMA_SIZE;
 	dma_addr_t dma_addr;
 	int i;
 
 	eth->scratch_ring = dma_alloc_coherent(eth->dev,
 					       cnt * sizeof(struct mtk_tx_dma),
-					       &phy_ring_head,
+					       &eth->phy_scratch_ring,
 					       GFP_ATOMIC | __GFP_ZERO);
 	if (unlikely(!eth->scratch_ring))
 		return -ENOMEM;
 
 	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE,
 				    GFP_KERNEL);
+	if (unlikely(!eth->scratch_head))
+		return -ENOMEM;
+
 	dma_addr = dma_map_single(eth->dev,
 				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
 				  DMA_FROM_DEVICE);
@@ -502,19 +507,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 		return -ENOMEM;
 
 	memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
-	phy_ring_tail = phy_ring_head +
+	phy_ring_tail = eth->phy_scratch_ring +
 			(sizeof(struct mtk_tx_dma) * (cnt - 1));
 
 	for (i = 0; i < cnt; i++) {
 		eth->scratch_ring[i].txd1 =
 					(dma_addr + (i * MTK_QDMA_PAGE_SIZE));
 		if (i < cnt - 1)
-			eth->scratch_ring[i].txd2 = (phy_ring_head +
+			eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring +
 				((i + 1) * sizeof(struct mtk_tx_dma)));
 		eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE);
 	}
 
-	mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
+	mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD);
 	mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
 	mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
 	mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
@@ -671,7 +676,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
 err_dma:
 	do {
-		tx_buf = mtk_desc_to_tx_buf(ring, txd);
+		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
 		mtk_tx_unmap(&dev->dev, tx_buf);
@@ -701,6 +706,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 	return nfrags;
 }
 
+static int mtk_queue_stopped(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		if (netif_queue_stopped(eth->netdev[i]))
+			return 1;
+	}
+
+	return 0;
+}
+
 static void mtk_wake_queue(struct mtk_eth *eth)
 {
 	int i;
@@ -766,12 +785,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
 		goto drop;
 
-	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
 		mtk_stop_queue(eth);
-		if (unlikely(atomic_read(&ring->free_count) >
-			     ring->thresh))
-			mtk_wake_queue(eth);
-	}
+
 	spin_unlock_irqrestore(&eth->page_lock, flags);
 
 	return NETDEV_TX_OK;
@@ -784,7 +800,7 @@ drop:
 }
 
 static int mtk_poll_rx(struct napi_struct *napi, int budget,
-		       struct mtk_eth *eth, u32 rx_intr)
+		       struct mtk_eth *eth)
 {
 	struct mtk_rx_ring *ring = &eth->rx_ring;
 	int idx = ring->calc_idx;
@@ -826,6 +842,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 					  DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
 			skb_free_frag(new_data);
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 
@@ -833,6 +850,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
 			put_page(virt_to_head_page(new_data));
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
@@ -870,22 +888,22 @@ release_desc:
 	}
 
 	if (done < budget)
-		mtk_w32(eth, rx_intr, MTK_QMTK_INT_STATUS);
+		mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
 
 	return done;
 }
 
-static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
+static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 {
 	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct mtk_tx_dma *desc;
 	struct sk_buff *skb;
 	struct mtk_tx_buf *tx_buf;
-	int total = 0, done[MTK_MAX_DEVS];
+	unsigned int done[MTK_MAX_DEVS];
 	unsigned int bytes[MTK_MAX_DEVS];
 	u32 cpu, dma;
 	static int condition;
-	int i;
+	int total = 0, i;
 
 	memset(done, 0, sizeof(done));
 	memset(bytes, 0, sizeof(bytes));
@@ -921,7 +939,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 		}
 		mtk_tx_unmap(eth->dev, tx_buf);
 
-		ring->last_free->txd2 = next_cpu;
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
 
@@ -937,64 +954,82 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 		total += done[i];
 	}
 
-	/* read hw index again make sure no new tx packet */
-	if (cpu != dma || cpu != mtk_r32(eth, MTK_QTX_DRX_PTR))
-		*tx_again = true;
-	else
-		mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS);
-
-	if (!total)
-		return 0;
-
-	if (atomic_read(&ring->free_count) > ring->thresh)
+	if (mtk_queue_stopped(eth) &&
+	    (atomic_read(&ring->free_count) > ring->thresh))
 		mtk_wake_queue(eth);
 
 	return total;
 }
 
-static int mtk_poll(struct napi_struct *napi, int budget)
+static void mtk_handle_status_irq(struct mtk_eth *eth)
 {
-	struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
-	u32 status, status2, mask, tx_intr, rx_intr, status_intr;
-	int tx_done, rx_done;
-	bool tx_again = false;
+	u32 status2 = mtk_r32(eth, MTK_INT_STATUS2);
 
-	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-	status2 = mtk_r32(eth, MTK_INT_STATUS2);
-	tx_intr = MTK_TX_DONE_INT;
-	rx_intr = MTK_RX_DONE_INT;
-	status_intr = (MTK_GDM1_AF | MTK_GDM2_AF);
-	tx_done = 0;
-	rx_done = 0;
-	tx_again = 0;
+	if (unlikely(status2 & (MTK_GDM1_AF | MTK_GDM2_AF))) {
+		mtk_stats_update(eth);
+		mtk_w32(eth, (MTK_GDM1_AF | MTK_GDM2_AF),
+			MTK_INT_STATUS2);
+	}
+}
 
-	if (status & tx_intr)
-		tx_done = mtk_poll_tx(eth, budget, &tx_again);
+static int mtk_napi_tx(struct napi_struct *napi, int budget)
+{
+	struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi);
+	u32 status, mask;
+	int tx_done = 0;
 
-	if (status & rx_intr)
-		rx_done = mtk_poll_rx(napi, budget, eth, rx_intr);
+	mtk_handle_status_irq(eth);
+	mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS);
+	tx_done = mtk_poll_tx(eth, budget);
 
-	if (unlikely(status2 & status_intr)) {
-		mtk_stats_update(eth);
-		mtk_w32(eth, status_intr, MTK_INT_STATUS2);
+	if (unlikely(netif_msg_intr(eth))) {
+		status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+		mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+		dev_info(eth->dev,
+			 "done tx %d, intr 0x%08x/0x%x\n",
+			 tx_done, status, mask);
 	}
 
+	if (tx_done == budget)
+		return budget;
+
+	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+	if (status & MTK_TX_DONE_INT)
+		return budget;
+
+	napi_complete(napi);
+	mtk_irq_enable(eth, MTK_TX_DONE_INT);
+
+	return tx_done;
+}
+
+static int mtk_napi_rx(struct napi_struct *napi, int budget)
+{
+	struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
+	u32 status, mask;
+	int rx_done = 0;
+
+	mtk_handle_status_irq(eth);
+	mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
+	rx_done = mtk_poll_rx(napi, budget, eth);
+
 	if (unlikely(netif_msg_intr(eth))) {
+		status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
 		mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
-		netdev_info(eth->netdev[0],
-			    "done tx %d, rx %d, intr 0x%08x/0x%x\n",
-			    tx_done, rx_done, status, mask);
+		dev_info(eth->dev,
+			 "done rx %d, intr 0x%08x/0x%x\n",
+			 rx_done, status, mask);
 	}
 
-	if (tx_again || rx_done == budget)
+	if (rx_done == budget)
 		return budget;
 
 	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-	if (status & (tx_intr | rx_intr))
+	if (status & MTK_RX_DONE_INT)
 		return budget;
 
 	napi_complete(napi);
-	mtk_irq_enable(eth, tx_intr | rx_intr);
+	mtk_irq_enable(eth, MTK_RX_DONE_INT);
 
 	return rx_done;
 }
@@ -1027,9 +1062,8 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
-	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
-	ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2,
-			      MAX_SKB_FRAGS);
+	ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
+	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue
@@ -1207,6 +1241,14 @@ static void mtk_dma_free(struct mtk_eth *eth)
 	for (i = 0; i < MTK_MAC_COUNT; i++)
 		if (eth->netdev[i])
 			netdev_reset_queue(eth->netdev[i]);
+	if (eth->scratch_ring) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
+				  eth->scratch_ring,
+				  eth->phy_scratch_ring);
+		eth->scratch_ring = NULL;
+		eth->phy_scratch_ring = 0;
+	}
 	mtk_tx_clean(eth);
 	mtk_rx_clean(eth);
 	kfree(eth->scratch_head);
@@ -1223,22 +1265,26 @@ static void mtk_tx_timeout(struct net_device *dev)
 	schedule_work(&eth->pending_work);
 }
 
-static irqreturn_t mtk_handle_irq(int irq, void *_eth)
+static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth)
 {
 	struct mtk_eth *eth = _eth;
-	u32 status;
 
-	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-	if (unlikely(!status))
-		return IRQ_NONE;
+	if (likely(napi_schedule_prep(&eth->rx_napi))) {
+		__napi_schedule(&eth->rx_napi);
+		mtk_irq_disable(eth, MTK_RX_DONE_INT);
+	}
 
-	if (likely(status & (MTK_RX_DONE_INT | MTK_TX_DONE_INT))) {
-		if (likely(napi_schedule_prep(&eth->rx_napi)))
-			__napi_schedule(&eth->rx_napi);
-	} else {
-		mtk_w32(eth, status, MTK_QMTK_INT_STATUS);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)
+{
+	struct mtk_eth *eth = _eth;
+
+	if (likely(napi_schedule_prep(&eth->tx_napi))) {
+		__napi_schedule(&eth->tx_napi);
+		mtk_irq_disable(eth, MTK_TX_DONE_INT);
 	}
-	mtk_irq_disable(eth, (MTK_RX_DONE_INT | MTK_TX_DONE_INT));
 
 	return IRQ_HANDLED;
 }
@@ -1251,7 +1297,7 @@ static void mtk_poll_controller(struct net_device *dev)
 	u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
 
 	mtk_irq_disable(eth, int_mask);
-	mtk_handle_irq(dev->irq, dev);
+	mtk_handle_irq_rx(eth->irq[2], dev);
 	mtk_irq_enable(eth, int_mask);
 }
 #endif
@@ -1269,7 +1315,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
 	mtk_w32(eth,
 		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
 		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-		MTK_RX_BT_32DWORDS,
+		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
 		MTK_QDMA_GLO_CFG);
 
 	return 0;
@@ -1287,6 +1333,7 @@ static int mtk_open(struct net_device *dev)
 		if (err)
 			return err;
 
+		napi_enable(&eth->tx_napi);
 		napi_enable(&eth->rx_napi);
 		mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
 	}
@@ -1335,6 +1382,7 @@ static int mtk_stop(struct net_device *dev)
 		return 0;
 
 	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	napi_disable(&eth->tx_napi);
 	napi_disable(&eth->rx_napi);
 
 	mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
@@ -1372,7 +1420,11 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
 	/* Enable RX VLan Offloading */
 	mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
 
-	err = devm_request_irq(eth->dev, eth->irq, mtk_handle_irq, 0,
+	err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
+			       dev_name(eth->dev), eth);
+	if (err)
+		return err;
+	err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
 			       dev_name(eth->dev), eth);
 	if (err)
 		return err;
@@ -1383,12 +1435,16 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
 
 	/* disable delay and normal interrupt */
 	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	mtk_irq_disable(eth, ~0);
 	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
 	mtk_w32(eth, 0, MTK_RST_GL);
 
 	/* FE int grouping */
-	mtk_w32(eth, 0, MTK_FE_INT_GRP);
+	mtk_w32(eth, MTK_TX_DONE_INT, MTK_PDMA_INT_GRP1);
+	mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_GRP2);
+	mtk_w32(eth, MTK_TX_DONE_INT, MTK_QDMA_INT_GRP1);
+	mtk_w32(eth, MTK_RX_DONE_INT, MTK_QDMA_INT_GRP2);
+	mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
 
 	for (i = 0; i < 2; i++) {
 		u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
@@ -1436,7 +1492,8 @@ static void mtk_uninit(struct net_device *dev)
 	phy_disconnect(mac->phy_dev);
 	mtk_mdio_cleanup(eth);
 	mtk_irq_disable(eth, ~0);
-	free_irq(dev->irq, dev);
+	free_irq(eth->irq[1], dev);
+	free_irq(eth->irq[2], dev);
 }
 
 static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -1697,7 +1754,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
-	eth->netdev[id]->watchdog_timeo = HZ;
+	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
@@ -1710,10 +1767,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 		dev_err(eth->dev, "error bringing up device\n");
 		goto free_netdev;
 	}
-	eth->netdev[id]->irq = eth->irq;
+	eth->netdev[id]->irq = eth->irq[0];
 	netif_info(eth, probe, eth->netdev[id],
 		   "mediatek frame engine at 0x%08lx, irq %d\n",
-		   eth->netdev[id]->base_addr, eth->netdev[id]->irq);
+		   eth->netdev[id]->base_addr, eth->irq[0]);
 
 	return 0;
 
@@ -1730,6 +1787,7 @@ static int mtk_probe(struct platform_device *pdev)
 	struct mtk_soc_data *soc;
 	struct mtk_eth *eth;
 	int err;
+	int i;
 
 	match = of_match_device(of_mtk_match, &pdev->dev);
 	soc = (struct mtk_soc_data *)match->data;
@@ -1743,6 +1801,7 @@ static int mtk_probe(struct platform_device *pdev)
 		return PTR_ERR(eth->base);
 
 	spin_lock_init(&eth->page_lock);
+	spin_lock_init(&eth->irq_lock);
 
 	eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
 						      "mediatek,ethsys");
@@ -1764,10 +1823,12 @@ static int mtk_probe(struct platform_device *pdev)
 		return PTR_ERR(eth->rstc);
 	}
 
-	eth->irq = platform_get_irq(pdev, 0);
-	if (eth->irq < 0) {
-		dev_err(&pdev->dev, "no IRQ resource found\n");
-		return -ENXIO;
+	for (i = 0; i < 3; i++) {
+		eth->irq[i] = platform_get_irq(pdev, i);
+		if (eth->irq[i] < 0) {
+			dev_err(&pdev->dev, "no IRQ%d resource found\n", i);
+			return -ENXIO;
+		}
 	}
 
 	eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
@@ -1808,7 +1869,9 @@ static int mtk_probe(struct platform_device *pdev)
 	 * for NAPI to work
 	 */
 	init_dummy_netdev(&eth->dummy_dev);
-	netif_napi_add(&eth->dummy_dev, &eth->rx_napi, mtk_poll,
+	netif_napi_add(&eth->dummy_dev, &eth->tx_napi, mtk_napi_tx,
+		       MTK_NAPI_WEIGHT);
+	netif_napi_add(&eth->dummy_dev, &eth->rx_napi, mtk_napi_rx,
 		       MTK_NAPI_WEIGHT);
 
 	platform_set_drvdata(pdev, eth);
@@ -1829,6 +1892,7 @@ static int mtk_remove(struct platform_device *pdev)
 	clk_disable_unprepare(eth->clk_gp1);
 	clk_disable_unprepare(eth->clk_gp2);
 
+	netif_napi_del(&eth->tx_napi);
 	netif_napi_del(&eth->rx_napi);
 	mtk_cleanup(eth);
 	platform_set_drvdata(pdev, NULL);
@@ -1846,7 +1910,6 @@ static struct platform_driver mtk_driver = {
 	.remove = mtk_remove,
 	.driver = {
 		.name = "mtk_soc_eth",
-		.owner = THIS_MODULE,
 		.of_match_table = of_mtk_match,
 	},
 };
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index eed626d56ea4..f82e3acb947b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -68,6 +68,10 @@
 /* Unicast Filter MAC Address Register - High */
 #define MTK_GDMA_MAC_ADRH(x)	(0x50C + (x * 0x1000))
 
+/* PDMA Interrupt grouping registers */
+#define MTK_PDMA_INT_GRP1	0xa50
+#define MTK_PDMA_INT_GRP2	0xa54
+
 /* QDMA TX Queue Configuration Registers */
 #define MTK_QTX_CFG(x)		(0x1800 + (x * 0x10))
 #define QDMA_RES_THRES		4
@@ -91,6 +95,7 @@
 #define MTK_QDMA_GLO_CFG	0x1A04
 #define MTK_RX_2B_OFFSET	BIT(31)
 #define MTK_RX_BT_32DWORDS	(3 << 11)
+#define MTK_NDP_CO_PRO		BIT(10)
 #define MTK_TX_WB_DDONE		BIT(6)
 #define MTK_DMA_SIZE_16DWORDS	(2 << 4)
 #define MTK_RX_DMA_BUSY		BIT(3)
@@ -124,6 +129,11 @@
 #define MTK_TX_DONE_INT		(MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
 				 MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
 
+/* QDMA Interrupt grouping registers */
+#define MTK_QDMA_INT_GRP1	0x1a20
+#define MTK_QDMA_INT_GRP2	0x1a24
+#define MTK_RLS_DONE_INT	BIT(0)
+
 /* QDMA Interrupt Status Register */
 #define MTK_QDMA_INT_MASK	0x1A1C
 
@@ -355,8 +365,10 @@ struct mtk_rx_ring {
  * @dma_refcnt:		track how many netdevs are using the DMA engine
  * @tx_ring:		Pointer to the memore holding info about the TX ring
  * @rx_ring:		Pointer to the memore holding info about the RX ring
- * @rx_napi:		The NAPI struct
+ * @tx_napi:		The TX NAPI struct
+ * @rx_napi:		The RX NAPI struct
  * @scratch_ring:	Newer SoCs need memory for a second HW managed TX ring
+ * @phy_scratch_ring:	physical address of scratch_ring
  * @scratch_head:	The scratch memory that scratch_ring points to.
  * @clk_ethif:		The ethif clock
  * @clk_esw:		The switch clock
@@ -371,10 +383,11 @@ struct mtk_eth {
 	void __iomem			*base;
 	struct reset_control		*rstc;
 	spinlock_t			page_lock;
+	spinlock_t			irq_lock;
 	struct net_device		dummy_dev;
 	struct net_device		*netdev[MTK_MAX_DEVS];
 	struct mtk_mac			*mac[MTK_MAX_DEVS];
-	int				irq;
+	int				irq[3];
 	u32				msg_enable;
 	unsigned long			sysclk;
 	struct regmap			*ethsys;
@@ -382,8 +395,10 @@ struct mtk_eth {
 	atomic_t			dma_refcnt;
 	struct mtk_tx_ring		tx_ring;
 	struct mtk_rx_ring		rx_ring;
+	struct napi_struct		tx_napi;
 	struct napi_struct		rx_napi;
 	struct mtk_tx_dma		*scratch_ring;
+	dma_addr_t			phy_scratch_ring;
 	void				*scratch_head;
 	struct clk			*clk_ethif;
 	struct clk			*clk_esw;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e94ca1c3fc7c..f04a423ff79d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2597,7 +2597,6 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
 	priv->cmd.free_head = 0;
 
 	sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
-	spin_lock_init(&priv->cmd.context_lock);
 
 	for (priv->cmd.token_mask = 1;
 	     priv->cmd.token_mask < priv->cmd.max_cmds;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index d42083a8a104..6083775dae16 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -417,14 +417,18 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev,
 	mutex_lock(&mdev->state_lock);
 	if (mdev->device_up && priv->port_up) {
 		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
-		if (err)
+		if (err) {
 			en_err(priv, "Failed configuring VLAN filter\n");
+			goto out;
+		}
 	}
-	if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx))
-		en_dbg(HW, priv, "failed adding vlan %d\n", vid);
-	mutex_unlock(&mdev->state_lock);
+	err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx);
+	if (err)
+		en_dbg(HW, priv, "Failed adding vlan %d\n", vid);
 
-	return 0;
+out:
+	mutex_unlock(&mdev->state_lock);
+	return err;
 }
 
 static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
@@ -432,7 +436,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int err;
+	int err = 0;
 
 	en_dbg(HW, priv, "Killing VID:%d\n", vid);
 
@@ -449,7 +453,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 	}
 	mutex_unlock(&mdev->state_lock);
 
-	return 0;
+	return err;
 }
 
 static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
@@ -2042,11 +2046,20 @@ err:
 	return -ENOMEM;
 }
 
+static void mlx4_en_shutdown(struct net_device *dev)
+{
+	rtnl_lock();
+	netif_device_detach(dev);
+	mlx4_en_close(dev);
+	rtnl_unlock();
+}
 
 void mlx4_en_destroy_netdev(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	bool shutdown = mdev->dev->persist->interface_state &
+					    MLX4_INTERFACE_STATE_SHUTDOWN;
 
 	en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
 
@@ -2054,7 +2067,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	if (priv->registered) {
 		devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev,
 							      priv->port));
-		unregister_netdev(dev);
+		if (shutdown)
+			mlx4_en_shutdown(dev);
+		else
+			unregister_netdev(dev);
 	}
 
 	if (priv->allocated)
@@ -2079,7 +2095,8 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	kfree(priv->tx_ring);
 	kfree(priv->tx_cq);
 
-	free_netdev(dev);
+	if (!shutdown)
+		free_netdev(dev);
 }
 
 static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
@@ -2464,9 +2481,14 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
 	 * strip that feature if this is an IPv6 encapsulated frame.
 	 */
 	if (skb->encapsulation &&
-	    (skb->ip_summed == CHECKSUM_PARTIAL) &&
-	    (ip_hdr(skb)->version != 4))
-		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	    (skb->ip_summed == CHECKSUM_PARTIAL)) {
+		struct mlx4_en_priv *priv = netdev_priv(dev);
+
+		if (!priv->vxlan_port ||
+		    (ip_hdr(skb)->version != 4) ||
+		    (udp_hdr(skb)->dest != priv->vxlan_port))
+			features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	}
 
 	return features;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index dec77d6f0ac9..7ae1cdad9bf0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -147,7 +147,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
 	if (enable) {
 		dev->flags |= MLX4_FLAG_BONDED;
 	} else {
-		 ret = mlx4_virt2phy_port_map(dev, 1, 2);
+		ret = mlx4_virt2phy_port_map(dev, 1, 2);
 		if (ret) {
 			mlx4_err(dev, "Fail to reset port map\n");
 			return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 3564aad778a3..75dd2e3d3059 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2600,7 +2600,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 	err = mlx4_init_uar_table(dev);
 	if (err) {
 		mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
-		 return err;
+		return err;
 	}
 
 	err = mlx4_uar_alloc(dev, &priv->driver_uar);
@@ -3223,6 +3223,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 
 	INIT_LIST_HEAD(&priv->pgdir_list);
 	mutex_init(&priv->pgdir_mutex);
+	spin_lock_init(&priv->cmd.context_lock);
 
 	INIT_LIST_HEAD(&priv->bf_list);
 	mutex_init(&priv->bf_mutex);
@@ -4135,8 +4136,11 @@ static void mlx4_shutdown(struct pci_dev *pdev)
 
 	mlx4_info(persist->dev, "mlx4_shutdown was called\n");
 	mutex_lock(&persist->interface_state_mutex);
-	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+	if (persist->interface_state & MLX4_INTERFACE_STATE_UP) {
+		/* Notify mlx4 clients that the kernel is being shut down */
+		persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN;
 		mlx4_unload_one(pdev);
+	}
 	mutex_unlock(&persist->interface_state_mutex);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index f2d0920018a5..94b891c118c1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -618,8 +618,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
 				err = mlx4_READ_ENTRY(dev,
 						      entry->index,
 						      mailbox);
-					if (err)
-						goto out_mailbox;
+				if (err)
+					goto out_mailbox;
 				members_count =
 					be32_to_cpu(mgm->members_count) &
 					0xffffff;
@@ -657,8 +657,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
 				err = mlx4_WRITE_ENTRY(dev,
 						       entry->index,
 						       mailbox);
-					if (err)
-						goto out_mailbox;
+				if (err)
+					goto out_mailbox;
 			}
 		}
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 93195191f45b..395b5463cfd9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -248,7 +248,7 @@ static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 				  offset, order);
 		return;
 	}
-	 __mlx4_free_mtt_range(dev, offset, order);
+	__mlx4_free_mtt_range(dev, offset, order);
 }
 
 void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index cd9b2b28df88..8b81114bdc72 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2372,16 +2372,15 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 		__mlx4_mpt_release(dev, index);
 		break;
 	case RES_OP_MAP_ICM:
-			index = get_param_l(&in_param);
-			id = index & mpt_mask(dev);
-			err = mr_res_start_move_to(dev, slave, id,
-						   RES_MPT_RESERVED, &mpt);
-			if (err)
-				return err;
-
-			__mlx4_mpt_free_icm(dev, mpt->key);
-			res_end_move(dev, slave, RES_MPT, id);
+		index = get_param_l(&in_param);
+		id = index & mpt_mask(dev);
+		err = mr_res_start_move_to(dev, slave, id,
+					   RES_MPT_RESERVED, &mpt);
+		if (err)
 			return err;
+
+		__mlx4_mpt_free_icm(dev, mpt->key);
+		res_end_move(dev, slave, RES_MPT, id);
 		break;
 	default:
 		err = -EINVAL;
@@ -4253,9 +4252,8 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
 	     (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
 		!(dev->caps.flags2 &
 		  MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
-			mlx4_warn(dev,
-				  "Src check LB for slave %d isn't supported\n",
-				   slave);
+		mlx4_warn(dev, "Src check LB for slave %d isn't supported\n",
+			  slave);
 		return -ENOTSUPP;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 1cf722eba607..aae46884bf93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -4,6 +4,7 @@
 
 config MLX5_CORE
 	tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver"
+	depends on MAY_USE_DEVLINK
 	depends on PCI
 	default n
 	---help---
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index c4f450f1c658..05cc1effc13c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -5,9 +5,9 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
 		fs_counters.o rl.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
-		en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-		en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \
-		en_arfs.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
+		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
+		en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
+		en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index dcd2df6518de..d6e2a1cae19a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
 	case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+	case MLX5_CMD_OP_2ERR_QP:
+	case MLX5_CMD_OP_2RST_QP:
+	case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_RTR2RTS_QP:
 	case MLX5_CMD_OP_RTS2RTS_QP:
 	case MLX5_CMD_OP_SQERR2RTS_QP:
-	case MLX5_CMD_OP_2ERR_QP:
-	case MLX5_CMD_OP_2RST_QP:
 	case MLX5_CMD_OP_QUERY_QP:
 	case MLX5_CMD_OP_SQD_RTS_QP:
 	case MLX5_CMD_OP_INIT2INIT_QP:
@@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
 	case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
 	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
-	case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
 	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
 	case MLX5_CMD_OP_SET_ROCE_ADDRESS:
 	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_CREATE_RQT:
 	case MLX5_CMD_OP_MODIFY_RQT:
 	case MLX5_CMD_OP_QUERY_RQT:
+
 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
@@ -545,6 +549,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
 	default: return "unknown command opcode";
 	}
 }
@@ -601,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev,
 		pr_debug("\n");
 }
 
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+	struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+
+	return be16_to_cpu(hdr->opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+	struct delayed_work *dwork = container_of(work, struct delayed_work,
+						  work);
+	struct mlx5_cmd_work_ent *ent = container_of(dwork,
+						     struct mlx5_cmd_work_ent,
+						     cb_timeout_work);
+	struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+						 cmd);
+
+	ent->ret = -ETIMEDOUT;
+	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+		       mlx5_command_str(msg_to_opcode(ent->in)),
+		       msg_to_opcode(ent->in));
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
 	struct mlx5_cmd *cmd = ent->cmd;
 	struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+	unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
 	struct mlx5_cmd_layout *lay;
 	struct semaphore *sem;
 	unsigned long flags;
@@ -646,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work)
 	dump_command(dev, ent, 1);
 	ent->ts1 = ktime_get_ns();
 
+	if (ent->callback)
+		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -690,13 +723,6 @@ static const char *deliv_status_to_str(u8 status)
 	}
 }
 
-static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
-{
-	struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
-	return be16_to_cpu(hdr->opcode);
-}
-
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
 	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -705,13 +731,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 
 	if (cmd->mode == CMD_MODE_POLLING) {
 		wait_for_completion(&ent->done);
-		err = ent->ret;
-	} else {
-		if (!wait_for_completion_timeout(&ent->done, timeout))
-			err = -ETIMEDOUT;
-		else
-			err = 0;
+	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+		ent->ret = -ETIMEDOUT;
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
 	}
+
+	err = ent->ret;
+
 	if (err == -ETIMEDOUT) {
 		mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
 			       mlx5_command_str(msg_to_opcode(ent->in)),
@@ -760,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	if (!callback)
 		init_completion(&ent->done);
 
+	INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
 	INIT_WORK(&ent->work, cmd_work_handler);
 	if (page_queue) {
 		cmd_work_handler(&ent->work);
@@ -769,28 +796,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 		goto out_free;
 	}
 
-	if (!callback) {
-		err = wait_func(dev, ent);
-		if (err == -ETIMEDOUT)
-			goto out;
-
-		ds = ent->ts2 - ent->ts1;
-		op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
-		if (op < ARRAY_SIZE(cmd->stats)) {
-			stats = &cmd->stats[op];
-			spin_lock_irq(&stats->lock);
-			stats->sum += ds;
-			++stats->n;
-			spin_unlock_irq(&stats->lock);
-		}
-		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
-				   "fw exec time for %s is %lld nsec\n",
-				   mlx5_command_str(op), ds);
-		*status = ent->status;
-		free_cmd(ent);
-	}
+	if (callback)
+		goto out;
 
-	return err;
+	err = wait_func(dev, ent);
+	if (err == -ETIMEDOUT)
+		goto out_free;
+
+	ds = ent->ts2 - ent->ts1;
+	op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+	if (op < ARRAY_SIZE(cmd->stats)) {
+		stats = &cmd->stats[op];
+		spin_lock_irq(&stats->lock);
+		stats->sum += ds;
+		++stats->n;
+		spin_unlock_irq(&stats->lock);
+	}
+	mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+			   "fw exec time for %s is %lld nsec\n",
+			   mlx5_command_str(op), ds);
+	*status = ent->status;
 
 out_free:
 	free_cmd(ent);
@@ -1180,41 +1205,30 @@ err_dbg:
 	return err;
 }
 
-void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int i;
 
 	for (i = 0; i < cmd->max_reg_cmds; i++)
 		down(&cmd->sem);
-
 	down(&cmd->pages_sem);
 
-	flush_workqueue(cmd->wq);
-
-	cmd->mode = CMD_MODE_EVENTS;
+	cmd->mode = mode;
 
 	up(&cmd->pages_sem);
 	for (i = 0; i < cmd->max_reg_cmds; i++)
 		up(&cmd->sem);
 }
 
-void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd *cmd = &dev->cmd;
-	int i;
-
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		down(&cmd->sem);
-
-	down(&cmd->pages_sem);
-
-	flush_workqueue(cmd->wq);
-	cmd->mode = CMD_MODE_POLLING;
+	mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
 
-	up(&cmd->pages_sem);
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		up(&cmd->sem);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+	mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1250,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 			struct semaphore *sem;
 
 			ent = cmd->ent_arr[i];
+			if (ent->callback)
+				cancel_delayed_work(&ent->cb_timeout_work);
 			if (ent->page_queue)
 				sem = &cmd->pages_sem;
 			else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index da885c0dfebe..4cbd452fec25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -44,6 +44,7 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/transobj.h>
 #include <linux/rhashtable.h>
+#include <net/switchdev.h>
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
@@ -163,7 +164,6 @@ enum mlx5e_priv_flag {
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#define MLX5E_MIN_BW_ALLOC 1   /* Min percentage of BW allocation */
 #endif
 
 struct mlx5e_cq_moder {
@@ -214,6 +214,7 @@ struct mlx5e_tstamp {
 enum {
 	MLX5E_RQ_STATE_POST_WQES_ENABLE,
 	MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
+	MLX5E_RQ_STATE_FLUSH_TIMEOUT,
 	MLX5E_RQ_STATE_AM,
 };
 
@@ -245,6 +246,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
 typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
 				  u16 ix);
 
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
+
 struct mlx5e_dma_info {
 	struct page	*page;
 	dma_addr_t	addr;
@@ -290,6 +293,7 @@ struct mlx5e_rq {
 	struct mlx5e_cq        cq;
 	mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 	mlx5e_fp_alloc_wqe     alloc_wqe;
+	mlx5e_fp_dealloc_wqe   dealloc_wqe;
 
 	unsigned long          state;
 	int                    ix;
@@ -356,6 +360,7 @@ struct mlx5e_sq_dma {
 enum {
 	MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
 	MLX5E_SQ_STATE_BF_ENABLE,
+	MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
@@ -453,7 +458,7 @@ enum mlx5e_traffic_types {
 };
 
 enum {
-	MLX5E_STATE_ASYNC_EVENTS_ENABLE,
+	MLX5E_STATE_ASYNC_EVENTS_ENABLED,
 	MLX5E_STATE_OPENED,
 	MLX5E_STATE_DESTROYING,
 };
@@ -543,8 +548,24 @@ enum {
 	MLX5E_ARFS_FT_LEVEL
 };
 
+struct mlx5e_ethtool_table {
+	struct mlx5_flow_table *ft;
+	int                    num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+	struct mlx5e_ethtool_table      l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+	struct mlx5e_ethtool_table      l2_ft[ETHTOOL_NUM_L2_FTS];
+	struct list_head                rules;
+	int                             tot_num_rules;
+};
+
 struct mlx5e_flow_steering {
 	struct mlx5_flow_namespace      *ns;
+	struct mlx5e_ethtool_steering   ethtool;
 	struct mlx5e_tc_table           tc;
 	struct mlx5e_vlan_table         vlan;
 	struct mlx5e_l2_table           l2;
@@ -552,9 +573,15 @@ struct mlx5e_flow_steering {
 	struct mlx5e_arfs_tables        arfs;
 };
 
-struct mlx5e_direct_tir {
-	u32              tirn;
+struct mlx5e_rqt {
 	u32              rqtn;
+	bool		 enabled;
+};
+
+struct mlx5e_tir {
+	u32		  tirn;
+	struct mlx5e_rqt  rqt;
+	struct list_head  list;
 };
 
 enum {
@@ -562,6 +589,22 @@ enum {
 	MLX5E_NIC_PRIO
 };
 
+struct mlx5e_profile {
+	void	(*init)(struct mlx5_core_dev *mdev,
+			struct net_device *netdev,
+			const struct mlx5e_profile *profile, void *ppriv);
+	void	(*cleanup)(struct mlx5e_priv *priv);
+	int	(*init_rx)(struct mlx5e_priv *priv);
+	void	(*cleanup_rx)(struct mlx5e_priv *priv);
+	int	(*init_tx)(struct mlx5e_priv *priv);
+	void	(*cleanup_tx)(struct mlx5e_priv *priv);
+	void	(*enable)(struct mlx5e_priv *priv);
+	void	(*disable)(struct mlx5e_priv *priv);
+	void	(*update_stats)(struct mlx5e_priv *priv);
+	int	(*max_nch)(struct mlx5_core_dev *mdev);
+	int	max_tc;
+};
+
 struct mlx5e_priv {
 	/* priv data path fields - start */
 	struct mlx5e_sq            **txq_to_sq_map;
@@ -570,18 +613,14 @@ struct mlx5e_priv {
 
 	unsigned long              state;
 	struct mutex               state_lock; /* Protects Interface state */
-	struct mlx5_uar            cq_uar;
-	u32                        pdn;
-	u32                        tdn;
-	struct mlx5_core_mkey      mkey;
 	struct mlx5_core_mkey      umr_mkey;
 	struct mlx5e_rq            drop_rq;
 
 	struct mlx5e_channel     **channel;
 	u32                        tisn[MLX5E_MAX_NUM_TC];
-	u32                        indir_rqtn;
-	u32                        indir_tirn[MLX5E_NUM_INDIR_TIRS];
-	struct mlx5e_direct_tir    direct_tir[MLX5E_MAX_NUM_CHANNELS];
+	struct mlx5e_rqt           indir_rqt;
+	struct mlx5e_tir           indir_tir[MLX5E_NUM_INDIR_TIRS];
+	struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
 	u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
 	struct mlx5e_flow_steering fs;
@@ -591,6 +630,7 @@ struct mlx5e_priv {
 	struct workqueue_struct    *wq;
 	struct work_struct         update_carrier_work;
 	struct work_struct         set_rx_mode_work;
+	struct work_struct         tx_timeout_work;
 	struct delayed_work        update_stats_work;
 
 	u32                        pflags;
@@ -599,6 +639,8 @@ struct mlx5e_priv {
 	struct mlx5e_stats         stats;
 	struct mlx5e_tstamp        tstamp;
 	u16 q_counter;
+	const struct mlx5e_profile *profile;
+	void                      *ppriv;
 };
 
 enum mlx5e_link_mode {
@@ -647,12 +689,16 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
 int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
 				    struct mlx5_cqe64 *cqe,
@@ -680,6 +726,16 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
+			   int location);
+int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
+				struct ethtool_rxnfc *info, u32 *rule_locs);
+int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+			       struct ethtool_rx_flow_spec *fs);
+int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv,
+			      int location);
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
 void mlx5e_set_rx_mode_work(struct work_struct *work);
 
 void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp,
@@ -788,5 +844,39 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 #endif
 
 u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+		     struct mlx5e_tir *tir, u32 *in, int inlen);
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+		       struct mlx5e_tir *tir);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
+
+struct mlx5_eswitch_rep;
+int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
+			 struct mlx5_eswitch_rep *rep);
+void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
+			    struct mlx5_eswitch_rep *rep);
+int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep);
+void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
+			  struct mlx5_eswitch_rep *rep);
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+int mlx5e_close(struct net_device *netdev);
+int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_stats_work(struct work_struct *work);
+void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+			  const struct mlx5e_profile *profile, void *ppriv);
+void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
+struct rtnl_link_stats64 *
+mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 3515e78ba68f..a8cb38789774 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
 static int arfs_disable(struct mlx5e_priv *priv)
 {
 	struct mlx5_flow_destination dest;
-	u32 *tirn = priv->indir_tirn;
+	struct mlx5e_tir *tir = priv->indir_tir;
 	int err = 0;
 	int tt;
 	int i;
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	for (i = 0; i < ARFS_NUM_TYPES; i++) {
-		dest.tir_num = tirn[i];
+		dest.tir_num = tir[i].tirn;
 		tt = arfs_get_tt(i);
 		/* Modify ttc rules destination to bypass the aRFS tables*/
 		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
@@ -175,15 +175,12 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 {
 	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
 	struct mlx5_flow_destination dest;
-	u8 match_criteria_enable = 0;
-	u32 *tirn = priv->indir_tirn;
-	u32 *match_criteria;
-	u32 *match_value;
+	struct mlx5e_tir *tir = priv->indir_tir;
+	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	match_value	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	match_criteria	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!match_value || !match_criteria) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
 		err = -ENOMEM;
 		goto out;
@@ -192,24 +189,23 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	switch (type) {
 	case ARFS_IPV4_TCP:
-		dest.tir_num = tirn[MLX5E_TT_IPV4_TCP];
+		dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn;
 		break;
 	case ARFS_IPV4_UDP:
-		dest.tir_num = tirn[MLX5E_TT_IPV4_UDP];
+		dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn;
 		break;
 	case ARFS_IPV6_TCP:
-		dest.tir_num = tirn[MLX5E_TT_IPV6_TCP];
+		dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn;
 		break;
 	case ARFS_IPV6_UDP:
-		dest.tir_num = tirn[MLX5E_TT_IPV6_UDP];
+		dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn;
 		break;
 	default:
 		err = -EINVAL;
 		goto out;
 	}
 
-	arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable,
-						  match_criteria, match_value,
+	arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec,
 						  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 						  MLX5_FS_DEFAULT_FLOW_TAG,
 						  &dest);
@@ -220,8 +216,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 			   __func__, type);
 	}
 out:
-	kvfree(match_criteria);
-	kvfree(match_value);
+	kvfree(spec);
 	return err;
 }
 
@@ -475,23 +470,20 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_rule *rule = NULL;
 	struct mlx5_flow_destination dest;
 	struct arfs_table *arfs_table;
-	u8 match_criteria_enable = 0;
+	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_table *ft;
-	u32 *match_criteria;
-	u32 *match_value;
 	int err = 0;
 
-	match_value	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	match_criteria	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!match_value || !match_criteria) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
 		err = -ENOMEM;
 		goto out;
 	}
-	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 			 outer_headers.ethertype);
-	MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
 		 ntohs(tuple->etype));
 	arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
 	if (!arfs_table) {
@@ -501,59 +493,58 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
 
 	ft = arfs_table->ft.t;
 	if (tuple->ip_proto == IPPROTO_TCP) {
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.tcp_dport);
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.tcp_sport);
-		MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport,
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
 			 ntohs(tuple->dst_port));
-		MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport,
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
 			 ntohs(tuple->src_port));
 	} else {
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.udp_dport);
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.udp_sport);
-		MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport,
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
 			 ntohs(tuple->dst_port));
-		MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport,
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
 			 ntohs(tuple->src_port));
 	}
 	if (tuple->etype == htons(ETH_P_IP)) {
-		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
 		       &tuple->src_ipv4,
 		       4);
-		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &tuple->dst_ipv4,
 		       4);
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
 	} else {
-		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
 		       &tuple->src_ipv6,
 		       16);
-		memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
+		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &tuple->dst_ipv6,
 		       16);
-		memset(MLX5_ADDR_OF(fte_match_param, match_criteria,
+		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
 		       0xff,
 		       16);
-		memset(MLX5_ADDR_OF(fte_match_param, match_criteria,
+		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       0xff,
 		       16);
 	}
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
-	rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
-				  match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+	rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				  MLX5_FS_DEFAULT_FLOW_TAG,
 				  &dest);
 	if (IS_ERR(rule)) {
@@ -563,8 +554,7 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
 	}
 
 out:
-	kvfree(match_criteria);
-	kvfree(match_value);
+	kvfree(spec);
 	return err ? ERR_PTR(err) : rule;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
new file mode 100644
index 000000000000..673043ccd76c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+
+/* mlx5e global resources should be placed in this file.
+ * Global resources are common to all the netdevices crated on the same nic.
+ */
+
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+		     struct mlx5e_tir *tir, u32 *in, int inlen)
+{
+	int err;
+
+	err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn);
+	if (err)
+		return err;
+
+	list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+
+	return 0;
+}
+
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+		       struct mlx5e_tir *tir)
+{
+	mlx5_core_destroy_tir(mdev, tir->tirn);
+	list_del(&tir->list);
+}
+
+static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+			     struct mlx5_core_mkey *mkey)
+{
+	struct mlx5_create_mkey_mbox_in *in;
+	int err;
+
+	in = mlx5_vzalloc(sizeof(*in));
+	if (!in)
+		return -ENOMEM;
+
+	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
+			MLX5_PERM_LOCAL_READ  |
+			MLX5_ACCESS_MODE_PA;
+	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
+	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+
+	err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
+				    NULL);
+
+	kvfree(in);
+
+	return err;
+}
+
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
+{
+	struct mlx5e_resources *res = &mdev->mlx5e_res;
+	int err;
+
+	err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false);
+	if (err) {
+		mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err);
+		return err;
+	}
+
+	err = mlx5_core_alloc_pd(mdev, &res->pdn);
+	if (err) {
+		mlx5_core_err(mdev, "alloc pd failed, %d\n", err);
+		goto err_unmap_free_uar;
+	}
+
+	err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn);
+	if (err) {
+		mlx5_core_err(mdev, "alloc td failed, %d\n", err);
+		goto err_dealloc_pd;
+	}
+
+	err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey);
+	if (err) {
+		mlx5_core_err(mdev, "create mkey failed, %d\n", err);
+		goto err_dealloc_transport_domain;
+	}
+
+	INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+
+	return 0;
+
+err_dealloc_transport_domain:
+	mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+err_dealloc_pd:
+	mlx5_core_dealloc_pd(mdev, res->pdn);
+err_unmap_free_uar:
+	mlx5_unmap_free_uar(mdev, &res->cq_uar);
+
+	return err;
+}
+
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
+{
+	struct mlx5e_resources *res = &mdev->mlx5e_res;
+
+	mlx5_core_destroy_mkey(mdev, &res->mkey);
+	mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+	mlx5_core_dealloc_pd(mdev, res->pdn);
+	mlx5_unmap_free_uar(mdev, &res->cq_uar);
+}
+
+int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
+{
+	struct mlx5e_tir *tir;
+	void *in;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
+		err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
+		if (err)
+			return err;
+	}
+
+	kvfree(in);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index e6883132b555..caa9a3ccc3f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
 			tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
 			break;
 		case IEEE_8021QAZ_TSA_ETS:
-			tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC;
+			tc_tx_bw[i] = ets->tc_tx_bw[i];
 			break;
 		}
 	}
@@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
 
 	/* Validate Bandwidth Sum */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+			if (!ets->tc_tx_bw[i])
+				return -EINVAL;
+
 			bw_sum += ets->tc_tx_bw[i];
+		}
 	}
 
 	if (bw_sum != 0 && bw_sum != 100)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 39a4d961a58e..4a3757e60441 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -139,6 +139,18 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
 	return err ? 0 : pfc_en_tx | pfc_en_rx;
 }
 
+static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 rx_pause;
+	u32 tx_pause;
+	int err;
+
+	err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+	return err ? false : rx_pause | tx_pause;
+}
+
 #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter))
 #define MLX5E_NUM_RQ_STATS(priv) \
 	(NUM_RQ_STATS * priv->params.num_channels * \
@@ -146,7 +158,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
 #define MLX5E_NUM_SQ_STATS(priv) \
 	(NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \
 	 test_bit(MLX5E_STATE_OPENED, &priv->state))
-#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv))
+#define MLX5E_NUM_PFC_COUNTERS(priv) \
+	((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \
+	  NUM_PPORT_PER_PRIO_PFC_COUNTERS)
 
 static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 {
@@ -175,42 +189,51 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 
 	/* SW counters */
 	for (i = 0; i < NUM_SW_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
 
 	/* Q counters */
 	for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
 
 	/* VPORT counters */
 	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       vport_stats_desc[i].name);
+		       vport_stats_desc[i].format);
 
 	/* PPORT counters */
 	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_802_3_stats_desc[i].name);
+		       pport_802_3_stats_desc[i].format);
 
 	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2863_stats_desc[i].name);
+		       pport_2863_stats_desc[i].format);
 
 	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2819_stats_desc[i].name);
+		       pport_2819_stats_desc[i].format);
 
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-				prio,
-				pport_per_prio_traffic_stats_desc[i].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_traffic_stats_desc[i].format, prio);
 	}
 
 	pfc_combined = mlx5e_query_pfc_combined(priv);
 	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-				prio, pport_per_prio_pfc_stats_desc[i].name);
+			char pfc_string[ETH_GSTRING_LEN];
+
+			snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_pfc_stats_desc[i].format, pfc_string);
+		}
+	}
+
+	if (mlx5e_query_global_pause_combined(priv)) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_pfc_stats_desc[i].format, "global");
 		}
 	}
 
@@ -220,16 +243,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 	/* per channel counters */
 	for (i = 0; i < priv->params.num_channels; i++)
 		for (j = 0; j < NUM_RQ_STATS; j++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i,
-				rq_stats_desc[j].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				rq_stats_desc[j].format, i);
 
 	for (tc = 0; tc < priv->params.num_tc; tc++)
 		for (i = 0; i < priv->params.num_channels; i++)
 			for (j = 0; j < NUM_SQ_STATS; j++)
 				sprintf(data + (idx++) * ETH_GSTRING_LEN,
-					"tx%d_%s",
-					priv->channeltc_to_txq_map[i][tc],
-					sq_stats_desc[j].name);
+					sq_stats_desc[j].format,
+					priv->channeltc_to_txq_map[i][tc]);
 }
 
 static void mlx5e_get_strings(struct net_device *dev,
@@ -306,6 +328,13 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
 		}
 	}
 
+	if (mlx5e_query_global_pause_combined(priv)) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+							  pport_per_prio_pfc_stats_desc, 0);
+		}
+	}
+
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return;
 
@@ -876,7 +905,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
 	mlx5e_build_tir_ctx_hash(tirc, priv);
 
 	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
-		mlx5_core_modify_tir(mdev, priv->indir_tirn[i], in, inlen);
+		mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen);
 }
 
 static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
@@ -898,7 +927,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 	mutex_lock(&priv->state_lock);
 
 	if (indir) {
-		u32 rqtn = priv->indir_rqtn;
+		u32 rqtn = priv->indir_rqt.rqtn;
 
 		memcpy(priv->params.indirection_rqt, indir,
 		       sizeof(priv->params.indirection_rqt));
@@ -931,6 +960,15 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
 	case ETHTOOL_GRXRINGS:
 		info->data = priv->params.num_channels;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		info->rule_cnt = priv->fs.ethtool.tot_num_rules;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -1368,6 +1406,26 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev)
 	return priv->pflags;
 }
 
+static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	int err = 0;
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		err = mlx5e_ethtool_flow_replace(priv, &cmd->fs);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
 	.get_drvinfo       = mlx5e_get_drvinfo,
 	.get_link          = ethtool_op_get_link,
@@ -1387,6 +1445,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.get_rxfh          = mlx5e_get_rxfh,
 	.set_rxfh          = mlx5e_set_rxfh,
 	.get_rxnfc         = mlx5e_get_rxnfc,
+	.set_rxnfc         = mlx5e_set_rxnfc,
 	.get_tunable       = mlx5e_get_tunable,
 	.set_tunable       = mlx5e_set_tunable,
 	.get_pauseparam    = mlx5e_get_pauseparam,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index b32740092854..1587a9fd5724 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -156,19 +156,18 @@ enum mlx5e_vlan_rule_type {
 
 static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 				 enum mlx5e_vlan_rule_type rule_type,
-				 u16 vid, u32 *mc, u32 *mv)
+				 u16 vid, struct mlx5_flow_spec *spec)
 {
 	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
 	struct mlx5_flow_destination dest;
-	u8 match_criteria_enable = 0;
 	struct mlx5_flow_rule **rule_p;
 	int err = 0;
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dest.ft = priv->fs.l2.ft.t;
 
-	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag);
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
 
 	switch (rule_type) {
 	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
@@ -176,17 +175,19 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
 		rule_p = &priv->fs.vlan.any_vlan_rule;
-		MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1);
 		break;
 	default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
 		rule_p = &priv->fs.vlan.active_vlans_rule[vid];
-		MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1);
-		MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
-		MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.first_vid);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+			 vid);
 		break;
 	}
 
-	*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+	*rule_p = mlx5_add_flow_rule(ft, spec,
 				     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				     MLX5_FS_DEFAULT_FLOW_TAG,
 				     &dest);
@@ -203,27 +204,21 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 			       enum mlx5e_vlan_rule_type rule_type, u16 vid)
 {
-	u32 *match_criteria;
-	u32 *match_value;
+	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	match_value	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	match_criteria	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!match_value || !match_criteria) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
-		err = -ENOMEM;
-		goto add_vlan_rule_out;
+		return -ENOMEM;
 	}
 
 	if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID)
 		mlx5e_vport_context_update_vlans(priv);
 
-	err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria,
-				    match_value);
+	err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec);
 
-add_vlan_rule_out:
-	kvfree(match_criteria);
-	kvfree(match_value);
+	kvfree(spec);
 
 	return err;
 }
@@ -598,32 +593,27 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 						      u8 proto)
 {
 	struct mlx5_flow_rule *rule;
-	u8 match_criteria_enable = 0;
-	u32 *match_criteria;
-	u32 *match_value;
+	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	match_value	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	match_criteria	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!match_value || !match_criteria) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
-		err = -ENOMEM;
-		goto out;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	if (proto) {
-		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ip_protocol);
-		MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, proto);
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
 	}
 	if (etype) {
-		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ethertype);
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, etype);
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
 	}
 
-	rule = mlx5_add_flow_rule(ft, match_criteria_enable,
-				  match_criteria, match_value,
+	rule = mlx5_add_flow_rule(ft, spec,
 				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				  MLX5_FS_DEFAULT_FLOW_TAG,
 				  dest);
@@ -631,9 +621,8 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
 	}
-out:
-	kvfree(match_criteria);
-	kvfree(match_value);
+
+	kvfree(spec);
 	return err ? ERR_PTR(err) : rule;
 }
 
@@ -655,7 +644,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv)
 		if (tt == MLX5E_TT_ANY)
 			dest.tir_num = priv->direct_tir[0].tirn;
 		else
-			dest.tir_num = priv->indir_tirn[tt];
+			dest.tir_num = priv->indir_tir[tt].tirn;
 		rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
 						    ttc_rules[tt].etype,
 						    ttc_rules[tt].proto);
@@ -792,24 +781,20 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 {
 	struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
 	struct mlx5_flow_destination dest;
-	u8 match_criteria_enable = 0;
-	u32 *match_criteria;
-	u32 *match_value;
+	struct mlx5_flow_spec *spec;
 	int err = 0;
 	u8 *mc_dmac;
 	u8 *mv_dmac;
 
-	match_value    = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!match_value || !match_criteria) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
-		err = -ENOMEM;
-		goto add_l2_rule_out;
+		return -ENOMEM;
 	}
 
-	mc_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+	mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 			       outer_headers.dmac_47_16);
-	mv_dmac = MLX5_ADDR_OF(fte_match_param, match_value,
+	mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 			       outer_headers.dmac_47_16);
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -817,13 +802,13 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 
 	switch (type) {
 	case MLX5E_FULLMATCH:
-		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 		eth_broadcast_addr(mc_dmac);
 		ether_addr_copy(mv_dmac, ai->addr);
 		break;
 
 	case MLX5E_ALLMULTI:
-		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 		mc_dmac[0] = 0x01;
 		mv_dmac[0] = 0x01;
 		break;
@@ -832,8 +817,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	ai->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
-				      match_value,
+	ai->rule = mlx5_add_flow_rule(ft, spec,
 				      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				      MLX5_FS_DEFAULT_FLOW_TAG, &dest);
 	if (IS_ERR(ai->rule)) {
@@ -843,9 +827,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 		ai->rule = NULL;
 	}
 
-add_l2_rule_out:
-	kvfree(match_criteria);
-	kvfree(match_value);
+	kvfree(spec);
 
 	return err;
 }
@@ -1102,6 +1084,8 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
 		goto err_destroy_l2_table;
 	}
 
+	mlx5e_ethtool_init_steering(priv);
+
 	return 0;
 
 err_destroy_l2_table:
@@ -1121,4 +1105,5 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
 	mlx5e_destroy_l2_table(priv);
 	mlx5e_destroy_ttc_table(priv);
 	mlx5e_arfs_destroy_tables(priv);
+	mlx5e_ethtool_cleanup_steering(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
new file mode 100644
index 000000000000..d17c24227900
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+struct mlx5e_ethtool_rule {
+	struct list_head             list;
+	struct ethtool_rx_flow_spec  flow_spec;
+	struct mlx5_flow_rule        *rule;
+	struct mlx5e_ethtool_table   *eth_ft;
+};
+
+static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
+{
+	if (!--eth_ft->num_rules) {
+		mlx5_destroy_flow_table(eth_ft->ft);
+		eth_ft->ft = NULL;
+	}
+}
+
+#define MLX5E_ETHTOOL_L3_L4_PRIO 0
+#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS)
+#define MLX5E_ETHTOOL_NUM_ENTRIES 64000
+#define MLX5E_ETHTOOL_NUM_GROUPS  10
+static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
+						  struct ethtool_rx_flow_spec *fs,
+						  int num_tuples)
+{
+	struct mlx5e_ethtool_table *eth_ft;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *ft;
+	int max_tuples;
+	int table_size;
+	int prio;
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+		prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+		eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+		break;
+	case IP_USER_FLOW:
+		max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+		prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+		eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+		break;
+	case ETHER_FLOW:
+		max_tuples = ETHTOOL_NUM_L2_FTS;
+		prio = max_tuples - num_tuples;
+		eth_ft = &priv->fs.ethtool.l2_ft[prio];
+		prio += MLX5E_ETHTOOL_L2_PRIO;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	eth_ft->num_rules++;
+	if (eth_ft->ft)
+		return eth_ft;
+
+	ns = mlx5_get_flow_namespace(priv->mdev,
+				     MLX5_FLOW_NAMESPACE_ETHTOOL);
+	if (!ns)
+		return ERR_PTR(-ENOTSUPP);
+
+	table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev,
+						       flow_table_properties_nic_receive.log_max_ft_size)),
+			   MLX5E_ETHTOOL_NUM_ENTRIES);
+	ft = mlx5_create_auto_grouped_flow_table(ns, prio,
+						 table_size,
+						 MLX5E_ETHTOOL_NUM_GROUPS, 0);
+	if (IS_ERR(ft))
+		return (void *)ft;
+
+	eth_ft->ft = ft;
+	return eth_ft;
+}
+
+static void mask_spec(u8 *mask, u8 *val, size_t size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++, mask++, val++)
+		*((u8 *)val) = *((u8 *)mask) & *((u8 *)val);
+}
+
+static void set_ips(void *outer_headers_v, void *outer_headers_c, __be32 ip4src_m,
+		    __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v)
+{
+	if (ip4src_m) {
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
+		       &ip4src_v, sizeof(ip4src_v));
+		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
+		       0xff, sizeof(ip4src_m));
+	}
+	if (ip4dst_m) {
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+		       &ip4dst_v, sizeof(ip4dst_v));
+		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+		       0xff, sizeof(ip4dst_m));
+	}
+	MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		 ethertype, ETH_P_IP);
+	MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		 ethertype, 0xffff);
+}
+
+static int set_flow_attrs(u32 *match_c, u32 *match_v,
+			  struct ethtool_rx_flow_spec *fs)
+{
+	void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					     outer_headers);
+	void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					     outer_headers);
+	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+	struct ethtool_tcpip4_spec *l4_mask;
+	struct ethtool_tcpip4_spec *l4_val;
+	struct ethtool_usrip4_spec *l3_mask;
+	struct ethtool_usrip4_spec *l3_val;
+	struct ethhdr *eth_val;
+	struct ethhdr *eth_mask;
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+		l4_mask = &fs->m_u.tcp_ip4_spec;
+		l4_val = &fs->h_u.tcp_ip4_spec;
+		set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src,
+			l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst);
+
+		if (l4_mask->psrc) {
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+				 0xffff);
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+				 ntohs(l4_val->psrc));
+		}
+		if (l4_mask->pdst) {
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+				 0xffff);
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+				 ntohs(l4_val->pdst));
+		}
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+			 0xffff);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+			 IPPROTO_TCP);
+		break;
+	case UDP_V4_FLOW:
+		l4_mask = &fs->m_u.tcp_ip4_spec;
+		l4_val = &fs->h_u.tcp_ip4_spec;
+		set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src,
+			l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst);
+
+		if (l4_mask->psrc) {
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+				 0xffff);
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+				 ntohs(l4_val->psrc));
+		}
+		if (l4_mask->pdst) {
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+				 0xffff);
+			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+				 ntohs(l4_val->pdst));
+		}
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+			 0xffff);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+			 IPPROTO_UDP);
+		break;
+	case IP_USER_FLOW:
+		l3_mask = &fs->m_u.usr_ip4_spec;
+		l3_val = &fs->h_u.usr_ip4_spec;
+		set_ips(outer_headers_v, outer_headers_c, l3_mask->ip4src,
+			l3_val->ip4src, l3_mask->ip4dst, l3_val->ip4dst);
+		break;
+	case ETHER_FLOW:
+		eth_mask = &fs->m_u.ether_spec;
+		eth_val = &fs->h_u.ether_spec;
+
+		mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask));
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+					     outer_headers_c, smac_47_16),
+				eth_mask->h_source);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+					     outer_headers_v, smac_47_16),
+				eth_val->h_source);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+					     outer_headers_c, dmac_47_16),
+				eth_mask->h_dest);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+					     outer_headers_v, dmac_47_16),
+				eth_val->h_dest);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype,
+			 ntohs(eth_mask->h_proto));
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype,
+			 ntohs(eth_val->h_proto));
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if ((fs->flow_type & FLOW_EXT) &&
+	    (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) {
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			 vlan_tag, 1);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			 vlan_tag, 1);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			 first_vid, 0xfff);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			 first_vid, ntohs(fs->h_ext.vlan_tci));
+	}
+	if (fs->flow_type & FLOW_MAC_EXT &&
+	    !is_zero_ether_addr(fs->m_ext.h_dest)) {
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+					     outer_headers_c, dmac_47_16),
+				fs->m_ext.h_dest);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+					     outer_headers_v, dmac_47_16),
+				fs->h_ext.h_dest);
+	}
+
+	return 0;
+}
+
+static void add_rule_to_list(struct mlx5e_priv *priv,
+			     struct mlx5e_ethtool_rule *rule)
+{
+	struct mlx5e_ethtool_rule *iter;
+	struct list_head *head = &priv->fs.ethtool.rules;
+
+	list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+		if (iter->flow_spec.location > rule->flow_spec.location)
+			break;
+		head = &iter->list;
+	}
+	priv->fs.ethtool.tot_num_rules++;
+	list_add(&rule->list, head);
+}
+
+static bool outer_header_zero(u32 *match_criteria)
+{
+	int size = MLX5_ST_SZ_BYTES(fte_match_param);
+	char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+					     outer_headers);
+
+	return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+						  outer_headers_c + 1,
+						  size - 1);
+}
+
+static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
+						    struct mlx5_flow_table *ft,
+						    struct ethtool_rx_flow_spec *fs)
+{
+	struct mlx5_flow_destination *dst = NULL;
+	struct mlx5_flow_spec *spec;
+	struct mlx5_flow_rule *rule;
+	int err = 0;
+	u32 action;
+
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+	err = set_flow_attrs(spec->match_criteria, spec->match_value,
+			     fs);
+	if (err)
+		goto free;
+
+	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
+		action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+	} else {
+		dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+		if (!dst) {
+			err = -ENOMEM;
+			goto free;
+		}
+
+		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+		dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
+		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	}
+
+	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
+	rule = mlx5_add_flow_rule(ft, spec, action,
+				  MLX5_FS_DEFAULT_FLOW_TAG, dst);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
+			   __func__, err);
+		goto free;
+	}
+free:
+	kvfree(spec);
+	kfree(dst);
+	return err ? ERR_PTR(err) : rule;
+}
+
+static void del_ethtool_rule(struct mlx5e_priv *priv,
+			     struct mlx5e_ethtool_rule *eth_rule)
+{
+	if (eth_rule->rule)
+		mlx5_del_flow_rule(eth_rule->rule);
+	list_del(&eth_rule->list);
+	priv->fs.ethtool.tot_num_rules--;
+	put_flow_table(eth_rule->eth_ft);
+	kfree(eth_rule);
+}
+
+static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
+						    int location)
+{
+	struct mlx5e_ethtool_rule *iter;
+
+	list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+		if (iter->flow_spec.location == location)
+			return iter;
+	}
+	return NULL;
+}
+
+static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
+						   int location)
+{
+	struct mlx5e_ethtool_rule *eth_rule;
+
+	eth_rule = find_ethtool_rule(priv, location);
+	if (eth_rule)
+		del_ethtool_rule(priv, eth_rule);
+
+	eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
+	if (!eth_rule)
+		return ERR_PTR(-ENOMEM);
+
+	add_rule_to_list(priv, eth_rule);
+	return eth_rule;
+}
+
+#define MAX_NUM_OF_ETHTOOL_RULES BIT(10)
+
+#define all_ones(field) (field == (__force typeof(field))-1)
+#define all_zeros_or_all_ones(field)		\
+	((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int validate_flow(struct mlx5e_priv *priv,
+			 struct ethtool_rx_flow_spec *fs)
+{
+	struct ethtool_tcpip4_spec *l4_mask;
+	struct ethtool_usrip4_spec *l3_mask;
+	struct ethhdr *eth_mask;
+	int num_tuples = 0;
+
+	if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
+		return -EINVAL;
+
+	if (fs->ring_cookie >= priv->params.num_channels &&
+	    fs->ring_cookie != RX_CLS_FLOW_DISC)
+		return -EINVAL;
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case ETHER_FLOW:
+		eth_mask = &fs->m_u.ether_spec;
+		if (!is_zero_ether_addr(eth_mask->h_dest))
+			num_tuples++;
+		if (!is_zero_ether_addr(eth_mask->h_source))
+			num_tuples++;
+		if (eth_mask->h_proto)
+			num_tuples++;
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		if (fs->m_u.tcp_ip4_spec.tos)
+			return -EINVAL;
+		l4_mask = &fs->m_u.tcp_ip4_spec;
+		if (l4_mask->ip4src) {
+			if (!all_ones(l4_mask->ip4src))
+				return -EINVAL;
+			num_tuples++;
+		}
+		if (l4_mask->ip4dst) {
+			if (!all_ones(l4_mask->ip4dst))
+				return -EINVAL;
+			num_tuples++;
+		}
+		if (l4_mask->psrc) {
+			if (!all_ones(l4_mask->psrc))
+				return -EINVAL;
+			num_tuples++;
+		}
+		if (l4_mask->pdst) {
+			if (!all_ones(l4_mask->pdst))
+				return -EINVAL;
+			num_tuples++;
+		}
+		/* Flow is TCP/UDP */
+		num_tuples++;
+		break;
+	case IP_USER_FLOW:
+		l3_mask = &fs->m_u.usr_ip4_spec;
+		if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto ||
+		    fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4)
+			return -EINVAL;
+		if (l3_mask->ip4src) {
+			if (!all_ones(l3_mask->ip4src))
+				return -EINVAL;
+			num_tuples++;
+		}
+		if (l3_mask->ip4dst) {
+			if (!all_ones(l3_mask->ip4dst))
+				return -EINVAL;
+			num_tuples++;
+		}
+		/* Flow is IPv4 */
+		num_tuples++;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if ((fs->flow_type & FLOW_EXT)) {
+		if (fs->m_ext.vlan_etype ||
+		    (fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK)))
+			return -EINVAL;
+
+		if (fs->m_ext.vlan_tci) {
+			if (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)
+				return -EINVAL;
+		}
+		num_tuples++;
+	}
+
+	if (fs->flow_type & FLOW_MAC_EXT &&
+	    !is_zero_ether_addr(fs->m_ext.h_dest))
+		num_tuples++;
+
+	return num_tuples;
+}
+
+int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+			       struct ethtool_rx_flow_spec *fs)
+{
+	struct mlx5e_ethtool_table *eth_ft;
+	struct mlx5e_ethtool_rule *eth_rule;
+	struct mlx5_flow_rule *rule;
+	int num_tuples;
+	int err;
+
+	num_tuples = validate_flow(priv, fs);
+	if (num_tuples <= 0) {
+		netdev_warn(priv->netdev, "%s: flow is not valid\n",  __func__);
+		return -EINVAL;
+	}
+
+	eth_ft = get_flow_table(priv, fs, num_tuples);
+	if (IS_ERR(eth_ft))
+		return PTR_ERR(eth_ft);
+
+	eth_rule = get_ethtool_rule(priv, fs->location);
+	if (IS_ERR(eth_rule)) {
+		put_flow_table(eth_ft);
+		return PTR_ERR(eth_rule);
+	}
+
+	eth_rule->flow_spec = *fs;
+	eth_rule->eth_ft = eth_ft;
+	if (!eth_ft->ft) {
+		err = -EINVAL;
+		goto del_ethtool_rule;
+	}
+	rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		goto del_ethtool_rule;
+	}
+
+	eth_rule->rule = rule;
+
+	return 0;
+
+del_ethtool_rule:
+	del_ethtool_rule(priv, eth_rule);
+
+	return err;
+}
+
+int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv,
+			      int location)
+{
+	struct mlx5e_ethtool_rule *eth_rule;
+	int err = 0;
+
+	if (location >= MAX_NUM_OF_ETHTOOL_RULES)
+		return -ENOSPC;
+
+	eth_rule = find_ethtool_rule(priv, location);
+	if (!eth_rule) {
+		err =  -ENOENT;
+		goto out;
+	}
+
+	del_ethtool_rule(priv, eth_rule);
+out:
+	return err;
+}
+
+int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
+			   int location)
+{
+	struct mlx5e_ethtool_rule *eth_rule;
+
+	if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
+		return -EINVAL;
+
+	list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
+		if (eth_rule->flow_spec.location == location) {
+			info->fs = eth_rule->flow_spec;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
+				u32 *rule_locs)
+{
+	int location = 0;
+	int idx = 0;
+	int err = 0;
+
+	while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
+		err = mlx5e_ethtool_get_flow(priv, info, location);
+		if (!err)
+			rule_locs[idx++] = location;
+		location++;
+	}
+	return err;
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ethtool_rule *iter;
+	struct mlx5e_ethtool_rule *temp;
+
+	list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list)
+		del_ethtool_rule(priv, iter);
+}
+
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
+{
+	INIT_LIST_HEAD(&priv->fs.ethtool.rules);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 02a0f1796f7b..611ab550136e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,13 @@
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+	MLX5_EN_QP_FLUSH_TIMEOUT_MS	= 5000,
+	MLX5_EN_QP_FLUSH_MSLEEP_QUANT	= 20,
+	MLX5_EN_QP_FLUSH_MAX_ITER	= MLX5_EN_QP_FLUSH_TIMEOUT_MS /
+					  MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
+};
+
 struct mlx5e_rq_param {
 	u32			rqc[MLX5_ST_SZ_DW(rqc)];
 	struct mlx5_wq_param	wq;
@@ -76,10 +83,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 	port_state = mlx5_query_vport_state(mdev,
 		MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
 
-	if (port_state == VPORT_STATE_UP)
+	if (port_state == VPORT_STATE_UP) {
+		netdev_info(priv->netdev, "Link up\n");
 		netif_carrier_on(priv->netdev);
-	else
+	} else {
+		netdev_info(priv->netdev, "Link down\n");
 		netif_carrier_off(priv->netdev);
+	}
 }
 
 static void mlx5e_update_carrier_work(struct work_struct *work)
@@ -93,6 +103,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
 	mutex_unlock(&priv->state_lock);
 }
 
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+					       tx_timeout_work);
+	int err;
+
+	rtnl_lock();
+	mutex_lock(&priv->state_lock);
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+	mlx5e_close_locked(priv->netdev);
+	err = mlx5e_open_locked(priv->netdev);
+	if (err)
+		netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+			   err);
+unlock:
+	mutex_unlock(&priv->state_lock);
+	rtnl_unlock();
+}
+
 static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -107,11 +137,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 
 		s->rx_packets	+= rq_stats->packets;
 		s->rx_bytes	+= rq_stats->bytes;
-		s->lro_packets	+= rq_stats->lro_packets;
-		s->lro_bytes	+= rq_stats->lro_bytes;
+		s->rx_lro_packets += rq_stats->lro_packets;
+		s->rx_lro_bytes	+= rq_stats->lro_bytes;
 		s->rx_csum_none	+= rq_stats->csum_none;
-		s->rx_csum_sw	+= rq_stats->csum_sw;
-		s->rx_csum_inner += rq_stats->csum_inner;
+		s->rx_csum_complete += rq_stats->csum_complete;
+		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
 		s->rx_wqe_err   += rq_stats->wqe_err;
 		s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
 		s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
@@ -124,24 +154,23 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 
 			s->tx_packets		+= sq_stats->packets;
 			s->tx_bytes		+= sq_stats->bytes;
-			s->tso_packets		+= sq_stats->tso_packets;
-			s->tso_bytes		+= sq_stats->tso_bytes;
-			s->tso_inner_packets	+= sq_stats->tso_inner_packets;
-			s->tso_inner_bytes	+= sq_stats->tso_inner_bytes;
+			s->tx_tso_packets	+= sq_stats->tso_packets;
+			s->tx_tso_bytes		+= sq_stats->tso_bytes;
+			s->tx_tso_inner_packets	+= sq_stats->tso_inner_packets;
+			s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
-			s->tx_csum_inner	+= sq_stats->csum_offload_inner;
-			tx_offload_none		+= sq_stats->csum_offload_none;
+			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+			tx_offload_none		+= sq_stats->csum_none;
 		}
 	}
 
 	/* Update calculated offload counters */
-	s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
-	s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
-			     s->rx_csum_sw;
+	s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner;
+	s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete;
 
-	s->link_down_events = MLX5_GET(ppcnt_reg,
+	s->link_down_events_phy = MLX5_GET(ppcnt_reg,
 				priv->stats.pport.phy_counters,
 				counter_set.phys_layer_cntrs.link_down_events);
 }
@@ -227,14 +256,14 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
 	mlx5e_update_sw_counters(priv);
 }
 
-static void mlx5e_update_stats_work(struct work_struct *work)
+void mlx5e_update_stats_work(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv,
 					       update_stats_work);
 	mutex_lock(&priv->state_lock);
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		mlx5e_update_stats(priv);
+		priv->profile->update_stats(priv);
 		queue_delayed_work(priv->wq, dwork,
 				   msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL));
 	}
@@ -246,7 +275,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 {
 	struct mlx5e_priv *priv = vpriv;
 
-	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
 		return;
 
 	switch (event) {
@@ -262,12 +291,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
 	synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
 }
 
@@ -308,6 +337,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		}
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
 		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
 		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
 		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
@@ -323,6 +353,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		}
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
 		rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
 		rq->wqe_sz = (priv->params.lro_en) ?
 				priv->params.lro_wqe_sz :
@@ -534,12 +565,19 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
+	int tout = 0;
+	int err;
+
 	clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
 	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
 
-	mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-	while (!mlx5_wq_ll_is_empty(&rq->wq))
-		msleep(20);
+	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+	while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
+	       tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
+		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+
+	if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
+		set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
 
 	/* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
 	napi_synchronize(&rq->channel->napi);
@@ -547,6 +585,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
 	cancel_work_sync(&rq->am.work);
 
 	mlx5e_disable_rq(rq);
+	mlx5e_free_rx_descs(rq);
 	mlx5e_destroy_rq(rq);
 }
 
@@ -590,7 +629,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	int err;
 
-	err = mlx5_alloc_map_uar(mdev, &sq->uar, true);
+	err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
 	if (err)
 		return err;
 
@@ -801,6 +840,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
+	int tout = 0;
+	int err;
+
 	if (sq->txq) {
 		clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
 		/* prevent netif_tx_wake_queue */
@@ -811,16 +853,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
 		if (mlx5e_sq_has_room_for(sq, 1))
 			mlx5e_send_nop(sq, true);
 
-		mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR,
-				false, 0);
+		err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
+				      MLX5_SQC_STATE_ERR, false, 0);
+		if (err)
+			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
 	}
 
-	while (sq->cc != sq->pc) /* wait till sq is empty */
-		msleep(20);
+	/* wait till sq is empty, unless a TX timeout occurred on this SQ */
+	while (sq->cc != sq->pc &&
+	       !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
+		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+		if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
+			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+	}
 
 	/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
 	napi_synchronize(&sq->channel->napi);
 
+	mlx5e_free_tx_descs(sq);
 	mlx5e_disable_sq(sq);
 	mlx5e_destroy_sq(sq);
 }
@@ -859,7 +909,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
 	mcq->comp       = mlx5e_completion_event;
 	mcq->event      = mlx5e_cq_error_event;
 	mcq->irqn       = irqn;
-	mcq->uar        = &priv->cq_uar;
+	mcq->uar        = &mdev->mlx5e_res.cq_uar;
 
 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
@@ -1037,7 +1087,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix)
 {
 	int i;
 
-	for (i = 0; i < MLX5E_MAX_NUM_TC; i++)
+	for (i = 0; i < priv->profile->max_tc; i++)
 		priv->channeltc_to_txq_map[ix][i] =
 			ix + i * priv->params.num_channels;
 }
@@ -1137,7 +1187,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->cpu      = cpu;
 	c->pdev     = &priv->mdev->pdev->dev;
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
 	c->num_tc   = priv->params.num_tc;
 
 	if (priv->params.rx_am_enabled)
@@ -1253,7 +1303,7 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 	MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
 	MLX5_SET(wq, wq, log_wq_sz,        priv->params.log_rq_size);
-	MLX5_SET(wq, wq, pd,               priv->pdn);
+	MLX5_SET(wq, wq, pd,               priv->mdev->mlx5e_res.pdn);
 	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 
 	param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
@@ -1278,7 +1328,7 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
-	MLX5_SET(wq, wq, pd,            priv->pdn);
+	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.pdn);
 
 	param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
 }
@@ -1300,7 +1350,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 {
 	void *cqc = param->cqc;
 
-	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
+	MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index);
 }
 
 static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
@@ -1487,7 +1537,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc,
 	MLX5_SET(rqtc, rqtc, rq_num[0], rqn);
 }
 
-static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn)
+static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz,
+			    int ix, struct mlx5e_rqt *rqt)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqtc;
@@ -1510,34 +1561,36 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn)
 	else
 		mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix);
 
-	err = mlx5_core_create_rqt(mdev, in, inlen, rqtn);
+	err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
+	if (!err)
+		rqt->enabled = true;
 
 	kvfree(in);
 	return err;
 }
 
-static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn)
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
 {
-	mlx5_core_destroy_rqt(priv->mdev, rqtn);
+	rqt->enabled = false;
+	mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
 }
 
-static int mlx5e_create_rqts(struct mlx5e_priv *priv)
+static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv)
 {
-	int nch = mlx5e_get_max_num_channels(priv->mdev);
-	u32 *rqtn;
+	struct mlx5e_rqt *rqt = &priv->indir_rqt;
+
+	return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
+}
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rqt *rqt;
 	int err;
 	int ix;
 
-	/* Indirect RQT */
-	rqtn = &priv->indir_rqtn;
-	err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn);
-	if (err)
-		return err;
-
-	/* Direct RQTs */
-	for (ix = 0; ix < nch; ix++) {
-		rqtn = &priv->direct_tir[ix].rqtn;
-		err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn);
+	for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+		rqt = &priv->direct_tir[ix].rqt;
+		err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt);
 		if (err)
 			goto err_destroy_rqts;
 	}
@@ -1546,24 +1599,11 @@ static int mlx5e_create_rqts(struct mlx5e_priv *priv)
 
 err_destroy_rqts:
 	for (ix--; ix >= 0; ix--)
-		mlx5e_destroy_rqt(priv, priv->direct_tir[ix].rqtn);
-
-	mlx5e_destroy_rqt(priv, priv->indir_rqtn);
+		mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt);
 
 	return err;
 }
 
-static void mlx5e_destroy_rqts(struct mlx5e_priv *priv)
-{
-	int nch = mlx5e_get_max_num_channels(priv->mdev);
-	int i;
-
-	for (i = 0; i < nch; i++)
-		mlx5e_destroy_rqt(priv, priv->direct_tir[i].rqtn);
-
-	mlx5e_destroy_rqt(priv, priv->indir_rqtn);
-}
-
 int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -1599,10 +1639,15 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv)
 	u32 rqtn;
 	int ix;
 
-	rqtn = priv->indir_rqtn;
-	mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0);
+	if (priv->indir_rqt.enabled) {
+		rqtn = priv->indir_rqt.rqtn;
+		mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0);
+	}
+
 	for (ix = 0; ix < priv->params.num_channels; ix++) {
-		rqtn = priv->direct_tir[ix].rqtn;
+		if (!priv->direct_tir[ix].rqt.enabled)
+			continue;
+		rqtn = priv->direct_tir[ix].rqt.rqtn;
 		mlx5e_redirect_rqt(priv, rqtn, 1, ix);
 	}
 }
@@ -1662,13 +1707,13 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 	mlx5e_build_tir_ctx_lro(tirc, priv);
 
 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-		err = mlx5_core_modify_tir(mdev, priv->indir_tirn[tt], in,
+		err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in,
 					   inlen);
 		if (err)
 			goto free_in;
 	}
 
-	for (ix = 0; ix < mlx5e_get_max_num_channels(mdev); ix++) {
+	for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
 		err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
 					   in, inlen);
 		if (err)
@@ -1681,40 +1726,6 @@ free_in:
 	return err;
 }
 
-static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
-{
-	void *in;
-	int inlen;
-	int err;
-	int i;
-
-	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
-	in = mlx5_vzalloc(inlen);
-	if (!in)
-		return -ENOMEM;
-
-	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
-
-	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
-		err = mlx5_core_modify_tir(priv->mdev, priv->indir_tirn[i], in,
-					   inlen);
-		if (err)
-			return err;
-	}
-
-	for (i = 0; i < priv->params.num_channels; i++) {
-		err = mlx5_core_modify_tir(priv->mdev,
-					   priv->direct_tir[i].tirn, in,
-					   inlen);
-		if (err)
-			return err;
-	}
-
-	kvfree(in);
-
-	return 0;
-}
-
 static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -1776,13 +1787,17 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 
 	netdev_set_num_tc(netdev, ntc);
 
+	/* Map netdev TCs to offset 0
+	 * We have our own UP to TXQ mapping for QoS
+	 */
 	for (tc = 0; tc < ntc; tc++)
-		netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+		netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
 int mlx5e_open_locked(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int num_txqs;
 	int err;
 
@@ -1805,7 +1820,7 @@ int mlx5e_open_locked(struct net_device *netdev)
 		goto err_clear_state_opened_flag;
 	}
 
-	err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+	err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev);
 	if (err) {
 		netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
 			   __func__, err);
@@ -1818,9 +1833,14 @@ int mlx5e_open_locked(struct net_device *netdev)
 #ifdef CONFIG_RFS_ACCEL
 	priv->netdev->rx_cpu_rmap = priv->mdev->rmap;
 #endif
+	if (priv->profile->update_stats)
+		queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
 
-	queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
-
+	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+		err = mlx5e_add_sqs_fwd_rules(priv);
+		if (err)
+			goto err_close_channels;
+	}
 	return 0;
 
 err_close_channels:
@@ -1830,7 +1850,7 @@ err_clear_state_opened_flag:
 	return err;
 }
 
-static int mlx5e_open(struct net_device *netdev)
+int mlx5e_open(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
@@ -1845,6 +1865,7 @@ static int mlx5e_open(struct net_device *netdev)
 int mlx5e_close_locked(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
 
 	/* May already be CLOSED in case a previous configuration operation
 	 * (e.g RX/TX queue size change) that involves close&open failed.
@@ -1854,6 +1875,9 @@ int mlx5e_close_locked(struct net_device *netdev)
 
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		mlx5e_remove_sqs_fwd_rules(priv);
+
 	mlx5e_timestamp_cleanup(priv);
 	netif_carrier_off(priv->netdev);
 	mlx5e_redirect_rqts(priv);
@@ -1862,7 +1886,7 @@ int mlx5e_close_locked(struct net_device *netdev)
 	return 0;
 }
 
-static int mlx5e_close(struct net_device *netdev)
+int mlx5e_close(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
@@ -1921,7 +1945,7 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv,
 	mcq->comp       = mlx5e_completion_event;
 	mcq->event      = mlx5e_cq_error_event;
 	mcq->irqn       = irqn;
-	mcq->uar        = &priv->cq_uar;
+	mcq->uar        = &mdev->mlx5e_res.cq_uar;
 
 	cq->priv = priv;
 
@@ -1987,7 +2011,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc)
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(tisc, tisc, prio, tc << 1);
-	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
+	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
 
 	return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]);
 }
@@ -1997,12 +2021,12 @@ static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc)
 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
 }
 
-static int mlx5e_create_tises(struct mlx5e_priv *priv)
+int mlx5e_create_tises(struct mlx5e_priv *priv)
 {
 	int err;
 	int tc;
 
-	for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) {
+	for (tc = 0; tc < priv->profile->max_tc; tc++) {
 		err = mlx5e_create_tis(priv, tc);
 		if (err)
 			goto err_close_tises;
@@ -2017,11 +2041,11 @@ err_close_tises:
 	return err;
 }
 
-static void mlx5e_destroy_tises(struct mlx5e_priv *priv)
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 {
 	int tc;
 
-	for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++)
+	for (tc = 0; tc < priv->profile->max_tc; tc++)
 		mlx5e_destroy_tis(priv, tc);
 }
 
@@ -2030,7 +2054,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
 {
 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
 
-	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
+	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 
 #define MLX5_HASH_IP            (MLX5_HASH_FIELD_SEL_SRC_IP   |\
 				 MLX5_HASH_FIELD_SEL_DST_IP)
@@ -2047,7 +2071,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
 	mlx5e_build_tir_ctx_lro(tirc, priv);
 
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-	MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqtn);
+	MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
 	mlx5e_build_tir_ctx_hash(tirc, priv);
 
 	switch (tt) {
@@ -2137,7 +2161,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
 static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
 				       u32 rqtn)
 {
-	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
+	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 
 	mlx5e_build_tir_ctx_lro(tirc, priv);
 
@@ -2146,15 +2170,13 @@ static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
 }
 
-static int mlx5e_create_tirs(struct mlx5e_priv *priv)
+static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
 {
-	int nch = mlx5e_get_max_num_channels(priv->mdev);
+	struct mlx5e_tir *tir;
 	void *tirc;
 	int inlen;
-	u32 *tirn;
 	int err;
 	u32 *in;
-	int ix;
 	int tt;
 
 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
@@ -2162,25 +2184,51 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv)
 	if (!in)
 		return -ENOMEM;
 
-	/* indirect tirs */
 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
 		memset(in, 0, inlen);
-		tirn = &priv->indir_tirn[tt];
+		tir = &priv->indir_tir[tt];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 		mlx5e_build_indir_tir_ctx(priv, tirc, tt);
-		err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn);
+		err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
 		if (err)
 			goto err_destroy_tirs;
 	}
 
-	/* direct tirs */
+	kvfree(in);
+
+	return 0;
+
+err_destroy_tirs:
+	for (tt--; tt >= 0; tt--)
+		mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
+
+	kvfree(in);
+
+	return err;
+}
+
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
+{
+	int nch = priv->profile->max_nch(priv->mdev);
+	struct mlx5e_tir *tir;
+	void *tirc;
+	int inlen;
+	int err;
+	u32 *in;
+	int ix;
+
+	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
 	for (ix = 0; ix < nch; ix++) {
 		memset(in, 0, inlen);
-		tirn = &priv->direct_tir[ix].tirn;
+		tir = &priv->direct_tir[ix];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 		mlx5e_build_direct_tir_ctx(priv, tirc,
-					   priv->direct_tir[ix].rqtn);
-		err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn);
+					   priv->direct_tir[ix].rqt.rqtn);
+		err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
 		if (err)
 			goto err_destroy_ch_tirs;
 	}
@@ -2191,27 +2239,28 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv)
 
 err_destroy_ch_tirs:
 	for (ix--; ix >= 0; ix--)
-		mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[ix].tirn);
-
-err_destroy_tirs:
-	for (tt--; tt >= 0; tt--)
-		mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[tt]);
+		mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]);
 
 	kvfree(in);
 
 	return err;
 }
 
-static void mlx5e_destroy_tirs(struct mlx5e_priv *priv)
+static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
 {
-	int nch = mlx5e_get_max_num_channels(priv->mdev);
 	int i;
 
-	for (i = 0; i < nch; i++)
-		mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[i].tirn);
-
 	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
-		mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[i]);
+		mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
+}
+
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
+{
+	int nch = priv->profile->max_nch(priv->mdev);
+	int i;
+
+	for (i = 0; i < nch; i++)
+		mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
 }
 
 int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd)
@@ -2285,7 +2334,7 @@ mqprio:
 	return mlx5e_setup_tc(dev, tc->tc);
 }
 
-static struct rtnl_link_stats64 *
+struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -2714,6 +2763,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 	return features;
 }
 
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	bool sched_work = false;
+	int i;
+
+	netdev_err(dev, "TX timeout detected\n");
+
+	for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
+		struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
+
+		if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+			continue;
+		sched_work = true;
+		set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+		netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
+			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
+	}
+
+	if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state))
+		schedule_work(&priv->tx_timeout_work);
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
@@ -2732,6 +2804,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
+	.ndo_tx_timeout          = mlx5e_tx_timeout,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2762,6 +2835,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 	.ndo_get_vf_config       = mlx5e_get_vf_config,
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
+	.ndo_tx_timeout          = mlx5e_tx_timeout,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2893,9 +2967,10 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 			MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
 }
 
-static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
-				    struct net_device *netdev,
-				    int num_channels)
+static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
+					struct net_device *netdev,
+					const struct mlx5e_profile *profile,
+					void *ppriv)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	u32 link_speed = 0;
@@ -2964,7 +3039,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 			    sizeof(priv->params.toeplitz_hash_key));
 
 	mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt,
-				      MLX5E_INDIR_RQT_SIZE, num_channels);
+				      MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev));
 
 	priv->params.lro_wqe_sz            =
 		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -2975,7 +3050,9 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 
 	priv->mdev                         = mdev;
 	priv->netdev                       = netdev;
-	priv->params.num_channels          = num_channels;
+	priv->params.num_channels          = profile->max_nch(mdev);
+	priv->profile                      = profile;
+	priv->ppriv                        = ppriv;
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_ets_init(priv);
@@ -2985,6 +3062,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 
 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+	INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
 	INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
 }
 
@@ -3000,7 +3078,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
 	}
 }
 
-static void mlx5e_build_netdev(struct net_device *netdev)
+static const struct switchdev_ops mlx5e_switchdev_ops = {
+	.switchdev_port_attr_get	= mlx5e_attr_get,
+};
+
+static void mlx5e_build_nic_netdev(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -3081,31 +3163,11 @@ static void mlx5e_build_netdev(struct net_device *netdev)
 	netdev->priv_flags       |= IFF_UNICAST_FLT;
 
 	mlx5e_set_netdev_dev_addr(netdev);
-}
 
-static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
-			     struct mlx5_core_mkey *mkey)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_create_mkey_mbox_in *in;
-	int err;
-
-	in = mlx5_vzalloc(sizeof(*in));
-	if (!in)
-		return -ENOMEM;
-
-	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
-			MLX5_PERM_LOCAL_READ  |
-			MLX5_ACCESS_MODE_PA;
-	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
-	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-
-	err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
-				    NULL);
-
-	kvfree(in);
-
-	return err;
+#ifdef CONFIG_NET_SWITCHDEV
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		netdev->switchdev_ops = &mlx5e_switchdev_ops;
+#endif
 }
 
 static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
@@ -3135,7 +3197,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
 	struct mlx5_mkey_seg *mkc;
 	int inlen = sizeof(*in);
 	u64 npages =
-		mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS;
+		priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS;
 	int err;
 
 	in = mlx5_vzalloc(inlen);
@@ -3150,7 +3212,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
 		     MLX5_ACCESS_MODE_MTT;
 
 	mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	mkc->flags_pd = cpu_to_be32(priv->pdn);
+	mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn);
 	mkc->len = cpu_to_be64(npages << PAGE_SHIFT);
 	mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages));
 	mkc->log2_page_size = PAGE_SHIFT;
@@ -3163,160 +3225,233 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
 	return err;
 }
 
-static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
+static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
+			   struct net_device *netdev,
+			   const struct mlx5e_profile *profile,
+			   void *ppriv)
 {
-	struct net_device *netdev;
-	struct mlx5e_priv *priv;
-	int nch = mlx5e_get_max_num_channels(mdev);
-	int err;
-
-	if (mlx5e_check_required_hca_cap(mdev))
-		return NULL;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
-				    nch * MLX5E_MAX_NUM_TC,
-				    nch);
-	if (!netdev) {
-		mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
-		return NULL;
-	}
+	mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+	mlx5e_build_nic_netdev(netdev);
+	mlx5e_vxlan_init(priv);
+}
 
-	mlx5e_build_netdev_priv(mdev, netdev, nch);
-	mlx5e_build_netdev(netdev);
+static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 
-	netif_carrier_off(netdev);
+	mlx5e_vxlan_cleanup(priv);
 
-	priv = netdev_priv(netdev);
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		mlx5_eswitch_unregister_vport_rep(esw, 0);
+}
 
-	priv->wq = create_singlethread_workqueue("mlx5e");
-	if (!priv->wq)
-		goto err_free_netdev;
+static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int err;
+	int i;
 
-	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false);
+	err = mlx5e_create_indirect_rqts(priv);
 	if (err) {
-		mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err);
-		goto err_destroy_wq;
+		mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err);
+		return err;
 	}
 
-	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
+	err = mlx5e_create_direct_rqts(priv);
 	if (err) {
-		mlx5_core_err(mdev, "alloc pd failed, %d\n", err);
-		goto err_unmap_free_uar;
+		mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err);
+		goto err_destroy_indirect_rqts;
 	}
 
-	err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
+	err = mlx5e_create_indirect_tirs(priv);
 	if (err) {
-		mlx5_core_err(mdev, "alloc td failed, %d\n", err);
-		goto err_dealloc_pd;
+		mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err);
+		goto err_destroy_direct_rqts;
 	}
 
-	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey);
+	err = mlx5e_create_direct_tirs(priv);
 	if (err) {
-		mlx5_core_err(mdev, "create mkey failed, %d\n", err);
-		goto err_dealloc_transport_domain;
+		mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err);
+		goto err_destroy_indirect_tirs;
 	}
 
-	err = mlx5e_create_umr_mkey(priv);
+	err = mlx5e_create_flow_steering(priv);
 	if (err) {
-		mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-		goto err_destroy_mkey;
+		mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
+		goto err_destroy_direct_tirs;
 	}
 
+	err = mlx5e_tc_init(priv);
+	if (err)
+		goto err_destroy_flow_steering;
+
+	return 0;
+
+err_destroy_flow_steering:
+	mlx5e_destroy_flow_steering(priv);
+err_destroy_direct_tirs:
+	mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+	mlx5e_destroy_indirect_tirs(priv);
+err_destroy_direct_rqts:
+	for (i = 0; i < priv->profile->max_nch(mdev); i++)
+		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+err_destroy_indirect_rqts:
+	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+	return err;
+}
+
+static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
+{
+	int i;
+
+	mlx5e_tc_cleanup(priv);
+	mlx5e_destroy_flow_steering(priv);
+	mlx5e_destroy_direct_tirs(priv);
+	mlx5e_destroy_indirect_tirs(priv);
+	for (i = 0; i < priv->profile->max_nch(priv->mdev); i++)
+		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+}
+
+static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
+{
+	int err;
+
 	err = mlx5e_create_tises(priv);
 	if (err) {
-		mlx5_core_warn(mdev, "create tises failed, %d\n", err);
-		goto err_destroy_umr_mkey;
+		mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+		return err;
 	}
 
-	err = mlx5e_open_drop_rq(priv);
-	if (err) {
-		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
-		goto err_destroy_tises;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
+#endif
+	return 0;
+}
+
+static void mlx5e_nic_enable(struct mlx5e_priv *priv)
+{
+	struct net_device *netdev = priv->netdev;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	struct mlx5_eswitch_rep rep;
+
+	if (mlx5e_vxlan_allowed(mdev)) {
+		rtnl_lock();
+		udp_tunnel_get_rx_info(netdev);
+		rtnl_unlock();
 	}
 
-	err = mlx5e_create_rqts(priv);
-	if (err) {
-		mlx5_core_warn(mdev, "create rqts failed, %d\n", err);
-		goto err_close_drop_rq;
+	mlx5e_enable_async_events(priv);
+	queue_work(priv->wq, &priv->set_rx_mode_work);
+
+	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+		rep.load = mlx5e_nic_rep_load;
+		rep.unload = mlx5e_nic_rep_unload;
+		rep.vport = 0;
+		rep.priv_data = priv;
+		mlx5_eswitch_register_vport_rep(esw, &rep);
 	}
+}
 
-	err = mlx5e_create_tirs(priv);
-	if (err) {
-		mlx5_core_warn(mdev, "create tirs failed, %d\n", err);
-		goto err_destroy_rqts;
+static void mlx5e_nic_disable(struct mlx5e_priv *priv)
+{
+	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_disable_async_events(priv);
+}
+
+static const struct mlx5e_profile mlx5e_nic_profile = {
+	.init		   = mlx5e_nic_init,
+	.cleanup	   = mlx5e_nic_cleanup,
+	.init_rx	   = mlx5e_init_nic_rx,
+	.cleanup_rx	   = mlx5e_cleanup_nic_rx,
+	.init_tx	   = mlx5e_init_nic_tx,
+	.cleanup_tx	   = mlx5e_cleanup_nic_tx,
+	.enable		   = mlx5e_nic_enable,
+	.disable	   = mlx5e_nic_disable,
+	.update_stats	   = mlx5e_update_stats,
+	.max_nch	   = mlx5e_get_max_num_channels,
+	.max_tc		   = MLX5E_MAX_NUM_TC,
+};
+
+void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+			  const struct mlx5e_profile *profile, void *ppriv)
+{
+	struct net_device *netdev;
+	struct mlx5e_priv *priv;
+	int nch = profile->max_nch(mdev);
+	int err;
+
+	netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
+				    nch * profile->max_tc,
+				    nch);
+	if (!netdev) {
+		mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
+		return NULL;
 	}
 
-	err = mlx5e_create_flow_steering(priv);
+	profile->init(mdev, netdev, profile, ppriv);
+
+	netif_carrier_off(netdev);
+
+	priv = netdev_priv(netdev);
+
+	priv->wq = create_singlethread_workqueue("mlx5e");
+	if (!priv->wq)
+		goto err_free_netdev;
+
+	err = mlx5e_create_umr_mkey(priv);
 	if (err) {
-		mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
-		goto err_destroy_tirs;
+		mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
+		goto err_destroy_wq;
 	}
 
-	mlx5e_create_q_counter(priv);
-
-	mlx5e_init_l2_addr(priv);
+	err = profile->init_tx(priv);
+	if (err)
+		goto err_destroy_umr_mkey;
 
-	mlx5e_vxlan_init(priv);
+	err = mlx5e_open_drop_rq(priv);
+	if (err) {
+		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+		goto err_cleanup_tx;
+	}
 
-	err = mlx5e_tc_init(priv);
+	err = profile->init_rx(priv);
 	if (err)
-		goto err_dealloc_q_counters;
+		goto err_close_drop_rq;
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-	mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
-#endif
+	mlx5e_create_q_counter(priv);
+
+	mlx5e_init_l2_addr(priv);
 
 	err = register_netdev(netdev);
 	if (err) {
 		mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
-		goto err_tc_cleanup;
-	}
-
-	if (mlx5e_vxlan_allowed(mdev)) {
-		rtnl_lock();
-		udp_tunnel_get_rx_info(netdev);
-		rtnl_unlock();
+		goto err_dealloc_q_counters;
 	}
 
-	mlx5e_enable_async_events(priv);
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	if (profile->enable)
+		profile->enable(priv);
 
 	return priv;
 
-err_tc_cleanup:
-	mlx5e_tc_cleanup(priv);
-
 err_dealloc_q_counters:
 	mlx5e_destroy_q_counter(priv);
-	mlx5e_destroy_flow_steering(priv);
-
-err_destroy_tirs:
-	mlx5e_destroy_tirs(priv);
-
-err_destroy_rqts:
-	mlx5e_destroy_rqts(priv);
+	profile->cleanup_rx(priv);
 
 err_close_drop_rq:
 	mlx5e_close_drop_rq(priv);
 
-err_destroy_tises:
-	mlx5e_destroy_tises(priv);
+err_cleanup_tx:
+	profile->cleanup_tx(priv);
 
 err_destroy_umr_mkey:
 	mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
 
-err_destroy_mkey:
-	mlx5_core_destroy_mkey(mdev, &priv->mkey);
-
-err_dealloc_transport_domain:
-	mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
-
-err_dealloc_pd:
-	mlx5_core_dealloc_pd(mdev, priv->pdn);
-
-err_unmap_free_uar:
-	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
-
 err_destroy_wq:
 	destroy_workqueue(priv->wq);
 
@@ -3326,15 +3461,59 @@ err_free_netdev:
 	return NULL;
 }
 
-static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
+static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
 {
-	struct mlx5e_priv *priv = vpriv;
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+	int vport;
+
+	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+		return;
+
+	for (vport = 1; vport < total_vfs; vport++) {
+		struct mlx5_eswitch_rep rep;
+
+		rep.load = mlx5e_vport_rep_load;
+		rep.unload = mlx5e_vport_rep_unload;
+		rep.vport = vport;
+		mlx5_eswitch_register_vport_rep(esw, &rep);
+	}
+}
+
+static void *mlx5e_add(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	void *ppriv = NULL;
+	void *ret;
+
+	if (mlx5e_check_required_hca_cap(mdev))
+		return NULL;
+
+	if (mlx5e_create_mdev_resources(mdev))
+		return NULL;
+
+	mlx5e_register_vport_rep(mdev);
+
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		ppriv = &esw->offloads.vport_reps[0];
+
+	ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
+	if (!ret) {
+		mlx5e_destroy_mdev_resources(mdev);
+		return NULL;
+	}
+	return ret;
+}
+
+void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
+{
+	const struct mlx5e_profile *profile = priv->profile;
 	struct net_device *netdev = priv->netdev;
 
 	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+	if (profile->disable)
+		profile->disable(priv);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
-	mlx5e_disable_async_events(priv);
 	flush_workqueue(priv->wq);
 	if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
 		netif_device_detach(netdev);
@@ -3343,26 +3522,35 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
 		unregister_netdev(netdev);
 	}
 
-	mlx5e_tc_cleanup(priv);
-	mlx5e_vxlan_cleanup(priv);
 	mlx5e_destroy_q_counter(priv);
-	mlx5e_destroy_flow_steering(priv);
-	mlx5e_destroy_tirs(priv);
-	mlx5e_destroy_rqts(priv);
+	profile->cleanup_rx(priv);
 	mlx5e_close_drop_rq(priv);
-	mlx5e_destroy_tises(priv);
+	profile->cleanup_tx(priv);
 	mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
-	mlx5_core_destroy_mkey(priv->mdev, &priv->mkey);
-	mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
-	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
-	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
 	cancel_delayed_work_sync(&priv->update_stats_work);
 	destroy_workqueue(priv->wq);
+	if (profile->cleanup)
+		profile->cleanup(priv);
 
 	if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state))
 		free_netdev(netdev);
 }
 
+static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+	struct mlx5e_priv *priv = vpriv;
+	int vport;
+
+	mlx5e_destroy_netdev(mdev, priv);
+
+	for (vport = 1; vport < total_vfs; vport++)
+		mlx5_eswitch_unregister_vport_rep(esw, vport);
+
+	mlx5e_destroy_mdev_resources(mdev);
+}
+
 static void *mlx5e_get_netdev(void *vpriv)
 {
 	struct mlx5e_priv *priv = vpriv;
@@ -3371,8 +3559,8 @@ static void *mlx5e_get_netdev(void *vpriv)
 }
 
 static struct mlx5_interface mlx5e_interface = {
-	.add       = mlx5e_create_netdev,
-	.remove    = mlx5e_destroy_netdev,
+	.add       = mlx5e_add,
+	.remove    = mlx5e_remove,
 	.event     = mlx5e_async_event,
 	.protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
 	.get_dev   = mlx5e_get_netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
new file mode 100644
index 000000000000..1c7d8b8314bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/mlx5/fs.h>
+#include <net/switchdev.h>
+#include <net/pkt_cls.h>
+
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+
+static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+
+static void mlx5e_rep_get_drvinfo(struct net_device *dev,
+				  struct ethtool_drvinfo *drvinfo)
+{
+	strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
+		sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+}
+
+static const struct counter_desc sw_rep_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+};
+
+#define NUM_VPORT_REP_COUNTERS	ARRAY_SIZE(sw_rep_stats_desc)
+
+static void mlx5e_rep_get_strings(struct net_device *dev,
+				  u32 stringset, uint8_t *data)
+{
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
+			strcpy(data + (i * ETH_GSTRING_LEN),
+			       sw_rep_stats_desc[i].format);
+		break;
+	}
+}
+
+static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
+{
+	struct mlx5e_sw_stats *s = &priv->stats.sw;
+	struct mlx5e_rq_stats *rq_stats;
+	struct mlx5e_sq_stats *sq_stats;
+	int i, j;
+
+	memset(s, 0, sizeof(*s));
+	for (i = 0; i < priv->params.num_channels; i++) {
+		rq_stats = &priv->channel[i]->rq.stats;
+
+		s->rx_packets	+= rq_stats->packets;
+		s->rx_bytes	+= rq_stats->bytes;
+
+		for (j = 0; j < priv->params.num_tc; j++) {
+			sq_stats = &priv->channel[i]->sq[j].stats;
+
+			s->tx_packets		+= sq_stats->packets;
+			s->tx_bytes		+= sq_stats->bytes;
+		}
+	}
+}
+
+static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
+					struct ethtool_stats *stats, u64 *data)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (!data)
+		return;
+
+	mutex_lock(&priv->state_lock);
+	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+		mlx5e_update_sw_rep_counters(priv);
+	mutex_unlock(&priv->state_lock);
+
+	for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
+		data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
+					       sw_rep_stats_desc, i);
+}
+
+static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return NUM_VPORT_REP_COUNTERS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+	.get_drvinfo	   = mlx5e_rep_get_drvinfo,
+	.get_link	   = ethtool_op_get_link,
+	.get_strings       = mlx5e_rep_get_strings,
+	.get_sset_count    = mlx5e_rep_get_sset_count,
+	.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+};
+
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	u8 mac[ETH_ALEN];
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+		mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac);
+		attr->u.ppid.id_len = ETH_ALEN;
+		memcpy(&attr->u.ppid.id, &mac, ETH_ALEN);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5e_channel *c;
+	int n, tc, err, num_sqs = 0;
+	u16 *sqs;
+
+	sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL);
+	if (!sqs)
+		return -ENOMEM;
+
+	for (n = 0; n < priv->params.num_channels; n++) {
+		c = priv->channel[n];
+		for (tc = 0; tc < c->num_tc; tc++)
+			sqs[num_sqs++] = c->sq[tc].sqn;
+	}
+
+	err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs);
+
+	kfree(sqs);
+	return err;
+}
+
+int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_priv *priv = rep->priv_data;
+
+	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return mlx5e_add_sqs_fwd_rules(priv);
+	return 0;
+}
+
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+
+	mlx5_eswitch_sqs2vport_stop(esw, rep);
+}
+
+void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
+			  struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_priv *priv = rep->priv_data;
+
+	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+		mlx5e_remove_sqs_fwd_rules(priv);
+
+	/* clean (and re-init) existing uplink offloaded TC rules */
+	mlx5e_tc_cleanup(priv);
+	mlx5e_tc_init(priv);
+}
+
+static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
+					char *buf, size_t len)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	int ret;
+
+	ret = snprintf(buf, len, "%d", rep->vport - 1);
+	if (ret >= len)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
+				  __be16 proto, struct tc_to_netdev *tc)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+		return -EOPNOTSUPP;
+
+	switch (tc->type) {
+	case TC_SETUP_CLSFLOWER:
+		switch (tc->cls_flower->command) {
+		case TC_CLSFLOWER_REPLACE:
+			return mlx5e_configure_flower(priv, proto, tc->cls_flower);
+		case TC_CLSFLOWER_DESTROY:
+			return mlx5e_delete_flower(priv, tc->cls_flower);
+		case TC_CLSFLOWER_STATS:
+			return mlx5e_stats_flower(priv, tc->cls_flower);
+		}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
+	.switchdev_port_attr_get	= mlx5e_attr_get,
+};
+
+static const struct net_device_ops mlx5e_netdev_ops_rep = {
+	.ndo_open                = mlx5e_open,
+	.ndo_stop                = mlx5e_close,
+	.ndo_start_xmit          = mlx5e_xmit,
+	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
+	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
+	.ndo_get_stats64         = mlx5e_get_stats,
+};
+
+static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev,
+					struct net_device *netdev,
+					const struct mlx5e_profile *profile,
+					void *ppriv)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+
+	priv->params.log_sq_size           =
+		MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+	priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
+	priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+
+	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
+					    BIT(priv->params.log_rq_size));
+
+	priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+	mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
+
+	priv->params.tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
+	priv->params.num_tc                = 1;
+
+	priv->params.lro_wqe_sz            =
+		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+	priv->mdev                         = mdev;
+	priv->netdev                       = netdev;
+	priv->params.num_channels          = profile->max_nch(mdev);
+	priv->profile                      = profile;
+	priv->ppriv                        = ppriv;
+
+	mutex_init(&priv->state_lock);
+
+	INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+}
+
+static void mlx5e_build_rep_netdev(struct net_device *netdev)
+{
+	netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+
+	netdev->watchdog_timeo    = 15 * HZ;
+
+	netdev->ethtool_ops	  = &mlx5e_rep_ethtool_ops;
+
+#ifdef CONFIG_NET_SWITCHDEV
+	netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
+#endif
+
+	netdev->features	 |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC;
+	netdev->hw_features      |= NETIF_F_HW_TC;
+
+	eth_hw_addr_random(netdev);
+}
+
+static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
+			   struct net_device *netdev,
+			   const struct mlx5e_profile *profile,
+			   void *ppriv)
+{
+	mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv);
+	mlx5e_build_rep_netdev(netdev);
+}
+
+static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_flow_rule *flow_rule;
+	int err;
+	int i;
+
+	err = mlx5e_create_direct_rqts(priv);
+	if (err) {
+		mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err);
+		return err;
+	}
+
+	err = mlx5e_create_direct_tirs(priv);
+	if (err) {
+		mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err);
+		goto err_destroy_direct_rqts;
+	}
+
+	flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+						      rep->vport,
+						      priv->direct_tir[0].tirn);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		goto err_destroy_direct_tirs;
+	}
+	rep->vport_rx_rule = flow_rule;
+
+	err = mlx5e_tc_init(priv);
+	if (err)
+		goto err_del_flow_rule;
+
+	return 0;
+
+err_del_flow_rule:
+	mlx5_del_flow_rule(rep->vport_rx_rule);
+err_destroy_direct_tirs:
+	mlx5e_destroy_direct_tirs(priv);
+err_destroy_direct_rqts:
+	for (i = 0; i < priv->params.num_channels; i++)
+		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+	return err;
+}
+
+static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	int i;
+
+	mlx5e_tc_cleanup(priv);
+	mlx5_del_flow_rule(rep->vport_rx_rule);
+	mlx5e_destroy_direct_tirs(priv);
+	for (i = 0; i < priv->params.num_channels; i++)
+		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+}
+
+static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
+{
+	int err;
+
+	err = mlx5e_create_tises(priv);
+	if (err) {
+		mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+		return err;
+	}
+	return 0;
+}
+
+static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
+{
+#define	MLX5E_PORT_REPRESENTOR_NCH 1
+	return MLX5E_PORT_REPRESENTOR_NCH;
+}
+
+static struct mlx5e_profile mlx5e_rep_profile = {
+	.init			= mlx5e_init_rep,
+	.init_rx		= mlx5e_init_rep_rx,
+	.cleanup_rx		= mlx5e_cleanup_rep_rx,
+	.init_tx		= mlx5e_init_rep_tx,
+	.cleanup_tx		= mlx5e_cleanup_nic_tx,
+	.update_stats           = mlx5e_update_sw_rep_counters,
+	.max_nch		= mlx5e_get_rep_max_num_channels,
+	.max_tc			= 1,
+};
+
+int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
+			 struct mlx5_eswitch_rep *rep)
+{
+	rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
+	if (!rep->priv_data) {
+		pr_warn("Failed to create representor for vport %d\n",
+			rep->vport);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
+			    struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_priv *priv = rep->priv_data;
+
+	mlx5e_destroy_netdev(esw->dev, priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index bd947704b59c..9f2a16a507e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -212,6 +212,20 @@ err_free_skb:
 	return -ENOMEM;
 }
 
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+	struct sk_buff *skb = rq->skb[ix];
+
+	if (skb) {
+		rq->skb[ix] = NULL;
+		dma_unmap_single(rq->pdev,
+				 *((dma_addr_t *)skb->cb),
+				 rq->wqe_sz,
+				 DMA_FROM_DEVICE);
+		dev_kfree_skb(skb);
+	}
+}
+
 static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
 {
 	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
@@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 	return 0;
 }
 
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+
+	wi->free_wqe(rq, wi);
+}
+
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+	struct mlx5_wq_ll *wq = &rq->wq;
+	struct mlx5e_rx_wqe *wqe;
+	__be16 wqe_ix_be;
+	u16 wqe_ix;
+
+	while (!mlx5_wq_ll_is_empty(wq)) {
+		wqe_ix_be = *wq->tail_next;
+		wqe_ix    = be16_to_cpu(wqe_ix_be);
+		wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+		rq->dealloc_wqe(rq, wqe_ix);
+		mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+			       &wqe->next.next_wqe_index);
+	}
+}
+
 #define RQ_CANNOT_POST(rq) \
 		(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
 		 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
@@ -689,7 +727,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 	if (is_first_ethertype_ip(skb)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
-		rq->stats.csum_sw++;
+		rq->stats.csum_complete++;
 		return;
 	}
 
@@ -699,7 +737,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		if (cqe_is_tunneled(cqe)) {
 			skb->csum_level = 1;
 			skb->encapsulation = 1;
-			rq->stats.csum_inner++;
+			rq->stats.csum_unnecessary_inner++;
 		}
 		return;
 	}
@@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
 	int work_done = 0;
 
+	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+		return 0;
+
 	if (cq->decmprs_left)
 		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 83bc32b25849..7b9d8a989b52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -42,9 +42,11 @@
 	be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
 
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
 
 struct counter_desc {
-	char		name[ETH_GSTRING_LEN];
+	char		format[ETH_GSTRING_LEN];
 	int		offset; /* Byte offset */
 };
 
@@ -53,18 +55,18 @@ struct mlx5e_sw_stats {
 	u64 rx_bytes;
 	u64 tx_packets;
 	u64 tx_bytes;
-	u64 tso_packets;
-	u64 tso_bytes;
-	u64 tso_inner_packets;
-	u64 tso_inner_bytes;
-	u64 lro_packets;
-	u64 lro_bytes;
-	u64 rx_csum_good;
+	u64 tx_tso_packets;
+	u64 tx_tso_bytes;
+	u64 tx_tso_inner_packets;
+	u64 tx_tso_inner_bytes;
+	u64 rx_lro_packets;
+	u64 rx_lro_bytes;
+	u64 rx_csum_unnecessary;
 	u64 rx_csum_none;
-	u64 rx_csum_sw;
-	u64 rx_csum_inner;
-	u64 tx_csum_offload;
-	u64 tx_csum_inner;
+	u64 rx_csum_complete;
+	u64 rx_csum_unnecessary_inner;
+	u64 tx_csum_partial;
+	u64 tx_csum_partial_inner;
 	u64 tx_queue_stopped;
 	u64 tx_queue_wake;
 	u64 tx_queue_dropped;
@@ -76,7 +78,7 @@ struct mlx5e_sw_stats {
 	u64 rx_cqe_compress_pkts;
 
 	/* Special handling counters */
-	u64 link_down_events;
+	u64 link_down_events_phy;
 };
 
 static const struct counter_desc sw_stats_desc[] = {
@@ -84,18 +86,18 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
@@ -105,7 +107,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
 struct mlx5e_qcounter_stats {
@@ -125,12 +127,6 @@ struct mlx5e_vport_stats {
 };
 
 static const struct counter_desc vport_stats_desc[] = {
-	{ "rx_vport_error_packets",
-		VPORT_COUNTER_OFF(received_errors.packets) },
-	{ "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) },
-	{ "tx_vport_error_packets",
-		VPORT_COUNTER_OFF(transmit_errors.packets) },
-	{ "tx_vport_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) },
 	{ "rx_vport_unicast_packets",
 		VPORT_COUNTER_OFF(received_eth_unicast.packets) },
 	{ "rx_vport_unicast_bytes",
@@ -155,6 +151,22 @@ static const struct counter_desc vport_stats_desc[] = {
 		VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
 	{ "tx_vport_broadcast_bytes",
 		VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+	{ "rx_vport_rdma_unicast_packets",
+		VPORT_COUNTER_OFF(received_ib_unicast.packets) },
+	{ "rx_vport_rdma_unicast_bytes",
+		VPORT_COUNTER_OFF(received_ib_unicast.octets) },
+	{ "tx_vport_rdma_unicast_packets",
+		VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
+	{ "tx_vport_rdma_unicast_bytes",
+		VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
+	{ "rx_vport_rdma_multicast_packets",
+		VPORT_COUNTER_OFF(received_ib_multicast.packets) },
+	{ "rx_vport_rdma_multicast_bytes",
+		VPORT_COUNTER_OFF(received_ib_multicast.octets) },
+	{ "tx_vport_rdma_multicast_packets",
+		VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
+	{ "tx_vport_rdma_multicast_bytes",
+		VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
 };
 
 #define PPORT_802_3_OFF(c) \
@@ -192,94 +204,69 @@ struct mlx5e_pport_stats {
 };
 
 static const struct counter_desc pport_802_3_stats_desc[] = {
-	{ "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) },
-	{ "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) },
-	{ "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
-	{ "alignment_err", PPORT_802_3_OFF(a_alignment_errors) },
-	{ "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) },
-	{ "octets_received", PPORT_802_3_OFF(a_octets_received_ok) },
-	{ "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
-	{ "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
-	{ "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
-	{ "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
-	{ "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) },
-	{ "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) },
-	{ "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) },
-	{ "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
-	{ "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
-	{ "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) },
-	{ "unsupported_op_rx",
-		PPORT_802_3_OFF(a_unsupported_opcodes_received) },
-	{ "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
-	{ "pause_ctrl_tx",
-		PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+	{ "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+	{ "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+	{ "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+	{ "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+	{ "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+	{ "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+	{ "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+	{ "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+	{ "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+	{ "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+	{ "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+	{ "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+	{ "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+	{ "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+	{ "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+	{ "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+	{ "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+	{ "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
 };
 
 static const struct counter_desc pport_2863_stats_desc[] = {
-	{ "in_octets", PPORT_2863_OFF(if_in_octets) },
-	{ "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) },
-	{ "in_discards", PPORT_2863_OFF(if_in_discards) },
-	{ "in_errors", PPORT_2863_OFF(if_in_errors) },
-	{ "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) },
-	{ "out_octets", PPORT_2863_OFF(if_out_octets) },
-	{ "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) },
-	{ "out_discards", PPORT_2863_OFF(if_out_discards) },
-	{ "out_errors", PPORT_2863_OFF(if_out_errors) },
-	{ "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) },
-	{ "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) },
-	{ "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) },
-	{ "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) },
+	{ "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+	{ "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+	{ "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
 };
 
 static const struct counter_desc pport_2819_stats_desc[] = {
-	{ "drop_events", PPORT_2819_OFF(ether_stats_drop_events) },
-	{ "octets", PPORT_2819_OFF(ether_stats_octets) },
-	{ "pkts", PPORT_2819_OFF(ether_stats_pkts) },
-	{ "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) },
-	{ "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) },
-	{ "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) },
-	{ "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) },
-	{ "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) },
-	{ "fragments", PPORT_2819_OFF(ether_stats_fragments) },
-	{ "jabbers", PPORT_2819_OFF(ether_stats_jabbers) },
-	{ "collisions", PPORT_2819_OFF(ether_stats_collisions) },
-	{ "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) },
-	{ "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
-	{ "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
-	{ "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
-	{ "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
-	{ "p1024to1518octets",
-		PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
-	{ "p1519to2047octets",
-		PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
-	{ "p2048to4095octets",
-		PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
-	{ "p4096to8191octets",
-		PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
-	{ "p8192to10239octets",
-		PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+	{ "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+	{ "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+	{ "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+	{ "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+	{ "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+	{ "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+	{ "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+	{ "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+	{ "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+	{ "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+	{ "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+	{ "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+	{ "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
 };
 
 static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
-	{ "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) },
-	{ "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) },
-	{ "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) },
-	{ "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) },
+	{ "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+	{ "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+	{ "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+	{ "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
 };
 
 static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
-	{ "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) },
-	{ "rx_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
-	{ "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) },
-	{ "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
-	{ "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+	/* %s is "global" or "prio{i}" */
+	{ "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+	{ "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+	{ "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+	{ "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+	{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
 struct mlx5e_rq_stats {
 	u64 packets;
 	u64 bytes;
-	u64 csum_sw;
-	u64 csum_inner;
+	u64 csum_complete;
+	u64 csum_unnecessary_inner;
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
@@ -292,19 +279,19 @@ struct mlx5e_rq_stats {
 };
 
 static const struct counter_desc rq_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 };
 
 struct mlx5e_sq_stats {
@@ -315,28 +302,28 @@ struct mlx5e_sq_stats {
 	u64 tso_bytes;
 	u64 tso_inner_packets;
 	u64 tso_inner_bytes;
-	u64 csum_offload_inner;
+	u64 csum_partial_inner;
 	u64 nop;
 	/* less likely accessed in data path */
-	u64 csum_offload_none;
+	u64 csum_none;
 	u64 stopped;
 	u64 wake;
 	u64 dropped;
 };
 
 static const struct counter_desc sq_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
 };
 
 #define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 704c3d30493e..0f19b01e3fff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -37,8 +37,11 @@
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_mirred.h>
 #include "en.h"
 #include "en_tc.h"
+#include "eswitch.h"
 
 struct mlx5e_tc_flow {
 	struct rhash_head	node;
@@ -49,9 +52,9 @@ struct mlx5e_tc_flow {
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 
-static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
-						u32 *match_c, u32 *match_v,
-						u32 action, u32 flow_tag)
+static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+						    struct mlx5_flow_spec *spec,
+						    u32 action, u32 flow_tag)
 {
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_flow_destination dest = { 0 };
@@ -62,7 +65,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
 	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 		dest.ft = priv->fs.vlan.ft.t;
-	} else {
+	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(dev, true);
 		if (IS_ERR(counter))
 			return ERR_CAST(counter);
@@ -88,8 +91,8 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
 		table_created = true;
 	}
 
-	rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS,
-				  match_c, match_v,
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	rule = mlx5_add_flow_rule(priv->fs.tc.t, spec,
 				  action, flow_tag,
 				  &dest);
 
@@ -109,6 +112,22 @@ err_create_ft:
 	return rule;
 }
 
+static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+						    struct mlx5_flow_spec *spec,
+						    u32 action, u32 dst_vport)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	u32 src_vport;
+
+	if (rep->vport) /* set source vport for the flow */
+		src_vport = rep->vport;
+	else
+		src_vport = FDB_UPLINK_VPORT;
+
+	return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport);
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5_flow_rule *rule)
 {
@@ -120,18 +139,19 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 
 	mlx5_fc_destroy(priv->mdev, counter);
 
-	if (!mlx5e_tc_num_filters(priv)) {
+	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
 	}
 }
 
-static int parse_cls_flower(struct mlx5e_priv *priv,
-			    u32 *match_c, u32 *match_v,
+static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
 			    struct tc_cls_flower_offload *f)
 {
-	void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
-	void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
+	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				       outer_headers);
+	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				       outer_headers);
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 
@@ -294,8 +314,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 	return 0;
 }
 
-static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-			    u32 *action, u32 *flow_tag)
+static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+				u32 *action, u32 *flow_tag)
 {
 	const struct tc_action *a;
 
@@ -338,17 +358,66 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	return 0;
 }
 
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+				u32 *action, u32 *dest_vport)
+{
+	const struct tc_action *a;
+
+	if (tc_no_actions(exts))
+		return -EINVAL;
+
+	*action = 0;
+
+	tc_for_each_action(a, exts) {
+		/* Only support a single action per rule */
+		if (*action)
+			return -EINVAL;
+
+		if (is_tcf_gact_shot(a)) {
+			*action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
+			continue;
+		}
+
+		if (is_tcf_mirred_redirect(a)) {
+			int ifindex = tcf_mirred_ifindex(a);
+			struct net_device *out_dev;
+			struct mlx5e_priv *out_priv;
+			struct mlx5_eswitch_rep *out_rep;
+
+			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
+
+			if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) {
+				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+				       priv->netdev->name, out_dev->name);
+				return -EINVAL;
+			}
+
+			out_priv = netdev_priv(out_dev);
+			out_rep  = out_priv->ppriv;
+			if (out_rep->vport == 0)
+				*dest_vport = FDB_UPLINK_VPORT;
+			else
+				*dest_vport = out_rep->vport;
+			*action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			continue;
+		}
+
+		return -EINVAL;
+	}
+	return 0;
+}
+
 int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 			   struct tc_cls_flower_offload *f)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
-	u32 *match_c;
-	u32 *match_v;
 	int err = 0;
-	u32 flow_tag;
-	u32 action;
+	u32 flow_tag, action, dest_vport = 0;
 	struct mlx5e_tc_flow *flow;
+	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_rule *old = NULL;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
 	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
 				      tc->ht_params);
@@ -357,49 +426,53 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	else
 		flow = kzalloc(sizeof(*flow), GFP_KERNEL);
 
-	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	if (!match_c || !match_v || !flow) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec || !flow) {
 		err = -ENOMEM;
 		goto err_free;
 	}
 
 	flow->cookie = f->cookie;
 
-	err = parse_cls_flower(priv, match_c, match_v, f);
+	err = parse_cls_flower(priv, spec, f);
 	if (err < 0)
 		goto err_free;
 
-	err = parse_tc_actions(priv, f->exts, &action, &flow_tag);
-	if (err < 0)
+	if (esw && esw->mode == SRIOV_OFFLOADS) {
+		err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport);
+		if (err < 0)
+			goto err_free;
+		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport);
+	} else {
+		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
+		if (err < 0)
+			goto err_free;
+		flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
+	}
+
+	if (IS_ERR(flow->rule)) {
+		err = PTR_ERR(flow->rule);
 		goto err_free;
+	}
 
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
 				     tc->ht_params);
 	if (err)
-		goto err_free;
-
-	flow->rule = mlx5e_tc_add_flow(priv, match_c, match_v, action,
-				       flow_tag);
-	if (IS_ERR(flow->rule)) {
-		err = PTR_ERR(flow->rule);
-		goto err_hash_del;
-	}
+		goto err_del_rule;
 
 	if (old)
 		mlx5e_tc_del_flow(priv, old);
 
 	goto out;
 
-err_hash_del:
-	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
+err_del_rule:
+	mlx5_del_flow_rule(flow->rule);
 
 err_free:
 	if (!old)
 		kfree(flow);
 out:
-	kfree(match_c);
-	kfree(match_v);
+	kvfree(spec);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index b000ddc29553..5740b465ef84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int channel_ix = fallback(dev, skb);
-	int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ?
-		 skb->vlan_tci >> VLAN_PRIO_SHIFT : 0;
+	int up = 0;
+
+	if (!netdev_get_num_tc(dev))
+		return channel_ix;
+
+	if (skb_vlan_tag_present(skb))
+		up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+	/* channel_ix can be larger than num_channels since
+	 * dev->num_real_tx_queues = num_channels * num_tc
+	 */
+	if (channel_ix >= priv->params.num_channels)
+		channel_ix = reciprocal_scale(channel_ix,
+					      priv->params.num_channels);
 
 	return priv->channeltc_to_txq_map[channel_ix][up];
 }
@@ -123,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
 	 * headers and occur before the data gather.
 	 * Therefore these headers must be copied into the WQE
 	 */
-#define MLX5E_MIN_INLINE ETH_HLEN
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 
 	if (bf) {
 		u16 ihs = skb_headlen(skb);
@@ -135,7 +147,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
 			return skb_headlen(skb);
 	}
 
-	return MLX5E_MIN_INLINE;
+	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
 }
 
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
@@ -192,12 +204,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		if (skb->encapsulation) {
 			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
 					  MLX5_ETH_WQE_L4_INNER_CSUM;
-			sq->stats.csum_offload_inner++;
+			sq->stats.csum_partial_inner++;
 		} else {
 			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
 		}
 	} else
-		sq->stats.csum_offload_none++;
+		sq->stats.csum_none++;
 
 	if (sq->cc != sq->prev_cc) {
 		sq->prev_cc = sq->cc;
@@ -341,6 +353,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	return mlx5e_sq_xmit(sq, skb);
 }
 
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+	struct mlx5e_tx_wqe_info *wi;
+	struct sk_buff *skb;
+	u16 ci;
+	int i;
+
+	while (sq->cc != sq->pc) {
+		ci = sq->cc & sq->wq.sz_m1;
+		skb = sq->skb[ci];
+		wi = &sq->wqe_info[ci];
+
+		if (!skb) { /* nop */
+			sq->cc++;
+			continue;
+		}
+
+		for (i = 0; i < wi->num_dma; i++) {
+			struct mlx5e_sq_dma *dma =
+				mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+			mlx5e_tx_dma_unmap(sq->pdev, dma);
+		}
+
+		dev_kfree_skb_any(skb);
+		sq->cc += wi->num_wqebbs;
+	}
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_sq *sq;
@@ -352,6 +393,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
+	if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+		return false;
+
 	npkts = 0;
 	nbytes = 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index aebbd6ccb9fe..f6d667797ee1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -40,17 +40,6 @@
 
 #define UPLINK_VPORT 0xFFFF
 
-#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
-
-#define esw_info(dev, format, ...)				\
-	pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
-
-#define esw_warn(dev, format, ...)				\
-	pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
-
-#define esw_debug(dev, format, ...)				\
-	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
-
 enum {
 	MLX5_ACTION_NONE = 0,
 	MLX5_ACTION_ADD  = 1,
@@ -92,6 +81,9 @@ enum {
 			    MC_ADDR_CHANGE | \
 			    PROMISC_CHANGE)
 
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
+
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
 					u32 events_mask)
 {
@@ -337,25 +329,23 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 			    MLX5_MATCH_OUTER_HEADERS);
 	struct mlx5_flow_rule *flow_rule = NULL;
 	struct mlx5_flow_destination dest;
+	struct mlx5_flow_spec *spec;
 	void *mv_misc = NULL;
 	void *mc_misc = NULL;
 	u8 *dmac_v = NULL;
 	u8 *dmac_c = NULL;
-	u32 *match_v;
-	u32 *match_c;
 
 	if (rx_rule)
 		match_header |= MLX5_MATCH_MISC_PARAMETERS;
-	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	if (!match_v || !match_c) {
+
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		pr_warn("FDB: Failed to alloc match parameters\n");
-		goto out;
+		return NULL;
 	}
-
-	dmac_v = MLX5_ADDR_OF(fte_match_param, match_v,
+	dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 			      outer_headers.dmac_47_16);
-	dmac_c = MLX5_ADDR_OF(fte_match_param, match_c,
+	dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 			      outer_headers.dmac_47_16);
 
 	if (match_header & MLX5_MATCH_OUTER_HEADERS) {
@@ -364,8 +354,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 	}
 
 	if (match_header & MLX5_MATCH_MISC_PARAMETERS) {
-		mv_misc  = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters);
-		mc_misc  = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+		mv_misc  = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+					misc_parameters);
+		mc_misc  = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+					misc_parameters);
 		MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
 		MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
 	}
@@ -376,11 +368,9 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 	esw_debug(esw->dev,
 		  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
 		  dmac_v, dmac_c, vport);
+	spec->match_criteria_enable = match_header;
 	flow_rule =
-		mlx5_add_flow_rule(esw->fdb_table.fdb,
-				   match_header,
-				   match_c,
-				   match_v,
+		mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
 				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				   0, &dest);
 	if (IS_ERR(flow_rule)) {
@@ -389,9 +379,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 			 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
 		flow_rule = NULL;
 	}
-out:
-	kfree(match_v);
-	kfree(match_c);
+
+	kvfree(spec);
 	return flow_rule;
 }
 
@@ -428,7 +417,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
 	return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
 }
 
-static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_core_dev *dev = esw->dev;
@@ -479,7 +468,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
 		goto out;
 	}
-	esw->fdb_table.addr_grp = g;
+	esw->fdb_table.legacy.addr_grp = g;
 
 	/* Allmulti group : One rule that forwards any mcast traffic */
 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -494,7 +483,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 		esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
 		goto out;
 	}
-	esw->fdb_table.allmulti_grp = g;
+	esw->fdb_table.legacy.allmulti_grp = g;
 
 	/* Promiscuous group :
 	 * One rule that forward all unmatched traffic from previous groups
@@ -511,17 +500,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 		esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
 		goto out;
 	}
-	esw->fdb_table.promisc_grp = g;
+	esw->fdb_table.legacy.promisc_grp = g;
 
 out:
 	if (err) {
-		if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) {
-			mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp);
-			esw->fdb_table.allmulti_grp = NULL;
+		if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
+			mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+			esw->fdb_table.legacy.allmulti_grp = NULL;
 		}
-		if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) {
-			mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
-			esw->fdb_table.addr_grp = NULL;
+		if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
+			mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+			esw->fdb_table.legacy.addr_grp = NULL;
 		}
 		if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) {
 			mlx5_destroy_flow_table(esw->fdb_table.fdb);
@@ -533,20 +522,20 @@ out:
 	return err;
 }
 
-static void esw_destroy_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
 {
 	if (!esw->fdb_table.fdb)
 		return;
 
 	esw_debug(esw->dev, "Destroy FDB Table\n");
-	mlx5_destroy_flow_group(esw->fdb_table.promisc_grp);
-	mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp);
-	mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
+	mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+	mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+	mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
 	mlx5_destroy_flow_table(esw->fdb_table.fdb);
 	esw->fdb_table.fdb = NULL;
-	esw->fdb_table.addr_grp = NULL;
-	esw->fdb_table.allmulti_grp = NULL;
-	esw->fdb_table.promisc_grp = NULL;
+	esw->fdb_table.legacy.addr_grp = NULL;
+	esw->fdb_table.legacy.allmulti_grp = NULL;
+	esw->fdb_table.legacy.promisc_grp = NULL;
 }
 
 /* E-Switch vport UC/MC lists management */
@@ -578,7 +567,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	if (err)
 		goto abort;
 
-	if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */
+	/* SRIOV is enabled: Forward UC MAC to vport */
+	if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
 		vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
 
 	esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
@@ -1300,9 +1290,8 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
 static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				    struct mlx5_vport *vport)
 {
+	struct mlx5_flow_spec *spec;
 	u8 smac[ETH_ALEN];
-	u32 *match_v;
-	u32 *match_c;
 	int err = 0;
 	u8 *smac_v;
 
@@ -1336,9 +1325,8 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 		  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
 		  vport->vport, vport->vlan, vport->qos);
 
-	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	if (!match_v || !match_c) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		err = -ENOMEM;
 		esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n",
 			 vport->vport, err);
@@ -1346,22 +1334,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 	}
 
 	if (vport->vlan || vport->qos)
-		MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
 
 	if (vport->spoofchk) {
-		MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_47_16);
-		MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_15_0);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
 		smac_v = MLX5_ADDR_OF(fte_match_param,
-				      match_v,
+				      spec->match_value,
 				      outer_headers.smac_47_16);
 		ether_addr_copy(smac_v, smac);
 	}
 
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 	vport->ingress.allow_rule =
-		mlx5_add_flow_rule(vport->ingress.acl,
-				   MLX5_MATCH_OUTER_HEADERS,
-				   match_c,
-				   match_v,
+		mlx5_add_flow_rule(vport->ingress.acl, spec,
 				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
 				   0, NULL);
 	if (IS_ERR(vport->ingress.allow_rule)) {
@@ -1372,13 +1358,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 		goto out;
 	}
 
-	memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param));
-	memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param));
+	memset(spec, 0, sizeof(*spec));
 	vport->ingress.drop_rule =
-		mlx5_add_flow_rule(vport->ingress.acl,
-				   0,
-				   match_c,
-				   match_v,
+		mlx5_add_flow_rule(vport->ingress.acl, spec,
 				   MLX5_FLOW_CONTEXT_ACTION_DROP,
 				   0, NULL);
 	if (IS_ERR(vport->ingress.drop_rule)) {
@@ -1392,17 +1374,14 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 out:
 	if (err)
 		esw_vport_cleanup_ingress_rules(esw, vport);
-
-	kfree(match_v);
-	kfree(match_c);
+	kvfree(spec);
 	return err;
 }
 
 static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   struct mlx5_vport *vport)
 {
-	u32 *match_v;
-	u32 *match_c;
+	struct mlx5_flow_spec *spec;
 	int err = 0;
 
 	esw_vport_cleanup_egress_rules(esw, vport);
@@ -1418,9 +1397,8 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
 		  vport->vport, vport->vlan, vport->qos);
 
-	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-	if (!match_v || !match_c) {
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
 		err = -ENOMEM;
 		esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n",
 			 vport->vport, err);
@@ -1428,16 +1406,14 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	}
 
 	/* Allowed vlan rule */
-	MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.vlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.first_vid);
-	MLX5_SET(fte_match_param, match_v, outer_headers.first_vid, vport->vlan);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan);
 
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 	vport->egress.allowed_vlan =
-		mlx5_add_flow_rule(vport->egress.acl,
-				   MLX5_MATCH_OUTER_HEADERS,
-				   match_c,
-				   match_v,
+		mlx5_add_flow_rule(vport->egress.acl, spec,
 				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
 				   0, NULL);
 	if (IS_ERR(vport->egress.allowed_vlan)) {
@@ -1449,13 +1425,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	}
 
 	/* Drop others rule (star rule) */
-	memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param));
-	memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param));
+	memset(spec, 0, sizeof(*spec));
 	vport->egress.drop_rule =
-		mlx5_add_flow_rule(vport->egress.acl,
-				   0,
-				   match_c,
-				   match_v,
+		mlx5_add_flow_rule(vport->egress.acl, spec,
 				   MLX5_FLOW_CONTEXT_ACTION_DROP,
 				   0, NULL);
 	if (IS_ERR(vport->egress.drop_rule)) {
@@ -1465,8 +1437,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 		vport->egress.drop_rule = NULL;
 	}
 out:
-	kfree(match_v);
-	kfree(match_c);
+	kvfree(spec);
 	return err;
 }
 
@@ -1540,10 +1511,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
 }
 
 /* Public E-Switch API */
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs)
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
 	int err;
-	int i;
+	int i, enabled_events;
 
 	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
 	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
@@ -1561,16 +1532,20 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs)
 	if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
 		esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
 
-	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs);
-
+	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+	esw->mode = mode;
 	esw_disable_vport(esw, 0);
 
-	err = esw_create_fdb_table(esw, nvfs + 1);
+	if (mode == SRIOV_LEGACY)
+		err = esw_create_legacy_fdb_table(esw, nvfs + 1);
+	else
+		err = esw_offloads_init(esw, nvfs + 1);
 	if (err)
 		goto abort;
 
+	enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE;
 	for (i = 0; i <= nvfs; i++)
-		esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS);
+		esw_enable_vport(esw, i, enabled_events);
 
 	esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
 		 esw->enabled_vports);
@@ -1584,16 +1559,18 @@ abort:
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
 	struct esw_mc_addr *mc_promisc;
+	int nvports;
 	int i;
 
 	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
 	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
 		return;
 
-	esw_info(esw->dev, "disable SRIOV: active vports(%d)\n",
-		 esw->enabled_vports);
+	esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
+		 esw->enabled_vports, esw->mode);
 
 	mc_promisc = esw->mc_promisc;
+	nvports = esw->enabled_vports;
 
 	for (i = 0; i < esw->total_vports; i++)
 		esw_disable_vport(esw, i);
@@ -1601,8 +1578,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 	if (mc_promisc && mc_promisc->uplink_rule)
 		mlx5_del_flow_rule(mc_promisc->uplink_rule);
 
-	esw_destroy_fdb_table(esw);
+	if (esw->mode == SRIOV_LEGACY)
+		esw_destroy_legacy_fdb_table(esw);
+	else if (esw->mode == SRIOV_OFFLOADS)
+		esw_offloads_cleanup(esw, nvports);
 
+	esw->mode = SRIOV_NONE;
 	/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
 	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
 }
@@ -1660,6 +1641,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		goto abort;
 	}
 
+	esw->offloads.vport_reps =
+		kzalloc(total_vports * sizeof(struct mlx5_eswitch_rep),
+			GFP_KERNEL);
+	if (!esw->offloads.vport_reps) {
+		err = -ENOMEM;
+		goto abort;
+	}
+
 	mutex_init(&esw->state_lock);
 
 	for (vport_num = 0; vport_num < total_vports; vport_num++) {
@@ -1673,6 +1662,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 
 	esw->total_vports = total_vports;
 	esw->enabled_vports = 0;
+	esw->mode = SRIOV_NONE;
 
 	dev->priv.eswitch = esw;
 	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
@@ -1683,6 +1673,7 @@ abort:
 		destroy_workqueue(esw->work_queue);
 	kfree(esw->l2_table.bitmap);
 	kfree(esw->vports);
+	kfree(esw->offloads.vport_reps);
 	kfree(esw);
 	return err;
 }
@@ -1700,6 +1691,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	destroy_workqueue(esw->work_queue);
 	kfree(esw->l2_table.bitmap);
 	kfree(esw->mc_promisc);
+	kfree(esw->offloads.vport_reps);
 	kfree(esw->vports);
 	kfree(esw);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index fd6800256d4a..c0b05603fc31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -35,6 +35,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_link.h>
+#include <net/devlink.h>
 #include <linux/mlx5/device.h>
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -46,6 +47,8 @@
 #define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
 #define MLX5_L2_ADDR_HASH(addr) (addr[5])
 
+#define FDB_UPLINK_VPORT 0xffff
+
 /* L2 -mac address based- hash helpers */
 struct l2addr_node {
 	struct hlist_node hlist;
@@ -134,9 +137,49 @@ struct mlx5_l2_table {
 
 struct mlx5_eswitch_fdb {
 	void *fdb;
-	struct mlx5_flow_group *addr_grp;
-	struct mlx5_flow_group *allmulti_grp;
-	struct mlx5_flow_group *promisc_grp;
+	union {
+		struct legacy_fdb {
+			struct mlx5_flow_group *addr_grp;
+			struct mlx5_flow_group *allmulti_grp;
+			struct mlx5_flow_group *promisc_grp;
+		} legacy;
+
+		struct offloads_fdb {
+			struct mlx5_flow_table *fdb;
+			struct mlx5_flow_group *send_to_vport_grp;
+			struct mlx5_flow_group *miss_grp;
+			struct mlx5_flow_rule  *miss_rule;
+		} offloads;
+	};
+};
+
+enum {
+	SRIOV_NONE,
+	SRIOV_LEGACY,
+	SRIOV_OFFLOADS
+};
+
+struct mlx5_esw_sq {
+	struct mlx5_flow_rule	*send_to_vport_rule;
+	struct list_head	 list;
+};
+
+struct mlx5_eswitch_rep {
+	int		       (*load)(struct mlx5_eswitch *esw,
+				       struct mlx5_eswitch_rep *rep);
+	void		       (*unload)(struct mlx5_eswitch *esw,
+					 struct mlx5_eswitch_rep *rep);
+	u16		       vport;
+	struct mlx5_flow_rule *vport_rx_rule;
+	void		      *priv_data;
+	struct list_head       vport_sqs_list;
+	bool		       valid;
+};
+
+struct mlx5_esw_offload {
+	struct mlx5_flow_table *ft_offloads;
+	struct mlx5_flow_group *vport_rx_group;
+	struct mlx5_eswitch_rep *vport_reps;
 };
 
 struct mlx5_eswitch {
@@ -153,13 +196,15 @@ struct mlx5_eswitch {
 	 */
 	struct mutex            state_lock;
 	struct esw_mc_addr      *mc_promisc;
+	struct mlx5_esw_offload offloads;
+	int                     mode;
 };
 
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
 void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs);
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       int vport, u8 mac[ETH_ALEN]);
@@ -177,4 +222,36 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 				 int vport,
 				 struct ifla_vf_stats *vf_stats);
 
+struct mlx5_flow_spec;
+
+struct mlx5_flow_rule *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+				struct mlx5_flow_spec *spec,
+				u32 action, u32 src_vport, u32 dst_vport);
+struct mlx5_flow_rule *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+
+int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
+				 struct mlx5_eswitch_rep *rep,
+				 u16 *sqns_array, int sqns_num);
+void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
+				 struct mlx5_eswitch_rep *rep);
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+				     struct mlx5_eswitch_rep *rep);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+				       int vport);
+
+#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
+
+#define esw_info(dev, format, ...)				\
+	pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_warn(dev, format, ...)				\
+	pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_debug(dev, format, ...)				\
+	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
new file mode 100644
index 000000000000..a357e8eeeed8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+enum {
+	FDB_FAST_PATH = 0,
+	FDB_SLOW_PATH
+};
+
+struct mlx5_flow_rule *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+				struct mlx5_flow_spec *spec,
+				u32 action, u32 src_vport, u32 dst_vport)
+{
+	struct mlx5_flow_destination dest = { 0 };
+	struct mlx5_fc *counter = NULL;
+	struct mlx5_flow_rule *rule;
+	void *misc;
+
+	if (esw->mode != SRIOV_OFFLOADS)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+		dest.vport_num = dst_vport;
+		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		counter = mlx5_fc_create(esw->dev, true);
+		if (IS_ERR(counter))
+			return ERR_CAST(counter);
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest.counter = counter;
+	}
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_port, src_vport);
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+				      MLX5_MATCH_MISC_PARAMETERS;
+
+	rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
+				  spec, action, 0, &dest);
+
+	if (IS_ERR(rule))
+		mlx5_fc_destroy(esw->dev, counter);
+
+	return rule;
+}
+
+static struct mlx5_flow_rule *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
+{
+	struct mlx5_flow_destination dest;
+	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_spec *spec;
+	void *misc;
+
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
+		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+		flow_rule = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
+	MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest.vport_num = vport;
+
+	flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
+				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				       0, &dest);
+	if (IS_ERR(flow_rule))
+		esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
+out:
+	kvfree(spec);
+	return flow_rule;
+}
+
+void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
+				 struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5_esw_sq *esw_sq, *tmp;
+
+	if (esw->mode != SRIOV_OFFLOADS)
+		return;
+
+	list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) {
+		mlx5_del_flow_rule(esw_sq->send_to_vport_rule);
+		list_del(&esw_sq->list);
+		kfree(esw_sq);
+	}
+}
+
+int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
+				 struct mlx5_eswitch_rep *rep,
+				 u16 *sqns_array, int sqns_num)
+{
+	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_esw_sq *esw_sq;
+	int vport;
+	int err;
+	int i;
+
+	if (esw->mode != SRIOV_OFFLOADS)
+		return 0;
+
+	vport = rep->vport == 0 ?
+		FDB_UPLINK_VPORT : rep->vport;
+
+	for (i = 0; i < sqns_num; i++) {
+		esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL);
+		if (!esw_sq) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+
+		/* Add re-inject rule to the PF/representor sqs */
+		flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
+								vport,
+								sqns_array[i]);
+		if (IS_ERR(flow_rule)) {
+			err = PTR_ERR(flow_rule);
+			kfree(esw_sq);
+			goto out_err;
+		}
+		esw_sq->send_to_vport_rule = flow_rule;
+		list_add(&esw_sq->list, &rep->vport_sqs_list);
+	}
+	return 0;
+
+out_err:
+	mlx5_eswitch_sqs2vport_stop(esw, rep);
+	return err;
+}
+
+static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
+{
+	struct mlx5_flow_destination dest;
+	struct mlx5_flow_rule *flow_rule = NULL;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
+		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest.vport_num = 0;
+
+	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
+				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				       0, &dest);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
+		goto out;
+	}
+
+	esw->fdb_table.offloads.miss_rule = flow_rule;
+out:
+	kvfree(spec);
+	return err;
+}
+
+#define MAX_PF_SQ 256
+#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */
+#define ESW_OFFLOADS_NUM_GROUPS  4
+
+static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *fdb = NULL;
+	struct mlx5_flow_group *g;
+	u32 *flow_group_in;
+	void *match_criteria;
+	int table_size, ix, err = 0;
+
+	flow_group_in = mlx5_vzalloc(inlen);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		goto ns_err;
+	}
+
+	esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n",
+		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+						  ESW_OFFLOADS_NUM_ENTRIES,
+						  ESW_OFFLOADS_NUM_GROUPS, 0);
+	if (IS_ERR(fdb)) {
+		err = PTR_ERR(fdb);
+		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
+		goto fast_fdb_err;
+	}
+	esw->fdb_table.fdb = fdb;
+
+	table_size = nvports + MAX_PF_SQ + 1;
+	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
+	if (IS_ERR(fdb)) {
+		err = PTR_ERR(fdb);
+		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
+		goto slow_fdb_err;
+	}
+	esw->fdb_table.offloads.fdb = fdb;
+
+	/* create send-to-vport group */
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_MISC_PARAMETERS);
+
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+
+	ix = nvports + MAX_PF_SQ;
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
+
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+		goto send_vport_err;
+	}
+	esw->fdb_table.offloads.send_to_vport_grp = g;
+
+	/* create miss group */
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
+		goto miss_err;
+	}
+	esw->fdb_table.offloads.miss_grp = g;
+
+	err = esw_add_fdb_miss_rule(esw);
+	if (err)
+		goto miss_rule_err;
+
+	return 0;
+
+miss_rule_err:
+	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+miss_err:
+	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+send_vport_err:
+	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
+slow_fdb_err:
+	mlx5_destroy_flow_table(esw->fdb_table.fdb);
+fast_fdb_err:
+ns_err:
+	kvfree(flow_group_in);
+	return err;
+}
+
+static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
+{
+	if (!esw->fdb_table.fdb)
+		return;
+
+	esw_debug(esw->dev, "Destroy offloads FDB Table\n");
+	mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule);
+	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+
+	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
+	mlx5_destroy_flow_table(esw->fdb_table.fdb);
+}
+
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+{
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *ft_offloads;
+	struct mlx5_core_dev *dev = esw->dev;
+	int err = 0;
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+	if (!ns) {
+		esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+		return -ENOMEM;
+	}
+
+	ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0);
+	if (IS_ERR(ft_offloads)) {
+		err = PTR_ERR(ft_offloads);
+		esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
+		return err;
+	}
+
+	esw->offloads.ft_offloads = ft_offloads;
+	return 0;
+}
+
+static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
+{
+	struct mlx5_esw_offload *offloads = &esw->offloads;
+
+	mlx5_destroy_flow_table(offloads->ft_offloads);
+}
+
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *g;
+	struct mlx5_priv *priv = &esw->dev->priv;
+	u32 *flow_group_in;
+	void *match_criteria, *misc;
+	int err = 0;
+	int nvports = priv->sriov.num_vfs + 2;
+
+	flow_group_in = mlx5_vzalloc(inlen);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	/* create vport rx group */
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_MISC_PARAMETERS);
+
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+	misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
+
+	g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+		goto out;
+	}
+
+	esw->offloads.vport_rx_group = g;
+out:
+	kfree(flow_group_in);
+	return err;
+}
+
+static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
+{
+	mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
+}
+
+struct mlx5_flow_rule *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
+{
+	struct mlx5_flow_destination dest;
+	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_spec *spec;
+	void *misc;
+
+	spec = mlx5_vzalloc(sizeof(*spec));
+	if (!spec) {
+		esw_warn(esw->dev, "Failed to alloc match parameters\n");
+		flow_rule = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	dest.tir_num = tirn;
+
+	flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec,
+				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				       0, &dest);
+	if (IS_ERR(flow_rule)) {
+		esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
+		goto out;
+	}
+
+out:
+	kvfree(spec);
+	return flow_rule;
+}
+
+static int esw_offloads_start(struct mlx5_eswitch *esw)
+{
+	int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+	if (esw->mode != SRIOV_LEGACY) {
+		esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
+		return -EINVAL;
+	}
+
+	mlx5_eswitch_disable_sriov(esw);
+	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+	if (err)
+		esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err);
+	return err;
+}
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+{
+	struct mlx5_eswitch_rep *rep;
+	int vport;
+	int err;
+
+	err = esw_create_offloads_fdb_table(esw, nvports);
+	if (err)
+		return err;
+
+	err = esw_create_offloads_table(esw);
+	if (err)
+		goto create_ft_err;
+
+	err = esw_create_vport_rx_group(esw);
+	if (err)
+		goto create_fg_err;
+
+	for (vport = 0; vport < nvports; vport++) {
+		rep = &esw->offloads.vport_reps[vport];
+		if (!rep->valid)
+			continue;
+
+		err = rep->load(esw, rep);
+		if (err)
+			goto err_reps;
+	}
+	return 0;
+
+err_reps:
+	for (vport--; vport >= 0; vport--) {
+		rep = &esw->offloads.vport_reps[vport];
+		if (!rep->valid)
+			continue;
+		rep->unload(esw, rep);
+	}
+	esw_destroy_vport_rx_group(esw);
+
+create_fg_err:
+	esw_destroy_offloads_table(esw);
+
+create_ft_err:
+	esw_destroy_offloads_fdb_table(esw);
+	return err;
+}
+
+static int esw_offloads_stop(struct mlx5_eswitch *esw)
+{
+	int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+	mlx5_eswitch_disable_sriov(esw);
+	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+	if (err)
+		esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err);
+
+	return err;
+}
+
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+{
+	struct mlx5_eswitch_rep *rep;
+	int vport;
+
+	for (vport = 0; vport < nvports; vport++) {
+		rep = &esw->offloads.vport_reps[vport];
+		if (!rep->valid)
+			continue;
+		rep->unload(esw, rep);
+	}
+
+	esw_destroy_vport_rx_group(esw);
+	esw_destroy_offloads_table(esw);
+	esw_destroy_offloads_fdb_table(esw);
+}
+
+static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+{
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+		*mlx5_mode = SRIOV_LEGACY;
+		break;
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		*mlx5_mode = SRIOV_OFFLOADS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct mlx5_core_dev *dev;
+	u16 cur_mlx5_mode, mlx5_mode = 0;
+
+	dev = devlink_priv(devlink);
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	cur_mlx5_mode = dev->priv.eswitch->mode;
+
+	if (cur_mlx5_mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode))
+		return -EINVAL;
+
+	if (cur_mlx5_mode == mlx5_mode)
+		return 0;
+
+	if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return esw_offloads_start(dev->priv.eswitch);
+	else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
+		return esw_offloads_stop(dev->priv.eswitch);
+	else
+		return -EINVAL;
+}
+
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct mlx5_core_dev *dev;
+
+	dev = devlink_priv(devlink);
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (dev->priv.eswitch->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	*mode = dev->priv.eswitch->mode;
+
+	return 0;
+}
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+				     struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5_esw_offload *offloads = &esw->offloads;
+
+	memcpy(&offloads->vport_reps[rep->vport], rep,
+	       sizeof(struct mlx5_eswitch_rep));
+
+	INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list);
+	offloads->vport_reps[rep->vport].valid = true;
+}
+
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+				       int vport)
+{
+	struct mlx5_esw_offload *offloads = &esw->offloads;
+	struct mlx5_eswitch_rep *rep;
+
+	rep = &offloads->vport_reps[vport];
+
+	if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled)
+		rep->unload(esw, rep);
+
+	offloads->vport_reps[vport].valid = false;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a5bb6b695242..9134010e2921 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
 
 	return 0;
 }
+
+struct mlx5_cmd_fc_bulk {
+	u16 id;
+	int num;
+	int outlen;
+	u32 out[0];
+};
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
+{
+	struct mlx5_cmd_fc_bulk *b;
+	int outlen = sizeof(*b) +
+		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter) * num;
+
+	b = kzalloc(outlen, GFP_KERNEL);
+	if (!b)
+		return NULL;
+
+	b->id = id;
+	b->num = num;
+	b->outlen = outlen;
+
+	return b;
+}
+
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
+{
+	kfree(b);
+}
+
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+{
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(query_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
+	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+					  b->out, b->outlen);
+}
+
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+			  struct mlx5_cmd_fc_bulk *b, u16 id,
+			  u64 *packets, u64 *bytes)
+{
+	int index = id - b->id;
+	void *stats;
+
+	if (index < 0 || index >= b->num) {
+		mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
+			       id, b->id, b->id + b->num - 1);
+		return;
+	}
+
+	stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
+			     flow_statistics[index]);
+	*packets = MLX5_GET64(traffic_counter, stats, packets);
+	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index fc4f7b83fe0a..158844cef82b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -76,4 +76,16 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
 		      u64 *packets, u64 *bytes);
+
+struct mlx5_cmd_fc_bulk;
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num);
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+			  struct mlx5_cmd_fc_bulk *b, u16 id,
+			  u64 *packets, u64 *bytes);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e912a3d2505e..1a377b403321 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -67,13 +67,21 @@
 #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
 			       .caps = (long[]) {__VA_ARGS__} }
 
+#define FS_CHAINING_CAPS  FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
+					   FS_CAP(flow_table_properties_nic_receive.modify_root), \
+					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
+					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
+
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
 #define BY_PASS_PRIO_NUM_LEVELS 1
-#define BY_PASS_MIN_LEVEL (KERNEL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
+#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
 			   LEFTOVERS_NUM_PRIOS)
 
+#define ETHTOOL_PRIO_NUM_LEVELS 1
+#define ETHTOOL_NUM_PRIOS 10
+#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
 /* Vlan, mac, ttc, aRFS */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 4
 #define KERNEL_NIC_NUM_PRIOS 1
@@ -83,6 +91,11 @@
 #define ANCHOR_NUM_LEVELS 1
 #define ANCHOR_NUM_PRIOS 1
 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
+#define OFFLOADS_MAX_FT 1
+#define OFFLOADS_NUM_PRIOS 1
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
+
 struct node_caps {
 	size_t	arr_sz;
 	long	*caps;
@@ -98,24 +111,24 @@ static struct init_tree_node {
 	int num_levels;
 } root_fs = {
 	.type = FS_TYPE_NAMESPACE,
-	.ar_size = 4,
+	.ar_size = 6,
 	.children = (struct init_tree_node[]) {
 		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
-			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
-					  FS_CAP(flow_table_properties_nic_receive.modify_root),
-					  FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
-					  FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
+			 FS_CHAINING_CAPS,
 			 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
 						  BY_PASS_PRIO_NUM_LEVELS))),
+		ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+			 ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
+		ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
+						  ETHTOOL_PRIO_NUM_LEVELS))),
 		ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
 			 ADD_NS(ADD_MULTIPLE_PRIO(1, 1),
 				ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
 						  KERNEL_NIC_PRIO_NUM_LEVELS))),
 		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
-			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
-					  FS_CAP(flow_table_properties_nic_receive.modify_root),
-					  FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
-					  FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
+			 FS_CHAINING_CAPS,
 			 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))),
 		ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
 			 ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))),
@@ -1152,9 +1165,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
 
 static struct mlx5_flow_rule *
 _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		    u8 match_criteria_enable,
-		    u32 *match_criteria,
-		    u32 *match_value,
+		   struct mlx5_flow_spec *spec,
 		    u32 action,
 		    u32 flow_tag,
 		    struct mlx5_flow_destination *dest)
@@ -1168,22 +1179,23 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 	nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
 	fs_for_each_fg(g, ft)
 		if (compare_match_criteria(g->mask.match_criteria_enable,
-					   match_criteria_enable,
+					   spec->match_criteria_enable,
 					   g->mask.match_criteria,
-					   match_criteria)) {
-			rule = add_rule_fg(g, match_value,
+					   spec->match_criteria)) {
+			rule = add_rule_fg(g, spec->match_value,
 					   action, flow_tag, dest);
 			if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
 				goto unlock;
 		}
 
-	g = create_autogroup(ft, match_criteria_enable, match_criteria);
+	g = create_autogroup(ft, spec->match_criteria_enable,
+			     spec->match_criteria);
 	if (IS_ERR(g)) {
 		rule = (void *)g;
 		goto unlock;
 	}
 
-	rule = add_rule_fg(g, match_value,
+	rule = add_rule_fg(g, spec->match_value,
 			   action, flow_tag, dest);
 	if (IS_ERR(rule)) {
 		/* Remove assumes refcount > 0 and autogroup creates a group
@@ -1207,9 +1219,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
 
 struct mlx5_flow_rule *
 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   u8 match_criteria_enable,
-		   u32 *match_criteria,
-		   u32 *match_value,
+		   struct mlx5_flow_spec *spec,
 		   u32 action,
 		   u32 flow_tag,
 		   struct mlx5_flow_destination *dest)
@@ -1240,8 +1250,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 		}
 	}
 
-	rule =	_mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
-				    match_value, action, flow_tag, dest);
+	rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest);
 
 	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!IS_ERR_OR_NULL(rule) &&
@@ -1359,40 +1368,47 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 						    enum mlx5_flow_namespace_type type)
 {
-	struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	struct mlx5_flow_root_namespace *root_ns;
 	int prio;
 	struct fs_prio *fs_prio;
 	struct mlx5_flow_namespace *ns;
 
-	if (!root_ns)
+	if (!steering)
 		return NULL;
 
 	switch (type) {
 	case MLX5_FLOW_NAMESPACE_BYPASS:
+	case MLX5_FLOW_NAMESPACE_OFFLOADS:
+	case MLX5_FLOW_NAMESPACE_ETHTOOL:
 	case MLX5_FLOW_NAMESPACE_KERNEL:
 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
 	case MLX5_FLOW_NAMESPACE_ANCHOR:
 		prio = type;
 		break;
 	case MLX5_FLOW_NAMESPACE_FDB:
-		if (dev->priv.fdb_root_ns)
-			return &dev->priv.fdb_root_ns->ns;
+		if (steering->fdb_root_ns)
+			return &steering->fdb_root_ns->ns;
 		else
 			return NULL;
 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
-		if (dev->priv.esw_egress_root_ns)
-			return &dev->priv.esw_egress_root_ns->ns;
+		if (steering->esw_egress_root_ns)
+			return &steering->esw_egress_root_ns->ns;
 		else
 			return NULL;
 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
-		if (dev->priv.esw_ingress_root_ns)
-			return &dev->priv.esw_ingress_root_ns->ns;
+		if (steering->esw_ingress_root_ns)
+			return &steering->esw_ingress_root_ns->ns;
 		else
 			return NULL;
 	default:
 		return NULL;
 	}
 
+	root_ns = steering->root_ns;
+	if (!root_ns)
+		return NULL;
+
 	fs_prio = find_prio(&root_ns->ns, prio);
 	if (!fs_prio)
 		return NULL;
@@ -1478,13 +1494,13 @@ static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
 	return true;
 }
 
-static int init_root_tree_recursive(struct mlx5_core_dev *dev,
+static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
 				    struct init_tree_node *init_node,
 				    struct fs_node *fs_parent_node,
 				    struct init_tree_node *init_parent_node,
 				    int prio)
 {
-	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
+	int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
 					      flow_table_properties_nic_receive.
 					      max_ft_level);
 	struct mlx5_flow_namespace *fs_ns;
@@ -1495,7 +1511,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev,
 
 	if (init_node->type == FS_TYPE_PRIO) {
 		if ((init_node->min_ft_level > max_ft_level) ||
-		    !has_required_caps(dev, &init_node->caps))
+		    !has_required_caps(steering->dev, &init_node->caps))
 			return 0;
 
 		fs_get_obj(fs_ns, fs_parent_node);
@@ -1516,7 +1532,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev,
 	}
 	prio = 0;
 	for (i = 0; i < init_node->ar_size; i++) {
-		err = init_root_tree_recursive(dev, &init_node->children[i],
+		err = init_root_tree_recursive(steering, &init_node->children[i],
 					       base, init_node, prio);
 		if (err)
 			return err;
@@ -1529,7 +1545,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev,
 	return 0;
 }
 
-static int init_root_tree(struct mlx5_core_dev *dev,
+static int init_root_tree(struct mlx5_flow_steering *steering,
 			  struct init_tree_node *init_node,
 			  struct fs_node *fs_parent_node)
 {
@@ -1539,7 +1555,7 @@ static int init_root_tree(struct mlx5_core_dev *dev,
 
 	fs_get_obj(fs_ns, fs_parent_node);
 	for (i = 0; i < init_node->ar_size; i++) {
-		err = init_root_tree_recursive(dev, &init_node->children[i],
+		err = init_root_tree_recursive(steering, &init_node->children[i],
 					       &fs_ns->node,
 					       init_node, i);
 		if (err)
@@ -1548,7 +1564,7 @@ static int init_root_tree(struct mlx5_core_dev *dev,
 	return 0;
 }
 
-static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
+static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
 						       enum fs_flow_table_type
 						       table_type)
 {
@@ -1560,7 +1576,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev
 	if (!root_ns)
 		return NULL;
 
-	root_ns->dev = dev;
+	root_ns->dev = steering->dev;
 	root_ns->table_type = table_type;
 
 	ns = &root_ns->ns;
@@ -1615,212 +1631,132 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
 #define ANCHOR_PRIO 0
 #define ANCHOR_SIZE 1
 #define ANCHOR_LEVEL 0
-static int create_anchor_flow_table(struct mlx5_core_dev
-							*dev)
+static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
 {
 	struct mlx5_flow_namespace *ns = NULL;
 	struct mlx5_flow_table *ft;
 
-	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR);
+	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
 	if (!ns)
 		return -EINVAL;
 	ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL);
 	if (IS_ERR(ft)) {
-		mlx5_core_err(dev, "Failed to create last anchor flow table");
+		mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
 		return PTR_ERR(ft);
 	}
 	return 0;
 }
 
-static int init_root_ns(struct mlx5_core_dev *dev)
+static int init_root_ns(struct mlx5_flow_steering *steering)
 {
 
-	dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX);
-	if (IS_ERR_OR_NULL(dev->priv.root_ns))
+	steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
+	if (IS_ERR_OR_NULL(steering->root_ns))
 		goto cleanup;
 
-	if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node))
+	if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
 		goto cleanup;
 
-	set_prio_attrs(dev->priv.root_ns);
+	set_prio_attrs(steering->root_ns);
 
-	if (create_anchor_flow_table(dev))
+	if (create_anchor_flow_table(steering))
 		goto cleanup;
 
 	return 0;
 
 cleanup:
-	mlx5_cleanup_fs(dev);
+	mlx5_cleanup_fs(steering->dev);
 	return -ENOMEM;
 }
 
-static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
-					struct mlx5_flow_root_namespace *root_ns)
+static void clean_tree(struct fs_node *node)
 {
-	struct fs_node *prio;
-
-	if (!root_ns)
-		return;
+	if (node) {
+		struct fs_node *iter;
+		struct fs_node *temp;
 
-	if (!list_empty(&root_ns->ns.node.children)) {
-		prio = list_first_entry(&root_ns->ns.node.children,
-					struct fs_node,
-				 list);
-		if (tree_remove_node(prio))
-			mlx5_core_warn(dev,
-				       "Flow steering priority wasn't destroyed, refcount > 1\n");
+		list_for_each_entry_safe(iter, temp, &node->children, list)
+			clean_tree(iter);
+		tree_remove_node(node);
 	}
-	if (tree_remove_node(&root_ns->ns.node))
-		mlx5_core_warn(dev,
-			       "Flow steering namespace wasn't destroyed, refcount > 1\n");
-	root_ns = NULL;
 }
 
-static void destroy_flow_tables(struct fs_prio *prio)
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
 {
-	struct mlx5_flow_table *iter;
-	struct mlx5_flow_table *tmp;
-
-	fs_for_each_ft_safe(iter, tmp, prio)
-		mlx5_destroy_flow_table(iter);
-}
-
-static void cleanup_root_ns(struct mlx5_core_dev *dev)
-{
-	struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
-	struct fs_prio *iter_prio;
-
-	if (!MLX5_CAP_GEN(dev, nic_flow_table))
-		return;
-
 	if (!root_ns)
 		return;
 
-	/* stage 1 */
-	fs_for_each_prio(iter_prio, &root_ns->ns) {
-		struct fs_node *node;
-		struct mlx5_flow_namespace *iter_ns;
-
-		fs_for_each_ns_or_ft(node, iter_prio) {
-			if (node->type == FS_TYPE_FLOW_TABLE)
-				continue;
-			fs_get_obj(iter_ns, node);
-			while (!list_empty(&iter_ns->node.children)) {
-				struct fs_prio *obj_iter_prio2;
-				struct fs_node *iter_prio2 =
-					list_first_entry(&iter_ns->node.children,
-							 struct fs_node,
-							 list);
-
-				fs_get_obj(obj_iter_prio2, iter_prio2);
-				destroy_flow_tables(obj_iter_prio2);
-				if (tree_remove_node(iter_prio2)) {
-					mlx5_core_warn(dev,
-						       "Priority %d wasn't destroyed, refcount > 1\n",
-						       obj_iter_prio2->prio);
-					return;
-				}
-			}
-		}
-	}
-
-	/* stage 2 */
-	fs_for_each_prio(iter_prio, &root_ns->ns) {
-		while (!list_empty(&iter_prio->node.children)) {
-			struct fs_node *iter_ns =
-				list_first_entry(&iter_prio->node.children,
-						 struct fs_node,
-						 list);
-			if (tree_remove_node(iter_ns)) {
-				mlx5_core_warn(dev,
-					       "Namespace wasn't destroyed, refcount > 1\n");
-				return;
-			}
-		}
-	}
-
-	/* stage 3 */
-	while (!list_empty(&root_ns->ns.node.children)) {
-		struct fs_prio *obj_prio_node;
-		struct fs_node *prio_node =
-			list_first_entry(&root_ns->ns.node.children,
-					 struct fs_node,
-					 list);
-
-		fs_get_obj(obj_prio_node, prio_node);
-		if (tree_remove_node(prio_node)) {
-			mlx5_core_warn(dev,
-				       "Priority %d wasn't destroyed, refcount > 1\n",
-				       obj_prio_node->prio);
-			return;
-		}
-	}
-
-	if (tree_remove_node(&root_ns->ns.node)) {
-		mlx5_core_warn(dev,
-			       "root namespace wasn't destroyed, refcount > 1\n");
-		return;
-	}
-
-	dev->priv.root_ns = NULL;
+	clean_tree(&root_ns->ns.node);
 }
 
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 {
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+
 	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
 		return;
 
-	cleanup_root_ns(dev);
-	cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
-	cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
-	cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns);
+	cleanup_root_ns(steering->root_ns);
+	cleanup_root_ns(steering->esw_egress_root_ns);
+	cleanup_root_ns(steering->esw_ingress_root_ns);
+	cleanup_root_ns(steering->fdb_root_ns);
 	mlx5_cleanup_fc_stats(dev);
+	kfree(steering);
 }
 
-static int init_fdb_root_ns(struct mlx5_core_dev *dev)
+static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
 
-	dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB);
-	if (!dev->priv.fdb_root_ns)
+	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
+	if (!steering->fdb_root_ns)
 		return -ENOMEM;
 
-	/* Create single prio */
-	prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1);
-	if (IS_ERR(prio)) {
-		cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
-		return PTR_ERR(prio);
-	} else {
-		return 0;
-	}
+	prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1);
+	if (IS_ERR(prio))
+		goto out_err;
+
+	prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+	if (IS_ERR(prio))
+		goto out_err;
+
+	set_prio_attrs(steering->fdb_root_ns);
+	return 0;
+
+out_err:
+	cleanup_root_ns(steering->fdb_root_ns);
+	steering->fdb_root_ns = NULL;
+	return PTR_ERR(prio);
 }
 
-static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
+static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
 
-	dev->priv.esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL);
-	if (!dev->priv.esw_egress_root_ns)
+	steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
+	if (!steering->esw_egress_root_ns)
 		return -ENOMEM;
 
 	/* create 1 prio*/
-	prio = fs_create_prio(&dev->priv.esw_egress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev));
+	prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0,
+			      MLX5_TOTAL_VPORTS(steering->dev));
 	if (IS_ERR(prio))
 		return PTR_ERR(prio);
 	else
 		return 0;
 }
 
-static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
+static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
 
-	dev->priv.esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL);
-	if (!dev->priv.esw_ingress_root_ns)
+	steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
+	if (!steering->esw_ingress_root_ns)
 		return -ENOMEM;
 
 	/* create 1 prio*/
-	prio = fs_create_prio(&dev->priv.esw_ingress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev));
+	prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0,
+			      MLX5_TOTAL_VPORTS(steering->dev));
 	if (IS_ERR(prio))
 		return PTR_ERR(prio);
 	else
@@ -1829,6 +1765,7 @@ static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
 
 int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
+	struct mlx5_flow_steering *steering;
 	int err = 0;
 
 	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
@@ -1838,26 +1775,32 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 	if (err)
 		return err;
 
+	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+	if (!steering)
+		return -ENOMEM;
+	steering->dev = dev;
+	dev->priv.steering = steering;
+
 	if (MLX5_CAP_GEN(dev, nic_flow_table) &&
 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
-		err = init_root_ns(dev);
+		err = init_root_ns(steering);
 		if (err)
 			goto err;
 	}
 
 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
-			err = init_fdb_root_ns(dev);
+			err = init_fdb_root_ns(steering);
 			if (err)
 				goto err;
 		}
 		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
-			err = init_egress_acl_root_ns(dev);
+			err = init_egress_acl_root_ns(steering);
 			if (err)
 				goto err;
 		}
 		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
-			err = init_ingress_acl_root_ns(dev);
+			err = init_ingress_acl_root_ns(steering);
 			if (err)
 				goto err;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index aa41a7314691..9cffb6aeb4e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -55,6 +55,14 @@ enum fs_fte_status {
 	FS_FTE_STATUS_EXISTING = 1UL << 0,
 };
 
+struct mlx5_flow_steering {
+	struct mlx5_core_dev *dev;
+	struct mlx5_flow_root_namespace *root_ns;
+	struct mlx5_flow_root_namespace *fdb_root_ns;
+	struct mlx5_flow_root_namespace *esw_egress_root_ns;
+	struct mlx5_flow_root_namespace *esw_ingress_root_ns;
+};
+
 struct fs_node {
 	struct list_head	list;
 	struct list_head	children;
@@ -103,6 +111,7 @@ struct mlx5_fc_cache {
 };
 
 struct mlx5_fc {
+	struct rb_node node;
 	struct list_head list;
 
 	/* last{packets,bytes} members are used when calculating the delta since
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 164dc37fda72..c2877e9de8a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -32,6 +32,7 @@
 
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
 #include "mlx5_core.h"
 #include "fs_core.h"
 #include "fs_cmd.h"
@@ -68,32 +69,108 @@
  *   elapsed, the thread will actually query the hardware.
  */
 
+static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+{
+	struct rb_node **new = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node);
+		int result = counter->id - this->id;
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&counter->node, parent, new);
+	rb_insert_color(&counter->node, root);
+}
+
+static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+					   struct mlx5_fc *first,
+					   u16 last_id)
+{
+	struct mlx5_cmd_fc_bulk *b;
+	struct rb_node *node = NULL;
+	u16 afirst_id;
+	int num;
+	int err;
+	int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk);
+
+	/* first id must be aligned to 4 when using bulk query */
+	afirst_id = first->id & ~0x3;
+
+	/* number of counters to query inc. the last counter */
+	num = ALIGN(last_id - afirst_id + 1, 4);
+	if (num > max_bulk) {
+		num = max_bulk;
+		last_id = afirst_id + num - 1;
+	}
+
+	b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
+	if (!b) {
+		mlx5_core_err(dev, "Error allocating resources for bulk query\n");
+		return NULL;
+	}
+
+	err = mlx5_cmd_fc_bulk_query(dev, b);
+	if (err) {
+		mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+		goto out;
+	}
+
+	for (node = &first->node; node; node = rb_next(node)) {
+		struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+		struct mlx5_fc_cache *c = &counter->cache;
+
+		if (counter->id > last_id)
+			break;
+
+		mlx5_cmd_fc_bulk_get(dev, b,
+				     counter->id, &c->packets, &c->bytes);
+	}
+
+out:
+	mlx5_cmd_fc_bulk_free(b);
+
+	return node;
+}
+
 static void mlx5_fc_stats_work(struct work_struct *work)
 {
 	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
 						 priv.fc_stats.work.work);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	unsigned long now = jiffies;
-	struct mlx5_fc *counter;
-	struct mlx5_fc *tmp;
-	int err = 0;
+	struct mlx5_fc *counter = NULL;
+	struct mlx5_fc *last = NULL;
+	struct rb_node *node;
+	LIST_HEAD(tmplist);
 
 	spin_lock(&fc_stats->addlist_lock);
 
-	list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+	list_splice_tail_init(&fc_stats->addlist, &tmplist);
 
-	if (!list_empty(&fc_stats->list))
+	if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
 		queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD);
 
 	spin_unlock(&fc_stats->addlist_lock);
 
-	list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
-		struct mlx5_fc_cache *c = &counter->cache;
-		u64 packets;
-		u64 bytes;
+	list_for_each_entry(counter, &tmplist, list)
+		mlx5_fc_stats_insert(&fc_stats->counters, counter);
+
+	node = rb_first(&fc_stats->counters);
+	while (node) {
+		counter = rb_entry(node, struct mlx5_fc, node);
+
+		node = rb_next(node);
 
 		if (counter->deleted) {
-			list_del(&counter->list);
+			rb_erase(&counter->node, &fc_stats->counters);
 
 			mlx5_cmd_fc_free(dev, counter->id);
 
@@ -101,26 +178,20 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 			continue;
 		}
 
-		if (time_before(now, fc_stats->next_query))
-			continue;
+		last = counter;
+	}
 
-		err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes);
-		if (err) {
-			pr_err("Error querying stats for counter id %d\n",
-			       counter->id);
-			continue;
-		}
+	if (time_before(now, fc_stats->next_query) || !last)
+		return;
 
-		if (packets == c->packets)
-			continue;
+	node = rb_first(&fc_stats->counters);
+	while (node) {
+		counter = rb_entry(node, struct mlx5_fc, node);
 
-		c->lastuse = jiffies;
-		c->packets = packets;
-		c->bytes   = bytes;
+		node = mlx5_fc_stats_query(dev, counter, last->id);
 	}
 
-	if (time_after_eq(now, fc_stats->next_query))
-		fc_stats->next_query = now + MLX5_FC_STATS_PERIOD;
+	fc_stats->next_query = now + MLX5_FC_STATS_PERIOD;
 }
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
@@ -176,7 +247,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 
-	INIT_LIST_HEAD(&fc_stats->list);
+	fc_stats->counters = RB_ROOT;
 	INIT_LIST_HEAD(&fc_stats->addlist);
 	spin_lock_init(&fc_stats->addlist_lock);
 
@@ -194,20 +265,32 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	struct mlx5_fc *counter;
 	struct mlx5_fc *tmp;
+	struct rb_node *node;
 
 	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
-	list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
-
-	list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
+	list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
 		list_del(&counter->list);
 
 		mlx5_cmd_fc_free(dev, counter->id);
 
 		kfree(counter);
 	}
+
+	node = rb_first(&fc_stats->counters);
+	while (node) {
+		counter = rb_entry(node, struct mlx5_fc, node);
+
+		node = rb_next(node);
+
+		rb_erase(&counter->node, &fc_stats->counters);
+
+		mlx5_cmd_fc_free(dev, counter->id);
+
+		kfree(counter);
+	}
 }
 
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 42d16b9458e4..96a59463ae65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
 
 void mlx5_enter_error_state(struct mlx5_core_dev *dev)
 {
+	mutex_lock(&dev->intf_state_mutex);
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-		return;
+		goto unlock;
 
 	mlx5_core_err(dev, "start\n");
-	if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+	if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
 		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+		trigger_cmd_completions(dev);
+	}
 
 	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
 	mlx5_core_err(dev, "end\n");
+
+unlock:
+	mutex_unlock(&dev->intf_state_mutex);
 }
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
 	u32 count;
 
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		trigger_cmd_completions(dev);
 		mod_timer(&health->timer, get_next_poll_jiffies());
 		return;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 08cae3485960..4f491d43e77d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -51,6 +51,7 @@
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
+#include <net/devlink.h>
 #include "mlx5_core.h"
 #include "fs_core.h"
 #ifdef CONFIG_MLX5_CORE_EN
@@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler {
 		      void *data);
 };
 
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_CORE_EN
+	.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+	.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+#endif
+};
 
 static int init_one(struct pci_dev *pdev,
 		    const struct pci_device_id *id)
 {
 	struct mlx5_core_dev *dev;
+	struct devlink *devlink;
 	struct mlx5_priv *priv;
 	int err;
 
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev) {
+	devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
+	if (!devlink) {
 		dev_err(&pdev->dev, "kzalloc failed\n");
 		return -ENOMEM;
 	}
+
+	dev = devlink_priv(devlink);
 	priv = &dev->priv;
 	priv->pci_dev_data = id->driver_data;
 
@@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev,
 		goto clean_health;
 	}
 
+	err = devlink_register(devlink, &pdev->dev);
+	if (err)
+		goto clean_load;
+
 	return 0;
 
+clean_load:
+	mlx5_unload_one(dev, priv);
 clean_health:
 	mlx5_health_cleanup(dev);
 close_pci:
 	mlx5_pci_close(dev, priv);
 clean_dev:
 	pci_set_drvdata(pdev, NULL);
-	kfree(dev);
+	devlink_free(devlink);
 
 	return err;
 }
@@ -1380,8 +1396,10 @@ clean_dev:
 static void remove_one(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct devlink *devlink = priv_to_devlink(dev);
 	struct mlx5_priv *priv = &dev->priv;
 
+	devlink_unregister(devlink);
 	if (mlx5_unload_one(dev, priv)) {
 		dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
 		mlx5_health_cleanup(dev);
@@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev)
 	mlx5_health_cleanup(dev);
 	mlx5_pci_close(dev, priv);
 	pci_set_drvdata(pdev, NULL);
-	kfree(dev);
+	devlink_free(devlink);
 }
 
 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
@@ -1432,46 +1450,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
 	mlx5_pci_err_detected(dev->pdev, 0);
 }
 
-/* wait for the device to show vital signs. For now we check
- * that we can read the device ID and that the health buffer
- * shows a non zero value which is different than 0xffffffff
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
  */
-static void wait_vital(struct pci_dev *pdev)
+static int wait_vital(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 	struct mlx5_core_health *health = &dev->priv.health;
 	const int niter = 100;
+	u32 last_count = 0;
 	u32 count;
-	u16 did;
 	int i;
 
-	/* Wait for firmware to be ready after reset */
-	msleep(1000);
-	for (i = 0; i < niter; i++) {
-		if (pci_read_config_word(pdev, 2, &did)) {
-			dev_warn(&pdev->dev, "failed reading config word\n");
-			break;
-		}
-		if (did == pdev->device) {
-			dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
-			break;
-		}
-		msleep(50);
-	}
-	if (i == niter)
-		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
-
 	for (i = 0; i < niter; i++) {
 		count = ioread32be(health->health_counter);
 		if (count && count != 0xffffffff) {
-			dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
-			break;
+			if (last_count && last_count != count) {
+				dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+				return 0;
+			}
+			last_count = count;
 		}
 		msleep(50);
 	}
 
-	if (i == niter)
-		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+	return -ETIMEDOUT;
 }
 
 static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1483,7 +1486,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
 	dev_info(&pdev->dev, "%s was called\n", __func__);
 
 	pci_save_state(pdev);
-	wait_vital(pdev);
+	err = wait_vital(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+		return;
+	}
 
 	err = mlx5_load_one(dev, priv);
 	if (err)
@@ -1518,8 +1525,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
 	{ PCI_VDEVICE(MELLANOX, 0x1015) },			/* ConnectX-4LX */
 	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
-	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5 */
+	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
 	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
+	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5, PCIe 4.0 */
 	{ 0, }
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 9eeee0545f1c..32dea3524cee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -345,7 +345,6 @@ retry:
 			       func_id, npages, err);
 		goto out_4k;
 	}
-	dev->priv.fw_pages += npages;
 
 	err = mlx5_cmd_status_to_err(&out.hdr);
 	if (err) {
@@ -373,6 +372,33 @@ out_free:
 	return err;
 }
 
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+			     struct mlx5_manage_pages_inbox *in, int in_size,
+			     struct mlx5_manage_pages_outbox *out, int out_size)
+{
+	struct fw_page *fwp;
+	struct rb_node *p;
+	u32 npages;
+	u32 i = 0;
+
+	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
+						  (u32 *)out, out_size);
+
+	npages = be32_to_cpu(in->num_entries);
+
+	p = rb_first(&dev->priv.page_root);
+	while (p && i < npages) {
+		fwp = rb_entry(p, struct fw_page, rb_node);
+		out->pas[i] = cpu_to_be64(fwp->addr);
+		p = rb_next(p);
+		i++;
+	}
+
+	out->num_entries = cpu_to_be32(i);
+	return 0;
+}
+
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 			 int *nclaimed)
 {
@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	in.func_id = cpu_to_be16(func_id);
 	in.num_entries = cpu_to_be32(npages);
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
 	if (err) {
-		mlx5_core_err(dev, "failed reclaiming pages\n");
-		goto out_free;
-	}
-	dev->priv.fw_pages -= npages;
-
-	if (out->hdr.status) {
-		err = mlx5_cmd_status_to_err(&out->hdr);
+		mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
 		goto out_free;
 	}
 
@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 		err = -EINVAL;
 		goto out_free;
 	}
-	if (nclaimed)
-		*nclaimed = num_claimed;
 
 	for (i = 0; i < num_claimed; i++) {
 		addr = be64_to_cpu(out->pas[i]);
 		free_4k(dev, addr);
 	}
+
+	if (nclaimed)
+		*nclaimed = num_claimed;
+
 	dev->priv.fw_pages -= num_claimed;
 	if (func_id)
 		dev->priv.vfs_pages -= num_claimed;
@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 		p = rb_first(&dev->priv.page_root);
 		if (p) {
 			fwp = rb_entry(p, struct fw_page, rb_node);
-			if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-				free_4k(dev, fwp->addr);
-				nclaimed = 1;
-			} else {
-				err = reclaim_pages(dev, fwp->func_id,
-						    optimal_reclaimed_pages(),
-						    &nclaimed);
-			}
+			err = reclaim_pages(dev, fwp->func_id,
+					    optimal_reclaimed_pages(),
+					    &nclaimed);
+
 			if (err) {
 				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
 					       err);
@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 		}
 	} while (p);
 
+	WARN(dev->priv.fw_pages,
+	     "FW pages counter is %d after reclaiming all pages\n",
+	     dev->priv.fw_pages);
+	WARN(dev->priv.vfs_pages,
+	     "VFs FW pages counter is %d after reclaiming all pages\n",
+	     dev->priv.vfs_pages);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index d6a3f412ba9f..b380a6bc1f85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -167,7 +167,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
 
 	mlx5_core_init_vfs(dev, num_vfs);
 #ifdef CONFIG_MLX5_CORE_EN
-	mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs);
+	mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
 #endif
 
 	return num_vfs;
@@ -209,7 +209,8 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
 	mlx5_core_init_vfs(dev, cur_vfs);
 #ifdef CONFIG_MLX5_CORE_EN
 	if (cur_vfs)
-		mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs);
+		mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs,
+					  SRIOV_LEGACY);
 #endif
 
 	enable_vfs(dev, cur_vfs);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index daf44cd4c566..91846dfcbe9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -513,7 +513,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 {
 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
 	void *nic_vport_context;
-	u8 *guid;
 	void *in;
 	int err;
 
@@ -535,8 +534,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 
 	nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
 					 in, nic_vport_context);
-	guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context,
-			    node_guid);
 	MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
 
 	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index f2fd1ef16da7..05de77267d58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 	struct mlx5e_vxlan *vxlan;
 	int err;
 
+	if (mlx5e_vxlan_lookup_port(priv, port))
+		goto free_work;
+
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		goto free_work;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index ce21ee5b2357..821a087c7ae2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -75,14 +75,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
 	err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
 				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
@@ -111,14 +111,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
 	err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
 				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
@@ -148,13 +148,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf);
+	err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9b5ebf84c051..d20ae1838a64 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2)	+= mlxsw_switchx2.o
 mlxsw_switchx2-objs		:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)	+= mlxsw_spectrum.o
 mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
-				   spectrum_switchdev.o
+				   spectrum_switchdev.o spectrum_router.o \
+				   spectrum_kvdl.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index cd63b8263688..f9cd6e3f7709 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -607,6 +607,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile,
  */
 MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1);
 
+/* cmd_mbox_config_set_kvd_linear_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1);
+
+/* cmd_mbox_config_set_kvd_hash_single_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);
+
+/* cmd_mbox_config_set_kvd_hash_double_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
+
 /* cmd_mbox_config_profile_max_vepa_channels
  * Maximum number of VEPA channels per port (0 through 16)
  * 0 - multi-channel VEPA is disabled
@@ -733,6 +751,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16);
  */
 MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1);
 
+/* cmd_mbox_config_kvd_linear_size
+ * KVD Linear Size
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24);
+
+/* cmd_mbox_config_kvd_hash_single_size
+ * KVD Hash single-entries size
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ * Must be greater or equal to cap_min_kvd_hash_single_size
+ * Must be smaller or equal to cap_kvd_size - kvd_linear_size
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24);
+
+/* cmd_mbox_config_kvd_hash_double_size
+ * KVD Hash double-entries size (units of single-size entries)
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size
+ * Must be smaller or equal to cap_kvd_size - kvd_linear_size
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24);
+
 /* cmd_mbox_config_profile_swid_config_mask
  * Modify Switch Partition Configuration mask. When set, the configu-
  * ration value for the Switch Partition are taken from the mailbox.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 01ae54826d5c..480a3ba714dd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -58,6 +58,7 @@
 #include <linux/workqueue.h>
 #include <asm/byteorder.h>
 #include <net/devlink.h>
+#include <trace/events/devlink.h>
 
 #include "core.h"
 #include "item.h"
@@ -447,6 +448,10 @@ static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
 	if (!skb)
 		return -ENOMEM;
 
+	trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0,
+			    skb->data + mlxsw_core->driver->txhdr_len,
+			    skb->len - mlxsw_core->driver->txhdr_len);
+
 	atomic_set(&trans->active, 1);
 	err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info);
 	if (err) {
@@ -529,6 +534,9 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port,
 	struct mlxsw_core *mlxsw_core = priv;
 	struct mlxsw_reg_trans *trans;
 
+	trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0,
+			    skb->data, skb->len);
+
 	if (!mlxsw_emad_is_resp(skb))
 		goto free_skb;
 
@@ -1110,14 +1118,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	if (err)
 		goto err_emad_init;
 
-	err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
-	if (err)
-		goto err_hwmon_init;
-
 	err = devlink_register(devlink, mlxsw_bus_info->dev);
 	if (err)
 		goto err_devlink_register;
 
+	err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
+	if (err)
+		goto err_hwmon_init;
+
 	err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
 	if (err)
 		goto err_driver_init;
@@ -1131,9 +1139,9 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 err_debugfs_init:
 	mlxsw_core->driver->fini(mlxsw_core);
 err_driver_init:
+err_hwmon_init:
 	devlink_unregister(devlink);
 err_devlink_register:
-err_hwmon_init:
 	mlxsw_emad_fini(mlxsw_core);
 err_emad_init:
 	mlxsw_bus->fini(bus_priv);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 436bc49df6ab..2fe385cce203 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -190,7 +190,8 @@ struct mlxsw_config_profile {
 		used_max_ib_mc:1,
 		used_max_pkey:1,
 		used_ar_sec:1,
-		used_adaptive_routing_group_cap:1;
+		used_adaptive_routing_group_cap:1,
+		used_kvd_sizes:1;
 	u8	max_vepa_channels;
 	u16	max_lag;
 	u16	max_port_per_lag;
@@ -211,6 +212,9 @@ struct mlxsw_config_profile {
 	u8	ar_sec;
 	u16	adaptive_routing_group_cap;
 	u8	arn;
+	u32	kvd_linear_size;
+	u32	kvd_hash_single_size;
+	u32	kvd_hash_double_size;
 	struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 7f4173c8eda3..ddbc9f22278d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1255,6 +1255,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
 		mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
 			mbox, profile->adaptive_routing_group_cap);
 	}
+	if (profile->used_kvd_sizes) {
+		mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(
+			mbox, 1);
+		mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(
+			mbox, profile->kvd_linear_size);
+		mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(
+			mbox, 1);
+		mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(
+			mbox, profile->kvd_hash_single_size);
+		mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
+			mbox, 1);
+		mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(
+			mbox, profile->kvd_hash_double_size);
+	}
 
 	for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
 		mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1977e7a5c530..0cc148566677 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1,9 +1,10 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/reg.h
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
+ * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
- * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -386,7 +387,9 @@ enum mlxsw_reg_sfd_rec_action {
 	/* forward and trap, trap_id is FDB_TRAP */
 	MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1,
 	/* trap and do not forward, trap_id is FDB_TRAP */
-	MLXSW_REG_SFD_REC_ACTION_TRAP = 3,
+	MLXSW_REG_SFD_REC_ACTION_TRAP = 2,
+	/* forward to IP router */
+	MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3,
 	MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15,
 };
 
@@ -3186,6 +3189,1183 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id)
 	mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT);
 }
 
+/* RGCR - Router General Configuration Register
+ * --------------------------------------------
+ * The register is used for setting up the router configuration.
+ */
+#define MLXSW_REG_RGCR_ID 0x8001
+#define MLXSW_REG_RGCR_LEN 0x28
+
+static const struct mlxsw_reg_info mlxsw_reg_rgcr = {
+	.id = MLXSW_REG_RGCR_ID,
+	.len = MLXSW_REG_RGCR_LEN,
+};
+
+/* reg_rgcr_ipv4_en
+ * IPv4 router enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rgcr, ipv4_en, 0x00, 31, 1);
+
+/* reg_rgcr_ipv6_en
+ * IPv6 router enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rgcr, ipv6_en, 0x00, 30, 1);
+
+/* reg_rgcr_max_router_interfaces
+ * Defines the maximum number of active router interfaces for all virtual
+ * routers.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rgcr, max_router_interfaces, 0x10, 0, 16);
+
+/* reg_rgcr_usp
+ * Update switch priority and packet color.
+ * 0 - Preserve the value of Switch Priority and packet color.
+ * 1 - Recalculate the value of Switch Priority and packet color.
+ * Access: RW
+ *
+ * Note: Not supported by SwitchX and SwitchX-2.
+ */
+MLXSW_ITEM32(reg, rgcr, usp, 0x18, 20, 1);
+
+/* reg_rgcr_pcp_rw
+ * Indicates how to handle the pcp_rewrite_en value:
+ * 0 - Preserve the value of pcp_rewrite_en.
+ * 2 - Disable PCP rewrite.
+ * 3 - Enable PCP rewrite.
+ * Access: RW
+ *
+ * Note: Not supported by SwitchX and SwitchX-2.
+ */
+MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2);
+
+/* reg_rgcr_activity_dis
+ * Activity disable:
+ * 0 - Activity will be set when an entry is hit (default).
+ * 1 - Activity will not be set when an entry is hit.
+ *
+ * Bit 0 - Disable activity bit in Router Algorithmic LPM Unicast Entry
+ * (RALUE).
+ * Bit 1 - Disable activity bit in Router Algorithmic LPM Unicast Host
+ * Entry (RAUHT).
+ * Bits 2:7 are reserved.
+ * Access: RW
+ *
+ * Note: Not supported by SwitchX, SwitchX-2 and Switch-IB.
+ */
+MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8);
+
+static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en)
+{
+	MLXSW_REG_ZERO(rgcr, payload);
+	mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en);
+}
+
+/* RITR - Router Interface Table Register
+ * --------------------------------------
+ * The register is used to configure the router interface table.
+ */
+#define MLXSW_REG_RITR_ID 0x8002
+#define MLXSW_REG_RITR_LEN 0x40
+
+static const struct mlxsw_reg_info mlxsw_reg_ritr = {
+	.id = MLXSW_REG_RITR_ID,
+	.len = MLXSW_REG_RITR_LEN,
+};
+
+/* reg_ritr_enable
+ * Enables routing on the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, enable, 0x00, 31, 1);
+
+/* reg_ritr_ipv4
+ * IPv4 routing enable. Enables routing of IPv4 traffic on the router
+ * interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1);
+
+/* reg_ritr_ipv6
+ * IPv6 routing enable. Enables routing of IPv6 traffic on the router
+ * interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
+
+enum mlxsw_reg_ritr_if_type {
+	MLXSW_REG_RITR_VLAN_IF,
+	MLXSW_REG_RITR_FID_IF,
+	MLXSW_REG_RITR_SP_IF,
+};
+
+/* reg_ritr_type
+ * Router interface type.
+ * 0 - VLAN interface.
+ * 1 - FID interface.
+ * 2 - Sub-port interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3);
+
+enum {
+	MLXSW_REG_RITR_RIF_CREATE,
+	MLXSW_REG_RITR_RIF_DEL,
+};
+
+/* reg_ritr_op
+ * Opcode:
+ * 0 - Create or edit RIF.
+ * 1 - Delete RIF.
+ * Reserved for SwitchX-2. For Spectrum, editing of interface properties
+ * is not supported. An interface must be deleted and re-created in order
+ * to update properties.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ritr, op, 0x00, 20, 2);
+
+/* reg_ritr_rif
+ * Router interface index. A pointer to the Router Interface Table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ritr, rif, 0x00, 0, 16);
+
+/* reg_ritr_ipv4_fe
+ * IPv4 Forwarding Enable.
+ * Enables routing of IPv4 traffic on the router interface. When disabled,
+ * forwarding is blocked but local traffic (traps and IP2ME) will be enabled.
+ * Not supported in SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
+
+/* reg_ritr_ipv6_fe
+ * IPv6 Forwarding Enable.
+ * Enables routing of IPv6 traffic on the router interface. When disabled,
+ * forwarding is blocked but local traffic (traps and IP2ME) will be enabled.
+ * Not supported in SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
+
+/* reg_ritr_virtual_router
+ * Virtual router ID associated with the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, virtual_router, 0x04, 0, 16);
+
+/* reg_ritr_mtu
+ * Router interface MTU.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16);
+
+/* reg_ritr_if_swid
+ * Switch partition ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8);
+
+/* reg_ritr_if_mac
+ * Router interface MAC address.
+ * In Spectrum, all MAC addresses must have the same 38 MSBits.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6);
+
+/* VLAN Interface */
+
+/* reg_ritr_vlan_if_vid
+ * VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12);
+
+/* FID Interface */
+
+/* reg_ritr_fid_if_fid
+ * Filtering ID. Used to connect a bridge to the router. Only FIDs from
+ * the vFID range are supported.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16);
+
+static inline void mlxsw_reg_ritr_fid_set(char *payload,
+					  enum mlxsw_reg_ritr_if_type rif_type,
+					  u16 fid)
+{
+	if (rif_type == MLXSW_REG_RITR_FID_IF)
+		mlxsw_reg_ritr_fid_if_fid_set(payload, fid);
+	else
+		mlxsw_reg_ritr_vlan_if_vid_set(payload, fid);
+}
+
+/* Sub-port Interface */
+
+/* reg_ritr_sp_if_lag
+ * LAG indication. When this bit is set the system_port field holds the
+ * LAG identifier.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1);
+
+/* reg_ritr_sp_system_port
+ * Port unique indentifier. When lag bit is set, this field holds the
+ * lag_id in bits 0:9.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
+
+/* reg_ritr_sp_if_vid
+ * VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12);
+
+static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif)
+{
+	MLXSW_REG_ZERO(ritr, payload);
+	mlxsw_reg_ritr_rif_set(payload, rif);
+}
+
+static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag,
+					     u16 system_port, u16 vid)
+{
+	mlxsw_reg_ritr_sp_if_lag_set(payload, lag);
+	mlxsw_reg_ritr_sp_if_system_port_set(payload, system_port);
+	mlxsw_reg_ritr_sp_if_vid_set(payload, vid);
+}
+
+static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
+				       enum mlxsw_reg_ritr_if_type type,
+				       u16 rif, u16 mtu, const char *mac)
+{
+	bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL;
+
+	MLXSW_REG_ZERO(ritr, payload);
+	mlxsw_reg_ritr_enable_set(payload, enable);
+	mlxsw_reg_ritr_ipv4_set(payload, 1);
+	mlxsw_reg_ritr_type_set(payload, type);
+	mlxsw_reg_ritr_op_set(payload, op);
+	mlxsw_reg_ritr_rif_set(payload, rif);
+	mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+	mlxsw_reg_ritr_mtu_set(payload, mtu);
+	mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
+}
+
+/* RATR - Router Adjacency Table Register
+ * --------------------------------------
+ * The RATR register is used to configure the Router Adjacency (next-hop)
+ * Table.
+ */
+#define MLXSW_REG_RATR_ID 0x8008
+#define MLXSW_REG_RATR_LEN 0x2C
+
+static const struct mlxsw_reg_info mlxsw_reg_ratr = {
+	.id = MLXSW_REG_RATR_ID,
+	.len = MLXSW_REG_RATR_LEN,
+};
+
+enum mlxsw_reg_ratr_op {
+	/* Read */
+	MLXSW_REG_RATR_OP_QUERY_READ = 0,
+	/* Read and clear activity */
+	MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2,
+	/* Write Adjacency entry */
+	MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1,
+	/* Write Adjacency entry only if the activity is cleared.
+	 * The write may not succeed if the activity is set. There is not
+	 * direct feedback if the write has succeeded or not, however
+	 * the get will reveal the actual entry (SW can compare the get
+	 * response to the set command).
+	 */
+	MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3,
+};
+
+/* reg_ratr_op
+ * Note that Write operation may also be used for updating
+ * counter_set_type and counter_index. In this case all other
+ * fields must not be updated.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4);
+
+/* reg_ratr_v
+ * Valid bit. Indicates if the adjacency entry is valid.
+ * Note: the device may need some time before reusing an invalidated
+ * entry. During this time the entry can not be reused. It is
+ * recommended to use another entry before reusing an invalidated
+ * entry (e.g. software can put it at the end of the list for
+ * reusing). Trying to access an invalidated entry not yet cleared
+ * by the device results with failure indicating "Try Again" status.
+ * When valid is '0' then egress_router_interface,trap_action,
+ * adjacency_parameters and counters are reserved
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
+
+/* reg_ratr_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on
+ * the specific entry. To clear the a bit, use "clear activity".
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
+
+/* reg_ratr_adjacency_index_low
+ * Bits 15:0 of index into the adjacency table.
+ * For SwitchX and SwitchX-2, the adjacency table is linear and
+ * used for adjacency entries only.
+ * For Spectrum, the index is to the KVD linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16);
+
+/* reg_ratr_egress_router_interface
+ * Range is 0 .. cap_max_router_interfaces - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16);
+
+enum mlxsw_reg_ratr_trap_action {
+	MLXSW_REG_RATR_TRAP_ACTION_NOP,
+	MLXSW_REG_RATR_TRAP_ACTION_TRAP,
+	MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU,
+	MLXSW_REG_RATR_TRAP_ACTION_MIRROR,
+	MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS,
+};
+
+/* reg_ratr_trap_action
+ * see mlxsw_reg_ratr_trap_action
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
+
+enum mlxsw_reg_ratr_trap_id {
+	MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
+	MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
+};
+
+/* reg_ratr_adjacency_index_high
+ * Bits 23:16 of the adjacency_index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
+
+/* reg_ratr_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
+
+/* reg_ratr_eth_destination_mac
+ * MAC address of the destination next-hop.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
+
+static inline void
+mlxsw_reg_ratr_pack(char *payload,
+		    enum mlxsw_reg_ratr_op op, bool valid,
+		    u32 adjacency_index, u16 egress_rif)
+{
+	MLXSW_REG_ZERO(ratr, payload);
+	mlxsw_reg_ratr_op_set(payload, op);
+	mlxsw_reg_ratr_v_set(payload, valid);
+	mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
+	mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
+	mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
+}
+
+static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
+						 const char *dest_mac)
+{
+	mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
+}
+
+/* RALTA - Router Algorithmic LPM Tree Allocation Register
+ * -------------------------------------------------------
+ * RALTA is used to allocate the LPM trees of the SHSPM method.
+ */
+#define MLXSW_REG_RALTA_ID 0x8010
+#define MLXSW_REG_RALTA_LEN 0x04
+
+static const struct mlxsw_reg_info mlxsw_reg_ralta = {
+	.id = MLXSW_REG_RALTA_ID,
+	.len = MLXSW_REG_RALTA_LEN,
+};
+
+/* reg_ralta_op
+ * opcode (valid for Write, must be 0 on Read)
+ * 0 - allocate a tree
+ * 1 - deallocate a tree
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2);
+
+enum mlxsw_reg_ralxx_protocol {
+	MLXSW_REG_RALXX_PROTOCOL_IPV4,
+	MLXSW_REG_RALXX_PROTOCOL_IPV6,
+};
+
+/* reg_ralta_protocol
+ * Protocol.
+ * Deallocation opcode: Reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4);
+
+/* reg_ralta_tree_id
+ * An identifier (numbered from 1..cap_shspm_max_trees-1) representing
+ * the tree identifier (managed by software).
+ * Note that tree_id 0 is allocated for a default-route tree.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8);
+
+static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc,
+					enum mlxsw_reg_ralxx_protocol protocol,
+					u8 tree_id)
+{
+	MLXSW_REG_ZERO(ralta, payload);
+	mlxsw_reg_ralta_op_set(payload, !alloc);
+	mlxsw_reg_ralta_protocol_set(payload, protocol);
+	mlxsw_reg_ralta_tree_id_set(payload, tree_id);
+}
+
+/* RALST - Router Algorithmic LPM Structure Tree Register
+ * ------------------------------------------------------
+ * RALST is used to set and query the structure of an LPM tree.
+ * The structure of the tree must be sorted as a sorted binary tree, while
+ * each node is a bin that is tagged as the length of the prefixes the lookup
+ * will refer to. Therefore, bin X refers to a set of entries with prefixes
+ * of X bits to match with the destination address. The bin 0 indicates
+ * the default action, when there is no match of any prefix.
+ */
+#define MLXSW_REG_RALST_ID 0x8011
+#define MLXSW_REG_RALST_LEN 0x104
+
+static const struct mlxsw_reg_info mlxsw_reg_ralst = {
+	.id = MLXSW_REG_RALST_ID,
+	.len = MLXSW_REG_RALST_LEN,
+};
+
+/* reg_ralst_root_bin
+ * The bin number of the root bin.
+ * 0<root_bin=<(length of IP address)
+ * For a default-route tree configure 0xff
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralst, root_bin, 0x00, 16, 8);
+
+/* reg_ralst_tree_id
+ * Tree identifier numbered from 1..(cap_shspm_max_trees-1).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralst, tree_id, 0x00, 0, 8);
+
+#define MLXSW_REG_RALST_BIN_NO_CHILD 0xff
+#define MLXSW_REG_RALST_BIN_OFFSET 0x04
+#define MLXSW_REG_RALST_BIN_COUNT 128
+
+/* reg_ralst_left_child_bin
+ * Holding the children of the bin according to the stored tree's structure.
+ * For trees composed of less than 4 blocks, the bins in excess are reserved.
+ * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff
+ * Access: RW
+ */
+MLXSW_ITEM16_INDEXED(reg, ralst, left_child_bin, 0x04, 8, 8, 0x02, 0x00, false);
+
+/* reg_ralst_right_child_bin
+ * Holding the children of the bin according to the stored tree's structure.
+ * For trees composed of less than 4 blocks, the bins in excess are reserved.
+ * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff
+ * Access: RW
+ */
+MLXSW_ITEM16_INDEXED(reg, ralst, right_child_bin, 0x04, 0, 8, 0x02, 0x00,
+		     false);
+
+static inline void mlxsw_reg_ralst_pack(char *payload, u8 root_bin, u8 tree_id)
+{
+	MLXSW_REG_ZERO(ralst, payload);
+
+	/* Initialize all bins to have no left or right child */
+	memset(payload + MLXSW_REG_RALST_BIN_OFFSET,
+	       MLXSW_REG_RALST_BIN_NO_CHILD, MLXSW_REG_RALST_BIN_COUNT * 2);
+
+	mlxsw_reg_ralst_root_bin_set(payload, root_bin);
+	mlxsw_reg_ralst_tree_id_set(payload, tree_id);
+}
+
+static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number,
+					    u8 left_child_bin,
+					    u8 right_child_bin)
+{
+	int bin_index = bin_number - 1;
+
+	mlxsw_reg_ralst_left_child_bin_set(payload, bin_index, left_child_bin);
+	mlxsw_reg_ralst_right_child_bin_set(payload, bin_index,
+					    right_child_bin);
+}
+
+/* RALTB - Router Algorithmic LPM Tree Binding Register
+ * ----------------------------------------------------
+ * RALTB is used to bind virtual router and protocol to an allocated LPM tree.
+ */
+#define MLXSW_REG_RALTB_ID 0x8012
+#define MLXSW_REG_RALTB_LEN 0x04
+
+static const struct mlxsw_reg_info mlxsw_reg_raltb = {
+	.id = MLXSW_REG_RALTB_ID,
+	.len = MLXSW_REG_RALTB_LEN,
+};
+
+/* reg_raltb_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raltb, virtual_router, 0x00, 16, 16);
+
+/* reg_raltb_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raltb, protocol, 0x00, 12, 4);
+
+/* reg_raltb_tree_id
+ * Tree to be used for the {virtual_router, protocol}
+ * Tree identifier numbered from 1..(cap_shspm_max_trees-1).
+ * By default, all Unicast IPv4 and IPv6 are bound to tree_id 0.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, raltb, tree_id, 0x00, 0, 8);
+
+static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router,
+					enum mlxsw_reg_ralxx_protocol protocol,
+					u8 tree_id)
+{
+	MLXSW_REG_ZERO(raltb, payload);
+	mlxsw_reg_raltb_virtual_router_set(payload, virtual_router);
+	mlxsw_reg_raltb_protocol_set(payload, protocol);
+	mlxsw_reg_raltb_tree_id_set(payload, tree_id);
+}
+
+/* RALUE - Router Algorithmic LPM Unicast Entry Register
+ * -----------------------------------------------------
+ * RALUE is used to configure and query LPM entries that serve
+ * the Unicast protocols.
+ */
+#define MLXSW_REG_RALUE_ID 0x8013
+#define MLXSW_REG_RALUE_LEN 0x38
+
+static const struct mlxsw_reg_info mlxsw_reg_ralue = {
+	.id = MLXSW_REG_RALUE_ID,
+	.len = MLXSW_REG_RALUE_LEN,
+};
+
+/* reg_ralue_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4);
+
+enum mlxsw_reg_ralue_op {
+	/* Read operation. If entry doesn't exist, the operation fails. */
+	MLXSW_REG_RALUE_OP_QUERY_READ = 0,
+	/* Clear on read operation. Used to read entry and
+	 * clear Activity bit.
+	 */
+	MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1,
+	/* Write operation. Used to write a new entry to the table. All RW
+	 * fields are written for new entry. Activity bit is set
+	 * for new entries.
+	 */
+	MLXSW_REG_RALUE_OP_WRITE_WRITE = 0,
+	/* Update operation. Used to update an existing route entry and
+	 * only update the RW fields that are detailed in the field
+	 * op_u_mask. If entry doesn't exist, the operation fails.
+	 */
+	MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1,
+	/* Clear activity. The Activity bit (the field a) is cleared
+	 * for the entry.
+	 */
+	MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2,
+	/* Delete operation. Used to delete an existing entry. If entry
+	 * doesn't exist, the operation fails.
+	 */
+	MLXSW_REG_RALUE_OP_WRITE_DELETE = 3,
+};
+
+/* reg_ralue_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3);
+
+/* reg_ralue_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry, only if the entry is a route. To clear the a bit, use
+ * "clear activity" op.
+ * Enabled by activity_dis in RGCR
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1);
+
+/* reg_ralue_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16);
+
+#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE	BIT(0)
+#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN	BIT(1)
+#define MLXSW_REG_RALUE_OP_U_MASK_ACTION	BIT(2)
+
+/* reg_ralue_op_u_mask
+ * opcode update mask.
+ * On read operation, this field is reserved.
+ * This field is valid for update opcode, otherwise - reserved.
+ * This field is a bitmask of the fields that should be updated.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3);
+
+/* reg_ralue_prefix_len
+ * Number of bits in the prefix of the LPM route.
+ * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes
+ * two entries in the physical HW table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8);
+
+/* reg_ralue_dip*
+ * The prefix of the route or of the marker that the object of the LPM
+ * is compared with. The most significant bits of the dip are the prefix.
+ * The list significant bits must be '0' if the prefix_len is smaller
+ * than 128 for IPv6 or smaller than 32 for IPv4.
+ * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32);
+
+enum mlxsw_reg_ralue_entry_type {
+	MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1,
+	MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2,
+	MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3,
+};
+
+/* reg_ralue_entry_type
+ * Entry type.
+ * Note - for Marker entries, the action_type and action fields are reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2);
+
+/* reg_ralue_bmp_len
+ * The best match prefix length in the case that there is no match for
+ * longer prefixes.
+ * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len
+ * Note for any update operation with entry_type modification this
+ * field must be set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8);
+
+enum mlxsw_reg_ralue_action_type {
+	MLXSW_REG_RALUE_ACTION_TYPE_REMOTE,
+	MLXSW_REG_RALUE_ACTION_TYPE_LOCAL,
+	MLXSW_REG_RALUE_ACTION_TYPE_IP2ME,
+};
+
+/* reg_ralue_action_type
+ * Action Type
+ * Indicates how the IP address is connected.
+ * It can be connected to a local subnet through local_erif or can be
+ * on a remote subnet connected through a next-hop router,
+ * or transmitted to the CPU.
+ * Reserved when entry_type = MARKER_ENTRY
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, action_type, 0x1C, 0, 2);
+
+enum mlxsw_reg_ralue_trap_action {
+	MLXSW_REG_RALUE_TRAP_ACTION_NOP,
+	MLXSW_REG_RALUE_TRAP_ACTION_TRAP,
+	MLXSW_REG_RALUE_TRAP_ACTION_MIRROR_TO_CPU,
+	MLXSW_REG_RALUE_TRAP_ACTION_MIRROR,
+	MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR,
+};
+
+/* reg_ralue_trap_action
+ * Trap action.
+ * For IP2ME action, only NOP and MIRROR are possible.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, trap_action, 0x20, 28, 4);
+
+/* reg_ralue_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap ID is RTR_INGRESS0 or RTR_INGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR, trap_id is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, trap_id, 0x20, 0, 9);
+
+/* reg_ralue_adjacency_index
+ * Points to the first entry of the group-based ECMP.
+ * Only relevant in case of REMOTE action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, adjacency_index, 0x24, 0, 24);
+
+/* reg_ralue_ecmp_size
+ * Amount of sequential entries starting
+ * from the adjacency_index (the number of ECMPs).
+ * The valid range is 1-64, 512, 1024, 2048 and 4096.
+ * Reserved when trap_action is TRAP or DISCARD_ERROR.
+ * Only relevant in case of REMOTE action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13);
+
+/* reg_ralue_local_erif
+ * Egress Router Interface.
+ * Only relevant in case of LOCAL action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16);
+
+/* reg_ralue_v
+ * Valid bit for the tunnel_ptr field.
+ * If valid = 0 then trap to CPU as IP2ME trap ID.
+ * If valid = 1 and the packet format allows NVE or IPinIP tunnel
+ * decapsulation then tunnel decapsulation is done.
+ * If valid = 1 and packet format does not allow NVE or IPinIP tunnel
+ * decapsulation then trap as IP2ME trap ID.
+ * Only relevant in case of IP2ME action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1);
+
+/* reg_ralue_tunnel_ptr
+ * Tunnel Pointer for NVE or IPinIP tunnel decapsulation.
+ * For Spectrum, pointer to KVD Linear.
+ * Only relevant in case of IP2ME action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24);
+
+static inline void mlxsw_reg_ralue_pack(char *payload,
+					enum mlxsw_reg_ralxx_protocol protocol,
+					enum mlxsw_reg_ralue_op op,
+					u16 virtual_router, u8 prefix_len)
+{
+	MLXSW_REG_ZERO(ralue, payload);
+	mlxsw_reg_ralue_protocol_set(payload, protocol);
+	mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
+	mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
+	mlxsw_reg_ralue_entry_type_set(payload,
+				       MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY);
+	mlxsw_reg_ralue_bmp_len_set(payload, prefix_len);
+}
+
+static inline void mlxsw_reg_ralue_pack4(char *payload,
+					 enum mlxsw_reg_ralxx_protocol protocol,
+					 enum mlxsw_reg_ralue_op op,
+					 u16 virtual_router, u8 prefix_len,
+					 u32 dip)
+{
+	mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
+	mlxsw_reg_ralue_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_ralue_act_remote_pack(char *payload,
+				enum mlxsw_reg_ralue_trap_action trap_action,
+				u16 trap_id, u32 adjacency_index, u16 ecmp_size)
+{
+	mlxsw_reg_ralue_action_type_set(payload,
+					MLXSW_REG_RALUE_ACTION_TYPE_REMOTE);
+	mlxsw_reg_ralue_trap_action_set(payload, trap_action);
+	mlxsw_reg_ralue_trap_id_set(payload, trap_id);
+	mlxsw_reg_ralue_adjacency_index_set(payload, adjacency_index);
+	mlxsw_reg_ralue_ecmp_size_set(payload, ecmp_size);
+}
+
+static inline void
+mlxsw_reg_ralue_act_local_pack(char *payload,
+			       enum mlxsw_reg_ralue_trap_action trap_action,
+			       u16 trap_id, u16 local_erif)
+{
+	mlxsw_reg_ralue_action_type_set(payload,
+					MLXSW_REG_RALUE_ACTION_TYPE_LOCAL);
+	mlxsw_reg_ralue_trap_action_set(payload, trap_action);
+	mlxsw_reg_ralue_trap_id_set(payload, trap_id);
+	mlxsw_reg_ralue_local_erif_set(payload, local_erif);
+}
+
+static inline void
+mlxsw_reg_ralue_act_ip2me_pack(char *payload)
+{
+	mlxsw_reg_ralue_action_type_set(payload,
+					MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
+}
+
+/* RAUHT - Router Algorithmic LPM Unicast Host Table Register
+ * ----------------------------------------------------------
+ * The RAUHT register is used to configure and query the Unicast Host table in
+ * devices that implement the Algorithmic LPM.
+ */
+#define MLXSW_REG_RAUHT_ID 0x8014
+#define MLXSW_REG_RAUHT_LEN 0x74
+
+static const struct mlxsw_reg_info mlxsw_reg_rauht = {
+	.id = MLXSW_REG_RAUHT_ID,
+	.len = MLXSW_REG_RAUHT_LEN,
+};
+
+enum mlxsw_reg_rauht_type {
+	MLXSW_REG_RAUHT_TYPE_IPV4,
+	MLXSW_REG_RAUHT_TYPE_IPV6,
+};
+
+/* reg_rauht_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2);
+
+enum mlxsw_reg_rauht_op {
+	MLXSW_REG_RAUHT_OP_QUERY_READ = 0,
+	/* Read operation */
+	MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1,
+	/* Clear on read operation. Used to read entry and clear
+	 * activity bit.
+	 */
+	MLXSW_REG_RAUHT_OP_WRITE_ADD = 0,
+	/* Add. Used to write a new entry to the table. All R/W fields are
+	 * relevant for new entry. Activity bit is set for new entries.
+	 */
+	MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1,
+	/* Update action. Used to update an existing route entry and
+	 * only update the following fields:
+	 * trap_action, trap_id, mac, counter_set_type, counter_index
+	 */
+	MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2,
+	/* Clear activity. A bit is cleared for the entry. */
+	MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3,
+	/* Delete entry */
+	MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4,
+	/* Delete all host entries on a RIF. In this command, dip
+	 * field is reserved.
+	 */
+};
+
+/* reg_rauht_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3);
+
+/* reg_rauht_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on
+ * the specific entry.
+ * To clear the a bit, use "clear activity" op.
+ * Enabled by activity_dis in RGCR
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1);
+
+/* reg_rauht_rif
+ * Router Interface
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
+
+/* reg_rauht_dip*
+ * Destination address.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
+
+enum mlxsw_reg_rauht_trap_action {
+	MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
+	MLXSW_REG_RAUHT_TRAP_ACTION_TRAP,
+	MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU,
+	MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR,
+	MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS,
+};
+
+/* reg_rauht_trap_action
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4);
+
+enum mlxsw_reg_rauht_trap_id {
+	MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0,
+	MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1,
+};
+
+/* reg_rauht_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR,
+ * trap_id is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
+
+/* reg_rauht_counter_set_type
+ * Counter set type for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8);
+
+/* reg_rauht_counter_index
+ * Counter index for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24);
+
+/* reg_rauht_mac
+ * MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6);
+
+static inline void mlxsw_reg_rauht_pack(char *payload,
+					enum mlxsw_reg_rauht_op op, u16 rif,
+					const char *mac)
+{
+	MLXSW_REG_ZERO(rauht, payload);
+	mlxsw_reg_rauht_op_set(payload, op);
+	mlxsw_reg_rauht_rif_set(payload, rif);
+	mlxsw_reg_rauht_mac_memcpy_to(payload, mac);
+}
+
+static inline void mlxsw_reg_rauht_pack4(char *payload,
+					 enum mlxsw_reg_rauht_op op, u16 rif,
+					 const char *mac, u32 dip)
+{
+	mlxsw_reg_rauht_pack(payload, op, rif, mac);
+	mlxsw_reg_rauht_dip4_set(payload, dip);
+}
+
+/* RALEU - Router Algorithmic LPM ECMP Update Register
+ * ---------------------------------------------------
+ * The register enables updating the ECMP section in the action for multiple
+ * LPM Unicast entries in a single operation. The update is executed to
+ * all entries of a {virtual router, protocol} tuple using the same ECMP group.
+ */
+#define MLXSW_REG_RALEU_ID 0x8015
+#define MLXSW_REG_RALEU_LEN 0x28
+
+static const struct mlxsw_reg_info mlxsw_reg_raleu = {
+	.id = MLXSW_REG_RALEU_ID,
+	.len = MLXSW_REG_RALEU_LEN,
+};
+
+/* reg_raleu_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4);
+
+/* reg_raleu_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16);
+
+/* reg_raleu_adjacency_index
+ * Adjacency Index used for matching on the existing entries.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24);
+
+/* reg_raleu_ecmp_size
+ * ECMP Size used for matching on the existing entries.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13);
+
+/* reg_raleu_new_adjacency_index
+ * New Adjacency Index.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24);
+
+/* reg_raleu_new_ecmp_size
+ * New ECMP Size.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13);
+
+static inline void mlxsw_reg_raleu_pack(char *payload,
+					enum mlxsw_reg_ralxx_protocol protocol,
+					u16 virtual_router,
+					u32 adjacency_index, u16 ecmp_size,
+					u32 new_adjacency_index,
+					u16 new_ecmp_size)
+{
+	MLXSW_REG_ZERO(raleu, payload);
+	mlxsw_reg_raleu_protocol_set(payload, protocol);
+	mlxsw_reg_raleu_virtual_router_set(payload, virtual_router);
+	mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index);
+	mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size);
+	mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index);
+	mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size);
+}
+
+/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register
+ * ----------------------------------------------------------------
+ * The RAUHTD register allows dumping entries from the Router Unicast Host
+ * Table. For a given session an entry is dumped no more than one time. The
+ * first RAUHTD access after reset is a new session. A session ends when the
+ * num_rec response is smaller than num_rec request or for IPv4 when the
+ * num_entries is smaller than 4. The clear activity affect the current session
+ * or the last session if a new session has not started.
+ */
+#define MLXSW_REG_RAUHTD_ID 0x8018
+#define MLXSW_REG_RAUHTD_BASE_LEN 0x20
+#define MLXSW_REG_RAUHTD_REC_LEN 0x20
+#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32
+#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \
+		MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN)
+#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4
+
+static const struct mlxsw_reg_info mlxsw_reg_rauhtd = {
+	.id = MLXSW_REG_RAUHTD_ID,
+	.len = MLXSW_REG_RAUHTD_LEN,
+};
+
+#define MLXSW_REG_RAUHTD_FILTER_A BIT(0)
+#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3)
+
+/* reg_rauhtd_filter_fields
+ * if a bit is '0' then the relevant field is ignored and dump is done
+ * regardless of the field value
+ * Bit0 - filter by activity: entry_a
+ * Bit3 - filter by entry rip: entry_rif
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8);
+
+enum mlxsw_reg_rauhtd_op {
+	MLXSW_REG_RAUHTD_OP_DUMP,
+	MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR,
+};
+
+/* reg_rauhtd_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2);
+
+/* reg_rauhtd_num_rec
+ * At request: number of records requested
+ * At response: number of records dumped
+ * For IPv4, each record has 4 entries at request and up to 4 entries
+ * at response
+ * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8);
+
+/* reg_rauhtd_entry_a
+ * Dump only if activity has value of entry_a
+ * Reserved if filter_fields bit0 is '0'
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1);
+
+enum mlxsw_reg_rauhtd_type {
+	MLXSW_REG_RAUHTD_TYPE_IPV4,
+	MLXSW_REG_RAUHTD_TYPE_IPV6,
+};
+
+/* reg_rauhtd_type
+ * Dump only if record type is:
+ * 0 - IPv4
+ * 1 - IPv6
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4);
+
+/* reg_rauhtd_entry_rif
+ * Dump only if RIF has value of entry_rif
+ * Reserved if filter_fields bit3 is '0'
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_rauhtd_pack(char *payload,
+					 enum mlxsw_reg_rauhtd_type type)
+{
+	MLXSW_REG_ZERO(rauhtd, payload);
+	mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A);
+	mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR);
+	mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM);
+	mlxsw_reg_rauhtd_entry_a_set(payload, 1);
+	mlxsw_reg_rauhtd_type_set(payload, type);
+}
+
+/* reg_rauhtd_ipv4_rec_num_entries
+ * Number of valid entries in this record:
+ * 0 - 1 valid entry
+ * 1 - 2 valid entries
+ * 2 - 3 valid entries
+ * 3 - 4 valid entries
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries,
+		     MLXSW_REG_RAUHTD_BASE_LEN, 28, 2,
+		     MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
+
+/* reg_rauhtd_rec_type
+ * Record type.
+ * 0 - IPv4
+ * 1 - IPv6
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2,
+		     MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
+
+#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8
+
+/* reg_rauhtd_ipv4_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+		     MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv4_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+		     16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv4_ent_dip
+ * Destination IPv4 address.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+		     32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false);
+
+static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
+						    int ent_index, u16 *p_rif,
+						    u32 *p_dip)
+{
+	*p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index);
+	*p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index);
+}
+
 /* MFCR - Management Fan Control Register
  * --------------------------------------
  * This register controls the settings of the Fan Speed PWM mechanism.
@@ -3924,6 +5104,26 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
 		return "HTGT";
 	case MLXSW_REG_HPKT_ID:
 		return "HPKT";
+	case MLXSW_REG_RGCR_ID:
+		return "RGCR";
+	case MLXSW_REG_RITR_ID:
+		return "RITR";
+	case MLXSW_REG_RATR_ID:
+		return "RATR";
+	case MLXSW_REG_RALTA_ID:
+		return "RALTA";
+	case MLXSW_REG_RALST_ID:
+		return "RALST";
+	case MLXSW_REG_RALTB_ID:
+		return "RALTB";
+	case MLXSW_REG_RALUE_ID:
+		return "RALUE";
+	case MLXSW_REG_RAUHT_ID:
+		return "RAUHT";
+	case MLXSW_REG_RALEU_ID:
+		return "RALEU";
+	case MLXSW_REG_RAUHTD_ID:
+		return "RAUHTD";
 	case MLXSW_REG_MFCR_ID:
 		return "MFCR";
 	case MLXSW_REG_MFSC_ID:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d23948b88962..c812513e079d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -51,6 +51,7 @@
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/dcbnl.h>
+#include <linux/inetdevice.h>
 #include <net/switchdev.h>
 #include <generated/utsrelease.h>
 
@@ -210,23 +211,6 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port)
 	return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr);
 }
 
-static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
-				       u16 vid, enum mlxsw_reg_spms_state state)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char *spms_pl;
-	int err;
-
-	spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
-	if (!spms_pl)
-		return -ENOMEM;
-	mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port);
-	mlxsw_reg_spms_vid_pack(spms_pl, vid, state);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl);
-	kfree(spms_pl);
-	return err;
-}
-
 static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -409,7 +393,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 	}
 
 	mlxsw_sp_txhdr_construct(skb, &tx_info);
-	len = skb->len;
+	/* TX header is consumed by HW on the way so we shouldn't count its
+	 * bytes as being sent.
+	 */
+	len = skb->len - MLXSW_TXHDR_LEN;
+
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
 	 */
@@ -633,87 +621,6 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
 	return 0;
 }
 
-static struct mlxsw_sp_fid *
-mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid)
-{
-	struct mlxsw_sp_fid *f;
-
-	list_for_each_entry(f, &mlxsw_sp->port_vfids.list, list) {
-		if (f->vid == vid)
-			return f;
-	}
-
-	return NULL;
-}
-
-static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp)
-{
-	return find_first_zero_bit(mlxsw_sp->port_vfids.mapped,
-				   MLXSW_SP_VFID_PORT_MAX);
-}
-
-static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create)
-{
-	char sfmr_pl[MLXSW_REG_SFMR_LEN];
-
-	mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
-}
-
-static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
-
-static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp,
-						 u16 vid)
-{
-	struct device *dev = mlxsw_sp->bus_info->dev;
-	struct mlxsw_sp_fid *f;
-	u16 vfid, fid;
-	int err;
-
-	vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp);
-	if (vfid == MLXSW_SP_VFID_PORT_MAX) {
-		dev_err(dev, "No available vFIDs\n");
-		return ERR_PTR(-ERANGE);
-	}
-
-	fid = mlxsw_sp_vfid_to_fid(vfid);
-	err = mlxsw_sp_vfid_op(mlxsw_sp, fid, true);
-	if (err) {
-		dev_err(dev, "Failed to create FID=%d\n", fid);
-		return ERR_PTR(err);
-	}
-
-	f = kzalloc(sizeof(*f), GFP_KERNEL);
-	if (!f)
-		goto err_allocate_vfid;
-
-	f->leave = mlxsw_sp_vport_vfid_leave;
-	f->fid = fid;
-	f->vid = vid;
-
-	list_add(&f->list, &mlxsw_sp->port_vfids.list);
-	set_bit(vfid, mlxsw_sp->port_vfids.mapped);
-
-	return f;
-
-err_allocate_vfid:
-	mlxsw_sp_vfid_op(mlxsw_sp, fid, false);
-	return ERR_PTR(-ENOMEM);
-}
-
-static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
-				  struct mlxsw_sp_fid *f)
-{
-	u16 vfid = mlxsw_sp_fid_to_vfid(f->fid);
-
-	clear_bit(vfid, mlxsw_sp->port_vfids.mapped);
-	list_del(&f->list);
-
-	mlxsw_sp_vfid_op(mlxsw_sp, f->fid, false);
-
-	kfree(f);
-}
-
 static struct mlxsw_sp_port *
 mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
 {
@@ -746,72 +653,12 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport)
 	kfree(mlxsw_sp_vport);
 }
 
-static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
-				  bool valid)
-{
-	enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
-	u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-
-	return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid,
-					    vid);
-}
-
-static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport)
-{
-	u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-	struct mlxsw_sp_fid *f;
-	int err;
-
-	f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, vid);
-	if (!f) {
-		f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, vid);
-		if (IS_ERR(f))
-			return PTR_ERR(f);
-	}
-
-	if (!f->ref_count) {
-		err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, true);
-		if (err)
-			goto err_vport_flood_set;
-	}
-
-	err = mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, true);
-	if (err)
-		goto err_vport_fid_map;
-
-	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, f);
-	f->ref_count++;
-
-	return 0;
-
-err_vport_fid_map:
-	if (!f->ref_count)
-		mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false);
-err_vport_flood_set:
-	if (!f->ref_count)
-		mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f);
-	return err;
-}
-
-static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
-{
-	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
-
-	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
-
-	mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, false);
-
-	if (--f->ref_count == 0) {
-		mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false);
-		mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f);
-	}
-}
-
 int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
 			  u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port *mlxsw_sp_vport;
+	bool untagged = vid == 1;
 	int err;
 
 	/* VLAN 0 is added to HW filter when device goes up, but it is
@@ -843,41 +690,24 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
 		}
 	}
 
-	err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport);
-	if (err) {
-		netdev_err(dev, "Failed to join vFID\n");
-		goto err_vport_vfid_join;
-	}
-
 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
 	if (err) {
 		netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
 		goto err_port_vid_learning_set;
 	}
 
-	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false);
+	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
 	if (err) {
 		netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
 			   vid);
 		goto err_port_add_vid;
 	}
 
-	err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
-					  MLXSW_REG_SPMS_STATE_FORWARDING);
-	if (err) {
-		netdev_err(dev, "Failed to set STP state for VID=%d\n", vid);
-		goto err_port_stp_state_set;
-	}
-
 	return 0;
 
-err_port_stp_state_set:
-	mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 err_port_add_vid:
 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
 err_port_vid_learning_set:
-	mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport);
-err_vport_vfid_join:
 	if (list_is_singular(&mlxsw_sp_port->vports_list))
 		mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
 err_port_vp_mode_trans:
@@ -885,8 +715,8 @@ err_port_vp_mode_trans:
 	return err;
 }
 
-int mlxsw_sp_port_kill_vid(struct net_device *dev,
-			   __be16 __always_unused proto, u16 vid)
+static int mlxsw_sp_port_kill_vid(struct net_device *dev,
+				  __be16 __always_unused proto, u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -905,13 +735,6 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
 		return 0;
 	}
 
-	err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
-					  MLXSW_REG_SPMS_STATE_DISCARDING);
-	if (err) {
-		netdev_err(dev, "Failed to set STP state for VID=%d\n", vid);
-		return err;
-	}
-
 	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 	if (err) {
 		netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
@@ -980,6 +803,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
 	.ndo_get_stats64	= mlxsw_sp_port_get_stats64,
 	.ndo_vlan_rx_add_vid	= mlxsw_sp_port_add_vid,
 	.ndo_vlan_rx_kill_vid	= mlxsw_sp_port_kill_vid,
+	.ndo_neigh_construct	= mlxsw_sp_router_neigh_construct,
+	.ndo_neigh_destroy	= mlxsw_sp_router_neigh_destroy,
 	.ndo_fdb_add		= switchdev_port_fdb_add,
 	.ndo_fdb_del		= switchdev_port_fdb_del,
 	.ndo_fdb_dump		= switchdev_port_fdb_dump,
@@ -1840,23 +1665,6 @@ err_port_active_vlans_alloc:
 	return err;
 }
 
-static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	struct net_device *dev = mlxsw_sp_port->dev;
-	struct mlxsw_sp_port *mlxsw_sp_vport, *tmp;
-
-	list_for_each_entry_safe(mlxsw_sp_vport, tmp,
-				 &mlxsw_sp_port->vports_list, vport.list) {
-		u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-
-		/* vPorts created for VLAN devices should already be gone
-		 * by now, since we unregistered the port netdev.
-		 */
-		WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev));
-		mlxsw_sp_port_kill_vid(dev, 0, vid);
-	}
-}
-
 static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -1867,13 +1675,14 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-	mlxsw_sp_port_vports_fini(mlxsw_sp_port);
+	mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
 	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
 	free_percpu(mlxsw_sp_port->pcpu_stats);
 	kfree(mlxsw_sp_port->untagged_vlans);
 	kfree(mlxsw_sp_port->active_vlans);
+	WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list));
 	free_netdev(mlxsw_sp_port->dev);
 }
 
@@ -2110,11 +1919,8 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
 
 	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
-	if (!mlxsw_sp_port) {
-		dev_warn(mlxsw_sp->bus_info->dev, "Port %d: Link event received for non-existent port\n",
-			 local_port);
+	if (!mlxsw_sp_port)
 		return;
-	}
 
 	status = mlxsw_reg_pude_oper_status_get(pude_pl);
 	if (status == MLXSW_PORT_OPER_STATUS_UP) {
@@ -2269,6 +2075,31 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
 		.local_port = MLXSW_PORT_DONT_CARE,
 		.trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
 	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_ARPBC,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_ARPUC,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_IP2ME,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_RTR_INGRESS0,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4,
+	},
 };
 
 static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
@@ -2309,7 +2140,7 @@ err_rx_trap_set:
 					  mlxsw_sp);
 err_rx_listener_register:
 	for (i--; i >= 0; i--) {
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
+		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
 				    mlxsw_sp_rx_listener[i].trap_id);
 		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
 
@@ -2326,7 +2157,7 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) {
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
+		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
 				    mlxsw_sp_rx_listener[i].trap_id);
 		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
 
@@ -2406,8 +2237,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->core = mlxsw_core;
 	mlxsw_sp->bus_info = mlxsw_bus_info;
 	INIT_LIST_HEAD(&mlxsw_sp->fids);
-	INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list);
-	INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list);
+	INIT_LIST_HEAD(&mlxsw_sp->vfids.list);
 	INIT_LIST_HEAD(&mlxsw_sp->br_mids.list);
 
 	err = mlxsw_sp_base_mac_get(mlxsw_sp);
@@ -2416,16 +2246,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		return err;
 	}
 
-	err = mlxsw_sp_ports_create(mlxsw_sp);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
-		return err;
-	}
-
 	err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n");
-		goto err_event_register;
+		return err;
 	}
 
 	err = mlxsw_sp_traps_init(mlxsw_sp);
@@ -2458,8 +2282,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_switchdev_init;
 	}
 
+	err = mlxsw_sp_router_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
+		goto err_router_init;
+	}
+
+	err = mlxsw_sp_ports_create(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
+		goto err_ports_create;
+	}
+
 	return 0;
 
+err_ports_create:
+	mlxsw_sp_router_fini(mlxsw_sp);
+err_router_init:
+	mlxsw_sp_switchdev_fini(mlxsw_sp);
 err_switchdev_init:
 err_lag_init:
 	mlxsw_sp_buffers_fini(mlxsw_sp);
@@ -2468,21 +2308,24 @@ err_flood_init:
 	mlxsw_sp_traps_fini(mlxsw_sp);
 err_rx_listener_register:
 	mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
-err_event_register:
-	mlxsw_sp_ports_remove(mlxsw_sp);
 	return err;
 }
 
 static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	int i;
 
+	mlxsw_sp_ports_remove(mlxsw_sp);
+	mlxsw_sp_router_fini(mlxsw_sp);
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
 	mlxsw_sp_buffers_fini(mlxsw_sp);
 	mlxsw_sp_traps_fini(mlxsw_sp);
 	mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
-	mlxsw_sp_ports_remove(mlxsw_sp);
+	WARN_ON(!list_empty(&mlxsw_sp->vfids.list));
 	WARN_ON(!list_empty(&mlxsw_sp->fids));
+	for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
 }
 
 static struct mlxsw_config_profile mlxsw_sp_config_profile = {
@@ -2513,6 +2356,10 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.max_ib_mc			= 0,
 	.used_max_pkey			= 1,
 	.max_pkey			= 0,
+	.used_kvd_sizes			= 1,
+	.kvd_linear_size		= MLXSW_SP_KVD_LINEAR_SIZE,
+	.kvd_hash_single_size		= MLXSW_SP_KVD_HASH_SINGLE_SIZE,
+	.kvd_hash_double_size		= MLXSW_SP_KVD_HASH_DOUBLE_SIZE,
 	.swid_config			= {
 		{
 			.used_type	= 1,
@@ -2544,6 +2391,559 @@ static struct mlxsw_driver mlxsw_sp_driver = {
 	.profile			= &mlxsw_sp_config_profile,
 };
 
+static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
+{
+	return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
+}
+
+static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev)
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+
+	if (mlxsw_sp_port_dev_check(dev))
+		return netdev_priv(dev);
+
+	netdev_for_each_all_lower_dev(dev, lower_dev, iter) {
+		if (mlxsw_sp_port_dev_check(lower_dev))
+			return netdev_priv(lower_dev);
+	}
+	return NULL;
+}
+
+static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
+	return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL;
+}
+
+static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev)
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+
+	if (mlxsw_sp_port_dev_check(dev))
+		return netdev_priv(dev);
+
+	netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) {
+		if (mlxsw_sp_port_dev_check(lower_dev))
+			return netdev_priv(lower_dev);
+	}
+	return NULL;
+}
+
+struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	rcu_read_lock();
+	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
+	if (mlxsw_sp_port)
+		dev_hold(mlxsw_sp_port->dev);
+	rcu_read_unlock();
+	return mlxsw_sp_port;
+}
+
+void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	dev_put(mlxsw_sp_port->dev);
+}
+
+static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r,
+				       unsigned long event)
+{
+	switch (event) {
+	case NETDEV_UP:
+		if (!r)
+			return true;
+		r->ref_count++;
+		return false;
+	case NETDEV_DOWN:
+		if (r && --r->ref_count == 0)
+			return true;
+		/* It is possible we already removed the RIF ourselves
+		 * if it was assigned to a netdev that is now a bridge
+		 * or LAG slave.
+		 */
+		return false;
+	}
+
+	return false;
+}
+
+static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+		if (!mlxsw_sp->rifs[i])
+			return i;
+
+	return MLXSW_SP_RIF_MAX;
+}
+
+static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
+					   bool *p_lagged, u16 *p_system_port)
+{
+	u8 local_port = mlxsw_sp_vport->local_port;
+
+	*p_lagged = mlxsw_sp_vport->lagged;
+	*p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
+}
+
+static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
+				    struct net_device *l3_dev, u16 rif,
+				    bool create)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+	bool lagged = mlxsw_sp_vport->lagged;
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+	u16 system_port;
+
+	mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif,
+			    l3_dev->mtu, l3_dev->dev_addr);
+
+	mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
+	mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
+				  mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
+{
+	struct mlxsw_sp_fid *f;
+
+	f = kzalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	f->leave = mlxsw_sp_vport_rif_sp_leave;
+	f->ref_count = 0;
+	f->dev = l3_dev;
+	f->fid = fid;
+
+	return f;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f)
+{
+	struct mlxsw_sp_rif *r;
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return NULL;
+
+	ether_addr_copy(r->addr, l3_dev->dev_addr);
+	r->mtu = l3_dev->mtu;
+	r->ref_count = 1;
+	r->dev = l3_dev;
+	r->rif = rif;
+	r->f = f;
+
+	return r;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
+			     struct net_device *l3_dev)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+	struct mlxsw_sp_fid *f;
+	struct mlxsw_sp_rif *r;
+	u16 fid, rif;
+	int err;
+
+	rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
+	if (rif == MLXSW_SP_RIF_MAX)
+		return ERR_PTR(-ERANGE);
+
+	err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true);
+	if (err)
+		return ERR_PTR(err);
+
+	fid = mlxsw_sp_rif_sp_to_fid(rif);
+	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
+	if (err)
+		goto err_rif_fdb_op;
+
+	f = mlxsw_sp_rfid_alloc(fid, l3_dev);
+	if (!f) {
+		err = -ENOMEM;
+		goto err_rfid_alloc;
+	}
+
+	r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
+	if (!r) {
+		err = -ENOMEM;
+		goto err_rif_alloc;
+	}
+
+	f->r = r;
+	mlxsw_sp->rifs[rif] = r;
+
+	return r;
+
+err_rif_alloc:
+	kfree(f);
+err_rfid_alloc:
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
+err_rif_fdb_op:
+	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
+					  struct mlxsw_sp_rif *r)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+	struct net_device *l3_dev = r->dev;
+	struct mlxsw_sp_fid *f = r->f;
+	u16 fid = f->fid;
+	u16 rif = r->rif;
+
+	mlxsw_sp->rifs[rif] = NULL;
+	f->r = NULL;
+
+	kfree(r);
+
+	kfree(f);
+
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
+
+	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false);
+}
+
+static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
+				      struct net_device *l3_dev)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
+	struct mlxsw_sp_rif *r;
+
+	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
+	if (!r) {
+		r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
+		if (IS_ERR(r))
+			return PTR_ERR(r);
+	}
+
+	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f);
+	r->f->ref_count++;
+
+	netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid);
+
+	return 0;
+}
+
+static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
+
+	netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
+
+	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
+	if (--f->ref_count == 0)
+		mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r);
+}
+
+static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
+					 struct net_device *port_dev,
+					 unsigned long event, u16 vid)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
+	struct mlxsw_sp_port *mlxsw_sp_vport;
+
+	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
+	if (WARN_ON(!mlxsw_sp_vport))
+		return -EINVAL;
+
+	switch (event) {
+	case NETDEV_UP:
+		return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
+	case NETDEV_DOWN:
+		mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
+		break;
+	}
+
+	return 0;
+}
+
+static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
+					unsigned long event)
+{
+	if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev))
+		return 0;
+
+	return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
+}
+
+static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
+					 struct net_device *lag_dev,
+					 unsigned long event, u16 vid)
+{
+	struct net_device *port_dev;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
+		if (mlxsw_sp_port_dev_check(port_dev)) {
+			err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
+							    event, vid);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
+				       unsigned long event)
+{
+	if (netif_is_bridge_port(lag_dev))
+		return 0;
+
+	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
+}
+
+static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
+						    struct net_device *l3_dev)
+{
+	u16 fid;
+
+	if (is_vlan_dev(l3_dev))
+		fid = vlan_dev_vlan_id(l3_dev);
+	else if (mlxsw_sp->master_bridge.dev == l3_dev)
+		fid = 1;
+	else
+		return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
+
+	return mlxsw_sp_fid_find(mlxsw_sp, fid);
+}
+
+static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
+{
+	if (mlxsw_sp_fid_is_vfid(fid))
+		return MLXSW_REG_RITR_FID_IF;
+	else
+		return MLXSW_REG_RITR_VLAN_IF;
+}
+
+static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp,
+				  struct net_device *l3_dev,
+				  u16 fid, u16 rif,
+				  bool create)
+{
+	enum mlxsw_reg_ritr_if_type rif_type;
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+
+	rif_type = mlxsw_sp_rif_type_get(fid);
+	mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu,
+			    l3_dev->dev_addr);
+	mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
+				      struct net_device *l3_dev,
+				      struct mlxsw_sp_fid *f)
+{
+	struct mlxsw_sp_rif *r;
+	u16 rif;
+	int err;
+
+	rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
+	if (rif == MLXSW_SP_RIF_MAX)
+		return -ERANGE;
+
+	err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
+	if (err)
+		goto err_rif_fdb_op;
+
+	r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
+	if (!r) {
+		err = -ENOMEM;
+		goto err_rif_alloc;
+	}
+
+	f->r = r;
+	mlxsw_sp->rifs[rif] = r;
+
+	netdev_dbg(l3_dev, "RIF=%d created\n", rif);
+
+	return 0;
+
+err_rif_alloc:
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
+err_rif_fdb_op:
+	mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+	return err;
+}
+
+void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_rif *r)
+{
+	struct net_device *l3_dev = r->dev;
+	struct mlxsw_sp_fid *f = r->f;
+	u16 rif = r->rif;
+
+	mlxsw_sp->rifs[rif] = NULL;
+	f->r = NULL;
+
+	kfree(r);
+
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
+
+	mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+
+	netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
+}
+
+static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
+					  struct net_device *br_dev,
+					  unsigned long event)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
+	struct mlxsw_sp_fid *f;
+
+	/* FID can either be an actual FID if the L3 device is the
+	 * VLAN-aware bridge or a VLAN device on top. Otherwise, the
+	 * L3 device is a VLAN-unaware bridge and we get a vFID.
+	 */
+	f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
+	if (WARN_ON(!f))
+		return -EINVAL;
+
+	switch (event) {
+	case NETDEV_UP:
+		return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
+	case NETDEV_DOWN:
+		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+		break;
+	}
+
+	return 0;
+}
+
+static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
+					unsigned long event)
+{
+	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
+	u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+	if (mlxsw_sp_port_dev_check(real_dev))
+		return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
+						     vid);
+	else if (netif_is_lag_master(real_dev))
+		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
+						     vid);
+	else if (netif_is_bridge_master(real_dev) &&
+		 mlxsw_sp->master_bridge.dev == real_dev)
+		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
+						      event);
+
+	return 0;
+}
+
+static int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
+				   unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
+	struct net_device *dev = ifa->ifa_dev->dev;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_rif *r;
+	int err = 0;
+
+	mlxsw_sp = mlxsw_sp_lower_get(dev);
+	if (!mlxsw_sp)
+		goto out;
+
+	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!mlxsw_sp_rif_should_config(r, event))
+		goto out;
+
+	if (mlxsw_sp_port_dev_check(dev))
+		err = mlxsw_sp_inetaddr_port_event(dev, event);
+	else if (netif_is_lag_master(dev))
+		err = mlxsw_sp_inetaddr_lag_event(dev, event);
+	else if (netif_is_bridge_master(dev))
+		err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
+	else if (is_vlan_dev(dev))
+		err = mlxsw_sp_inetaddr_vlan_event(dev, event);
+
+out:
+	return notifier_from_errno(err);
+}
+
+static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif,
+			     const char *mac, int mtu)
+{
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+	int err;
+
+	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
+	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
+	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
+{
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_rif *r;
+	int err;
+
+	mlxsw_sp = mlxsw_sp_lower_get(dev);
+	if (!mlxsw_sp)
+		return 0;
+
+	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!r)
+		return 0;
+
+	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu);
+	if (err)
+		goto err_rif_edit;
+
+	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true);
+	if (err)
+		goto err_rif_fdb_op;
+
+	ether_addr_copy(r->addr, dev->dev_addr);
+	r->mtu = dev->mtu;
+
+	netdev_dbg(dev, "Updated RIF=%d\n", r->rif);
+
+	return 0;
+
+err_rif_fdb_op:
+	mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu);
+err_rif_edit:
+	mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true);
+	return err;
+}
+
 static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port,
 					 u16 fid)
 {
@@ -2624,9 +3024,15 @@ int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid)
 		return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid);
 }
 
-static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
+static void mlxsw_sp_master_bridge_gone_sync(struct mlxsw_sp *mlxsw_sp)
 {
-	return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
+	struct mlxsw_sp_fid *f, *tmp;
+
+	list_for_each_entry_safe(f, tmp, &mlxsw_sp->fids, list)
+		if (--f->ref_count == 0)
+			mlxsw_sp_fid_destroy(mlxsw_sp, f);
+		else
+			WARN_ON_ONCE(1);
 }
 
 static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp,
@@ -2645,8 +3051,15 @@ static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp,
 
 static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp)
 {
-	if (--mlxsw_sp->master_bridge.ref_count == 0)
+	if (--mlxsw_sp->master_bridge.ref_count == 0) {
 		mlxsw_sp->master_bridge.dev = NULL;
+		/* It's possible upper VLAN devices are still holding
+		 * references to underlying FIDs. Drop the reference
+		 * and release the resources if it was the last one.
+		 * If it wasn't, then something bad happened.
+		 */
+		mlxsw_sp_master_bridge_gone_sync(mlxsw_sp);
+	}
 }
 
 static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -2806,6 +3219,45 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
 	return -EBUSY;
 }
 
+static void
+mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
+				  u16 lag_id)
+{
+	struct mlxsw_sp_port *mlxsw_sp_vport;
+	struct mlxsw_sp_fid *f;
+
+	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);
+	if (WARN_ON(!mlxsw_sp_vport))
+		return;
+
+	/* If vPort is assigned a RIF, then leave it since it's no
+	 * longer valid.
+	 */
+	f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
+	if (f)
+		f->leave(mlxsw_sp_vport);
+
+	mlxsw_sp_vport->lag_id = lag_id;
+	mlxsw_sp_vport->lagged = 1;
+}
+
+static void
+mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_port *mlxsw_sp_vport;
+	struct mlxsw_sp_fid *f;
+
+	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1);
+	if (WARN_ON(!mlxsw_sp_vport))
+		return;
+
+	f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
+	if (f)
+		f->leave(mlxsw_sp_vport);
+
+	mlxsw_sp_vport->lagged = 0;
+}
+
 static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
 				  struct net_device *lag_dev)
 {
@@ -2841,6 +3293,9 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
 	mlxsw_sp_port->lag_id = lag_id;
 	mlxsw_sp_port->lagged = 1;
 	lag->ref_count++;
+
+	mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id);
+
 	return 0;
 
 err_col_port_enable:
@@ -2878,6 +3333,8 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
 				     mlxsw_sp_port->local_port);
 	mlxsw_sp_port->lagged = 0;
 	lag->ref_count--;
+
+	mlxsw_sp_port_pvid_vport_lag_leave(mlxsw_sp_port);
 }
 
 static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -3071,47 +3528,97 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
 	return 0;
 }
 
-static struct mlxsw_sp_fid *
-mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp,
-		      const struct net_device *br_dev)
+static int mlxsw_sp_master_bridge_vlan_link(struct mlxsw_sp *mlxsw_sp,
+					    struct net_device *vlan_dev)
 {
+	u16 fid = vlan_dev_vlan_id(vlan_dev);
 	struct mlxsw_sp_fid *f;
 
-	list_for_each_entry(f, &mlxsw_sp->br_vfids.list, list) {
-		if (f->dev == br_dev)
-			return f;
+	f = mlxsw_sp_fid_find(mlxsw_sp, fid);
+	if (!f) {
+		f = mlxsw_sp_fid_create(mlxsw_sp, fid);
+		if (IS_ERR(f))
+			return PTR_ERR(f);
 	}
 
-	return NULL;
+	f->ref_count++;
+
+	return 0;
+}
+
+static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp,
+					       struct net_device *vlan_dev)
+{
+	u16 fid = vlan_dev_vlan_id(vlan_dev);
+	struct mlxsw_sp_fid *f;
+
+	f = mlxsw_sp_fid_find(mlxsw_sp, fid);
+	if (f && f->r)
+		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+	if (f && --f->ref_count == 0)
+		mlxsw_sp_fid_destroy(mlxsw_sp, f);
 }
 
-static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid)
+static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
+					   unsigned long event, void *ptr)
 {
-	return vfid - MLXSW_SP_VFID_PORT_MAX;
+	struct netdev_notifier_changeupper_info *info;
+	struct net_device *upper_dev;
+	struct mlxsw_sp *mlxsw_sp;
+	int err;
+
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		return 0;
+	if (br_dev != mlxsw_sp->master_bridge.dev)
+		return 0;
+
+	info = ptr;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (!is_vlan_dev(upper_dev))
+			break;
+		if (info->linking) {
+			err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp,
+							       upper_dev);
+			if (err)
+				return err;
+		} else {
+			mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev);
+		}
+		break;
+	}
+
+	return 0;
 }
 
-static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid)
+static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp)
 {
-	return MLXSW_SP_VFID_PORT_MAX + br_vfid;
+	return find_first_zero_bit(mlxsw_sp->vfids.mapped,
+				   MLXSW_SP_VFID_MAX);
 }
 
-static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create)
 {
-	return find_first_zero_bit(mlxsw_sp->br_vfids.mapped,
-				   MLXSW_SP_VFID_BR_MAX);
+	char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+	mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
 }
 
-static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
+static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
 
-static struct mlxsw_sp_fid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp,
-						    struct net_device *br_dev)
+static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp,
+						 struct net_device *br_dev)
 {
 	struct device *dev = mlxsw_sp->bus_info->dev;
 	struct mlxsw_sp_fid *f;
 	u16 vfid, fid;
 	int err;
 
-	vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp));
+	vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp);
 	if (vfid == MLXSW_SP_VFID_MAX) {
 		dev_err(dev, "No available vFIDs\n");
 		return ERR_PTR(-ERANGE);
@@ -3128,12 +3635,12 @@ static struct mlxsw_sp_fid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp,
 	if (!f)
 		goto err_allocate_vfid;
 
-	f->leave = mlxsw_sp_vport_br_vfid_leave;
+	f->leave = mlxsw_sp_vport_vfid_leave;
 	f->fid = fid;
 	f->dev = br_dev;
 
-	list_add(&f->list, &mlxsw_sp->br_vfids.list);
-	set_bit(mlxsw_sp_vfid_to_br_vfid(vfid), mlxsw_sp->br_vfids.mapped);
+	list_add(&f->list, &mlxsw_sp->vfids.list);
+	set_bit(vfid, mlxsw_sp->vfids.mapped);
 
 	return f;
 
@@ -3142,29 +3649,42 @@ err_allocate_vfid:
 	return ERR_PTR(-ENOMEM);
 }
 
-static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
-				     struct mlxsw_sp_fid *f)
+static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_fid *f)
 {
 	u16 vfid = mlxsw_sp_fid_to_vfid(f->fid);
-	u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid);
+	u16 fid = f->fid;
 
-	clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped);
+	clear_bit(vfid, mlxsw_sp->vfids.mapped);
 	list_del(&f->list);
 
-	mlxsw_sp_vfid_op(mlxsw_sp, f->fid, false);
+	if (f->r)
+		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
 
 	kfree(f);
+
+	mlxsw_sp_vfid_op(mlxsw_sp, fid, false);
 }
 
-static int mlxsw_sp_vport_br_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport,
-				       struct net_device *br_dev)
+static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
+				  bool valid)
+{
+	enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
+	u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+
+	return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid,
+					    vid);
+}
+
+static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport,
+				    struct net_device *br_dev)
 {
 	struct mlxsw_sp_fid *f;
 	int err;
 
-	f = mlxsw_sp_br_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev);
+	f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev);
 	if (!f) {
-		f = mlxsw_sp_br_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev);
+		f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev);
 		if (IS_ERR(f))
 			return PTR_ERR(f);
 	}
@@ -3188,11 +3708,11 @@ err_vport_fid_map:
 	mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false);
 err_vport_flood_set:
 	if (!f->ref_count)
-		mlxsw_sp_br_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f);
+		mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f);
 	return err;
 }
 
-static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
+static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
 {
 	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
 
@@ -3206,22 +3726,24 @@ static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
 
 	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
 	if (--f->ref_count == 0)
-		mlxsw_sp_br_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f);
+		mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f);
 }
 
 static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport,
 				      struct net_device *br_dev)
 {
+	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
 	u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
 	struct net_device *dev = mlxsw_sp_vport->dev;
 	int err;
 
-	mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport);
+	if (f && !WARN_ON(!f->leave))
+		f->leave(mlxsw_sp_vport);
 
-	err = mlxsw_sp_vport_br_vfid_join(mlxsw_sp_vport, br_dev);
+	err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport, br_dev);
 	if (err) {
 		netdev_err(dev, "Failed to join vFID\n");
-		goto err_vport_br_vfid_join;
+		return err;
 	}
 
 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
@@ -3238,9 +3760,7 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport,
 	return 0;
 
 err_port_vid_learning_set:
-	mlxsw_sp_vport_br_vfid_leave(mlxsw_sp_vport);
-err_vport_br_vfid_join:
-	mlxsw_sp_vport_vfid_join(mlxsw_sp_vport);
+	mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport);
 	return err;
 }
 
@@ -3250,12 +3770,7 @@ static void mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
 
 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
 
-	mlxsw_sp_vport_br_vfid_leave(mlxsw_sp_vport);
-
-	mlxsw_sp_vport_vfid_join(mlxsw_sp_vport);
-
-	mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
-				    MLXSW_REG_SPMS_STATE_FORWARDING);
+	mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport);
 
 	mlxsw_sp_vport->learning = 0;
 	mlxsw_sp_vport->learning_sync = 0;
@@ -3271,7 +3786,7 @@ mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port,
 
 	list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
 			    vport.list) {
-		struct net_device *dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
+		struct net_device *dev = mlxsw_sp_vport_dev_get(mlxsw_sp_vport);
 
 		if (dev && dev == br_dev)
 			return false;
@@ -3365,10 +3880,14 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	int err = 0;
 
-	if (mlxsw_sp_port_dev_check(dev))
+	if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
+		err = mlxsw_sp_netdevice_router_port_event(dev);
+	else if (mlxsw_sp_port_dev_check(dev))
 		err = mlxsw_sp_netdevice_port_event(dev, event, ptr);
 	else if (netif_is_lag_master(dev))
 		err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
+	else if (netif_is_bridge_master(dev))
+		err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr);
 	else if (is_vlan_dev(dev))
 		err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
 
@@ -3379,11 +3898,17 @@ static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = {
 	.notifier_call = mlxsw_sp_netdevice_event,
 };
 
+static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_inetaddr_event,
+	.priority = 10,	/* Must be called before FIB notifier block */
+};
+
 static int __init mlxsw_sp_module_init(void)
 {
 	int err;
 
 	register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+	register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	err = mlxsw_core_driver_register(&mlxsw_sp_driver);
 	if (err)
 		goto err_core_driver_register;
@@ -3397,6 +3922,7 @@ err_core_driver_register:
 static void __exit mlxsw_sp_module_exit(void)
 {
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
+	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 36c9835ea20b..ef4ac8987a2a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -39,19 +39,22 @@
 
 #include <linux/types.h>
 #include <linux/netdevice.h>
+#include <linux/rhashtable.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
 #include <linux/list.h>
 #include <linux/dcbnl.h>
+#include <linux/in6.h>
 #include <net/switchdev.h>
 
 #include "port.h"
 #include "core.h"
 
 #define MLXSW_SP_VFID_BASE VLAN_N_VID
-#define MLXSW_SP_VFID_PORT_MAX 512	/* Non-bridged VLAN interfaces */
-#define MLXSW_SP_VFID_BR_MAX 6144	/* Bridged VLAN interfaces */
-#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX)
+#define MLXSW_SP_VFID_MAX 6656	/* Bridged VLAN interfaces */
+
+#define MLXSW_SP_RFID_BASE 15360
+#define MLXSW_SP_RIF_MAX 800
 
 #define MLXSW_SP_LAG_MAX 64
 #define MLXSW_SP_PORT_PER_LAG_MAX 16
@@ -60,6 +63,12 @@
 
 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
 
+#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */
+#define MLXSW_SP_LPM_TREE_MAX 22
+#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN)
+
+#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256
+
 #define MLXSW_SP_PORT_BASE_SPEED 25000	/* Mb/s */
 
 #define MLXSW_SP_BYTES_PER_CELL 96
@@ -67,6 +76,10 @@
 #define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL)
 #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL)
 
+#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
+#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */
+#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */
+
 /* Maximum delay buffer needed in case of PAUSE frames, in cells.
  * Assumes 100m cable and maximum MTU.
  */
@@ -92,8 +105,17 @@ struct mlxsw_sp_fid {
 	struct list_head list;
 	unsigned int ref_count;
 	struct net_device *dev;
+	struct mlxsw_sp_rif *r;
 	u16 fid;
-	u16 vid;
+};
+
+struct mlxsw_sp_rif {
+	struct net_device *dev;
+	unsigned int ref_count;
+	struct mlxsw_sp_fid *f;
+	unsigned char addr[ETH_ALEN];
+	int mtu;
+	u16 rif;
 };
 
 struct mlxsw_sp_mid {
@@ -116,7 +138,17 @@ static inline u16 mlxsw_sp_fid_to_vfid(u16 fid)
 
 static inline bool mlxsw_sp_fid_is_vfid(u16 fid)
 {
-	return fid >= MLXSW_SP_VFID_BASE;
+	return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE;
+}
+
+static inline bool mlxsw_sp_fid_is_rfid(u16 fid)
+{
+	return fid >= MLXSW_SP_RFID_BASE;
+}
+
+static inline u16 mlxsw_sp_rif_sp_to_fid(u16 rif)
+{
+	return MLXSW_SP_RFID_BASE + rif;
 }
 
 struct mlxsw_sp_sb_pr {
@@ -153,20 +185,60 @@ struct mlxsw_sp_sb {
 	} ports[MLXSW_PORT_MAX_PORTS];
 };
 
-struct mlxsw_sp {
+#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE)
+
+struct mlxsw_sp_prefix_usage {
+	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
+};
+
+enum mlxsw_sp_l3proto {
+	MLXSW_SP_L3_PROTO_IPV4,
+	MLXSW_SP_L3_PROTO_IPV6,
+};
+
+struct mlxsw_sp_lpm_tree {
+	u8 id; /* tree ID */
+	unsigned int ref_count;
+	enum mlxsw_sp_l3proto proto;
+	struct mlxsw_sp_prefix_usage prefix_usage;
+};
+
+struct mlxsw_sp_fib;
+
+struct mlxsw_sp_vr {
+	u16 id; /* virtual router ID */
+	bool used;
+	enum mlxsw_sp_l3proto proto;
+	u32 tb_id; /* kernel fib table id */
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	struct mlxsw_sp_fib *fib;
+};
+
+struct mlxsw_sp_router {
+	struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
+	struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
+	struct rhashtable neigh_ht;
 	struct {
-		struct list_head list;
-		DECLARE_BITMAP(mapped, MLXSW_SP_VFID_PORT_MAX);
-	} port_vfids;
+		struct delayed_work dw;
+		unsigned long interval;	/* ms */
+	} neighs_update;
+	struct delayed_work nexthop_probe_dw;
+#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
+	struct list_head nexthop_group_list;
+	struct list_head nexthop_neighs_list;
+};
+
+struct mlxsw_sp {
 	struct {
 		struct list_head list;
-		DECLARE_BITMAP(mapped, MLXSW_SP_VFID_BR_MAX);
-	} br_vfids;
+		DECLARE_BITMAP(mapped, MLXSW_SP_VFID_MAX);
+	} vfids;
 	struct {
 		struct list_head list;
 		DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX);
 	} br_mids;
 	struct list_head fids;	/* VLAN-aware bridge FIDs */
+	struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX];
 	struct mlxsw_sp_port **ports;
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
@@ -184,6 +256,10 @@ struct mlxsw_sp {
 	struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
 	u8 port_to_module[MLXSW_PORT_MAX_PORTS];
 	struct mlxsw_sp_sb sb;
+	struct mlxsw_sp_router router;
+	struct {
+		DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
+	} kvdl;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -242,6 +318,9 @@ struct mlxsw_sp_port {
 	struct list_head vports_list;
 };
 
+struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
+void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
+
 static inline bool
 mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
 {
@@ -287,7 +366,7 @@ mlxsw_sp_vport_fid_get(const struct mlxsw_sp_port *mlxsw_sp_vport)
 }
 
 static inline struct net_device *
-mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport)
+mlxsw_sp_vport_dev_get(const struct mlxsw_sp_port *mlxsw_sp_vport)
 {
 	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
 
@@ -325,6 +404,44 @@ mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
 	return NULL;
 }
 
+static inline struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp,
+						     u16 fid)
+{
+	struct mlxsw_sp_fid *f;
+
+	list_for_each_entry(f, &mlxsw_sp->fids, list)
+		if (f->fid == fid)
+			return f;
+
+	return NULL;
+}
+
+static inline struct mlxsw_sp_fid *
+mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp,
+		   const struct net_device *br_dev)
+{
+	struct mlxsw_sp_fid *f;
+
+	list_for_each_entry(f, &mlxsw_sp->vfids.list, list)
+		if (f->dev == br_dev)
+			return f;
+
+	return NULL;
+}
+
+static inline struct mlxsw_sp_rif *
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+			 const struct net_device *dev)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+		if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
+			return mlxsw_sp->rifs[i];
+
+	return NULL;
+}
+
 enum mlxsw_sp_flood_table {
 	MLXSW_SP_FLOOD_TABLE_UC,
 	MLXSW_SP_FLOOD_TABLE_BM,
@@ -377,13 +494,17 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
 			   u16 vid_end, bool is_member, bool untagged);
 int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
 			  u16 vid);
-int mlxsw_sp_port_kill_vid(struct net_device *dev,
-			   __be16 __always_unused proto, u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
 			     bool set);
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid);
+int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
+			bool adding);
+struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid);
+void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f);
+void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_rif *r);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
 			  bool dwrr, u8 dwrr_weight);
@@ -413,4 +534,19 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 
 #endif
 
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
+			     const struct switchdev_obj_ipv4_fib *fib4,
+			     struct switchdev_trans *trans);
+int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			     const struct switchdev_obj_ipv4_fib *fib4);
+int mlxsw_sp_router_neigh_construct(struct net_device *dev,
+				    struct neighbour *n);
+void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
+				   struct neighbour *n);
+
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
new file mode 100644
index 000000000000..ac321e8e5c1a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -0,0 +1,91 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+
+#define MLXSW_SP_KVDL_SINGLE_BASE 0
+#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP_KVDL_CHUNKS_BASE \
+	(MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
+#define MLXSW_SP_KVDL_CHUNKS_SIZE \
+	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE)
+#define MLXSW_SP_CHUNK_MAX 32
+
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count)
+{
+	int entry_index;
+	int size;
+	int type_base;
+	int type_size;
+	int type_entries;
+
+	if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) {
+		return -EINVAL;
+	} else if (entry_count == 1) {
+		type_base = MLXSW_SP_KVDL_SINGLE_BASE;
+		type_size = MLXSW_SP_KVDL_SINGLE_SIZE;
+		type_entries = 1;
+	} else {
+		type_base = MLXSW_SP_KVDL_CHUNKS_BASE;
+		type_size = MLXSW_SP_KVDL_CHUNKS_SIZE;
+		type_entries = MLXSW_SP_CHUNK_MAX;
+	}
+
+	entry_index = type_base;
+	size = type_base + type_size;
+	for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) {
+		int i;
+
+		for (i = 0; i < type_entries; i++)
+			set_bit(entry_index + i, mlxsw_sp->kvdl.usage);
+		return entry_index;
+	}
+	return -ENOBUFS;
+}
+
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
+{
+	int type_entries;
+	int i;
+
+	if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE)
+		type_entries = 1;
+	else
+		type_entries = MLXSW_SP_CHUNK_MAX;
+	for (i = 0; i < type_entries; i++)
+		clear_bit(entry_index + i, mlxsw_sp->kvdl.usage);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
new file mode 100644
index 000000000000..e084ea5448ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -0,0 +1,1814 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
+ * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/rhashtable.h>
+#include <linux/bitops.h>
+#include <linux/in6.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include <net/arp.h>
+
+#include "spectrum.h"
+#include "core.h"
+#include "reg.h"
+
+#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
+	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
+
+static bool
+mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
+			     struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+	unsigned char prefix;
+
+	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
+		if (!test_bit(prefix, prefix_usage2->b))
+			return false;
+	}
+	return true;
+}
+
+static bool
+mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
+			 struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
+}
+
+static bool
+mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
+{
+	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
+
+	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
+}
+
+static void
+mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
+			  struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
+}
+
+static void
+mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
+{
+	memset(prefix_usage, 0, sizeof(*prefix_usage));
+}
+
+static void
+mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
+			  unsigned char prefix_len)
+{
+	set_bit(prefix_len, prefix_usage->b);
+}
+
+static void
+mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
+			    unsigned char prefix_len)
+{
+	clear_bit(prefix_len, prefix_usage->b);
+}
+
+struct mlxsw_sp_fib_key {
+	unsigned char addr[sizeof(struct in6_addr)];
+	unsigned char prefix_len;
+};
+
+enum mlxsw_sp_fib_entry_type {
+	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
+	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
+	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+};
+
+struct mlxsw_sp_nexthop_group;
+
+struct mlxsw_sp_fib_entry {
+	struct rhash_head ht_node;
+	struct mlxsw_sp_fib_key key;
+	enum mlxsw_sp_fib_entry_type type;
+	u8 added:1;
+	u16 rif; /* used for action local */
+	struct mlxsw_sp_vr *vr;
+	struct list_head nexthop_group_node;
+	struct mlxsw_sp_nexthop_group *nh_group;
+};
+
+struct mlxsw_sp_fib {
+	struct rhashtable ht;
+	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
+	struct mlxsw_sp_prefix_usage prefix_usage;
+};
+
+static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
+	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
+	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
+	.key_len = sizeof(struct mlxsw_sp_fib_key),
+	.automatic_shrinking = true,
+};
+
+static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
+				     struct mlxsw_sp_fib_entry *fib_entry)
+{
+	unsigned char prefix_len = fib_entry->key.prefix_len;
+	int err;
+
+	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
+				     mlxsw_sp_fib_ht_params);
+	if (err)
+		return err;
+	if (fib->prefix_ref_count[prefix_len]++ == 0)
+		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
+	return 0;
+}
+
+static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
+				      struct mlxsw_sp_fib_entry *fib_entry)
+{
+	unsigned char prefix_len = fib_entry->key.prefix_len;
+
+	if (--fib->prefix_ref_count[prefix_len] == 0)
+		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
+	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
+			       mlxsw_sp_fib_ht_params);
+}
+
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
+			  size_t addr_len, unsigned char prefix_len)
+{
+	struct mlxsw_sp_fib_entry *fib_entry;
+
+	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
+	if (!fib_entry)
+		return NULL;
+	memcpy(fib_entry->key.addr, addr, addr_len);
+	fib_entry->key.prefix_len = prefix_len;
+	return fib_entry;
+}
+
+static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
+{
+	kfree(fib_entry);
+}
+
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+			  size_t addr_len, unsigned char prefix_len)
+{
+	struct mlxsw_sp_fib_key key = {{ 0 } };
+
+	memcpy(key.addr, addr, addr_len);
+	key.prefix_len = prefix_len;
+	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
+}
+
+static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
+{
+	struct mlxsw_sp_fib *fib;
+	int err;
+
+	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
+	if (!fib)
+		return ERR_PTR(-ENOMEM);
+	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
+	if (err)
+		goto err_rhashtable_init;
+	return fib;
+
+err_rhashtable_init:
+	kfree(fib);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
+{
+	rhashtable_destroy(&fib->ht);
+	kfree(fib);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
+{
+	static struct mlxsw_sp_lpm_tree *lpm_tree;
+	int i;
+
+	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
+		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
+		if (lpm_tree->ref_count == 0) {
+			if (one_reserved)
+				one_reserved = false;
+			else
+				return lpm_tree;
+		}
+	}
+	return NULL;
+}
+
+static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	char ralta_pl[MLXSW_REG_RALTA_LEN];
+
+	mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+}
+
+static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	char ralta_pl[MLXSW_REG_RALTA_LEN];
+
+	mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+}
+
+static int
+mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_prefix_usage *prefix_usage,
+				  struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	char ralst_pl[MLXSW_REG_RALST_LEN];
+	u8 root_bin = 0;
+	u8 prefix;
+	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
+
+	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
+		root_bin = prefix;
+
+	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
+	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
+		if (prefix == 0)
+			continue;
+		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
+					 MLXSW_REG_RALST_BIN_NO_CHILD);
+		last_prefix = prefix;
+	}
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
+			 struct mlxsw_sp_prefix_usage *prefix_usage,
+			 enum mlxsw_sp_l3proto proto, bool one_reserved)
+{
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	int err;
+
+	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
+	if (!lpm_tree)
+		return ERR_PTR(-EBUSY);
+	lpm_tree->proto = proto;
+	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
+	if (err)
+		return ERR_PTR(err);
+
+	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
+						lpm_tree);
+	if (err)
+		goto err_left_struct_set;
+	return lpm_tree;
+
+err_left_struct_set:
+	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+	return ERR_PTR(err);
+}
+
+static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
+		      struct mlxsw_sp_prefix_usage *prefix_usage,
+		      enum mlxsw_sp_l3proto proto, bool one_reserved)
+{
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	int i;
+
+	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
+		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
+		if (lpm_tree->proto == proto &&
+		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
+					     prefix_usage))
+			goto inc_ref_count;
+	}
+	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
+					    proto, one_reserved);
+	if (IS_ERR(lpm_tree))
+		return lpm_tree;
+
+inc_ref_count:
+	lpm_tree->ref_count++;
+	return lpm_tree;
+}
+
+static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+	if (--lpm_tree->ref_count == 0)
+		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
+	return 0;
+}
+
+static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	int i;
+
+	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
+		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
+		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
+	}
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_vr *vr;
+	int i;
+
+	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+		vr = &mlxsw_sp->router.vrs[i];
+		if (!vr->used)
+			return vr;
+	}
+	return NULL;
+}
+
+static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_vr *vr)
+{
+	char raltb_pl[MLXSW_REG_RALTB_LEN];
+
+	mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+}
+
+static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_vr *vr)
+{
+	char raltb_pl[MLXSW_REG_RALTB_LEN];
+
+	/* Bind to tree 0 which is default */
+	mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+}
+
+static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
+{
+	/* For our purpose, squash main and local table into one */
+	if (tb_id == RT_TABLE_LOCAL)
+		tb_id = RT_TABLE_MAIN;
+	return tb_id;
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
+					    u32 tb_id,
+					    enum mlxsw_sp_l3proto proto)
+{
+	struct mlxsw_sp_vr *vr;
+	int i;
+
+	tb_id = mlxsw_sp_fix_tb_id(tb_id);
+	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+		vr = &mlxsw_sp->router.vrs[i];
+		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
+			return vr;
+	}
+	return NULL;
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
+					      unsigned char prefix_len,
+					      u32 tb_id,
+					      enum mlxsw_sp_l3proto proto)
+{
+	struct mlxsw_sp_prefix_usage req_prefix_usage;
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+	struct mlxsw_sp_vr *vr;
+	int err;
+
+	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
+	if (!vr)
+		return ERR_PTR(-EBUSY);
+	vr->fib = mlxsw_sp_fib_create();
+	if (IS_ERR(vr->fib))
+		return ERR_CAST(vr->fib);
+
+	vr->proto = proto;
+	vr->tb_id = tb_id;
+	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
+	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
+	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+					 proto, true);
+	if (IS_ERR(lpm_tree)) {
+		err = PTR_ERR(lpm_tree);
+		goto err_tree_get;
+	}
+	vr->lpm_tree = lpm_tree;
+	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
+	if (err)
+		goto err_tree_bind;
+
+	vr->used = true;
+	return vr;
+
+err_tree_bind:
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
+err_tree_get:
+	mlxsw_sp_fib_destroy(vr->fib);
+
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_vr *vr)
+{
+	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
+	mlxsw_sp_fib_destroy(vr->fib);
+	vr->used = false;
+}
+
+static int
+mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
+			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
+{
+	struct mlxsw_sp_lpm_tree *lpm_tree;
+
+	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
+				     &vr->lpm_tree->prefix_usage))
+		return 0;
+
+	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
+					 vr->proto, false);
+	if (IS_ERR(lpm_tree)) {
+		/* We failed to get a tree according to the required
+		 * prefix usage. However, the current tree might be still good
+		 * for us if our requirement is subset of the prefixes used
+		 * in the tree.
+		 */
+		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
+						 &vr->lpm_tree->prefix_usage))
+			return 0;
+		return PTR_ERR(lpm_tree);
+	}
+
+	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
+	vr->lpm_tree = lpm_tree;
+	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
+					   unsigned char prefix_len,
+					   u32 tb_id,
+					   enum mlxsw_sp_l3proto proto)
+{
+	struct mlxsw_sp_vr *vr;
+	int err;
+
+	tb_id = mlxsw_sp_fix_tb_id(tb_id);
+	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
+	if (!vr) {
+		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
+		if (IS_ERR(vr))
+			return vr;
+	} else {
+		struct mlxsw_sp_prefix_usage req_prefix_usage;
+
+		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
+					  &vr->fib->prefix_usage);
+		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
+		/* Need to replace LPM tree in case new prefix is required. */
+		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
+						 &req_prefix_usage);
+		if (err)
+			return ERR_PTR(err);
+	}
+	return vr;
+}
+
+static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
+{
+	/* Destroy virtual router entity in case the associated FIB is empty
+	 * and allow it to be used for other tables in future. Otherwise,
+	 * check if some prefix usage did not disappear and change tree if
+	 * that is the case. Note that in case new, smaller tree cannot be
+	 * allocated, the original one will be kept being used.
+	 */
+	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
+		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
+	else
+		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
+					   &vr->fib->prefix_usage);
+}
+
+static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_vr *vr;
+	int i;
+
+	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+		vr = &mlxsw_sp->router.vrs[i];
+		vr->id = i;
+	}
+}
+
+struct mlxsw_sp_neigh_key {
+	unsigned char addr[sizeof(struct in6_addr)];
+	struct net_device *dev;
+};
+
+struct mlxsw_sp_neigh_entry {
+	struct rhash_head ht_node;
+	struct mlxsw_sp_neigh_key key;
+	u16 rif;
+	struct neighbour *n;
+	bool offloaded;
+	struct delayed_work dw;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	unsigned char ha[ETH_ALEN];
+	struct list_head nexthop_list; /* list of nexthops using
+					* this neigh entry
+					*/
+	struct list_head nexthop_neighs_list_node;
+};
+
+static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
+	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
+	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
+	.key_len = sizeof(struct mlxsw_sp_neigh_key),
+};
+
+static int
+mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
+				      &neigh_entry->ht_node,
+				      mlxsw_sp_neigh_ht_params);
+}
+
+static void
+mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
+			       &neigh_entry->ht_node,
+			       mlxsw_sp_neigh_ht_params);
+}
+
+static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);
+
+static struct mlxsw_sp_neigh_entry *
+mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
+			    struct net_device *dev, u16 rif,
+			    struct neighbour *n)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+
+	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
+	if (!neigh_entry)
+		return NULL;
+	memcpy(neigh_entry->key.addr, addr, addr_len);
+	neigh_entry->key.dev = dev;
+	neigh_entry->rif = rif;
+	neigh_entry->n = n;
+	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
+	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
+	return neigh_entry;
+}
+
+static void
+mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+	kfree(neigh_entry);
+}
+
+static struct mlxsw_sp_neigh_entry *
+mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
+			    size_t addr_len, struct net_device *dev)
+{
+	struct mlxsw_sp_neigh_key key = {{ 0 } };
+
+	memcpy(key.addr, addr, addr_len);
+	key.dev = dev;
+	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
+				      &key, mlxsw_sp_neigh_ht_params);
+}
+
+int mlxsw_sp_router_neigh_construct(struct net_device *dev,
+				    struct neighbour *n)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+	struct mlxsw_sp_rif *r;
+	u32 dip;
+	int err;
+
+	if (n->tbl != &arp_tbl)
+		return 0;
+
+	dip = ntohl(*((__be32 *) n->primary_key));
+	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
+						  n->dev);
+	if (neigh_entry) {
+		WARN_ON(neigh_entry->n != n);
+		return 0;
+	}
+
+	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (WARN_ON(!r))
+		return -EINVAL;
+
+	neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
+						  r->rif, n);
+	if (!neigh_entry)
+		return -ENOMEM;
+	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+	if (err)
+		goto err_neigh_entry_insert;
+	return 0;
+
+err_neigh_entry_insert:
+	mlxsw_sp_neigh_entry_destroy(neigh_entry);
+	return err;
+}
+
+void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
+				   struct neighbour *n)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+	u32 dip;
+
+	if (n->tbl != &arp_tbl)
+		return;
+
+	dip = ntohl(*((__be32 *) n->primary_key));
+	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
+						  n->dev);
+	if (!neigh_entry)
+		return;
+	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
+	mlxsw_sp_neigh_entry_destroy(neigh_entry);
+}
+
+static void
+mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
+{
+	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+
+	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
+}
+
+static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
+						   char *rauhtd_pl,
+						   int ent_index)
+{
+	struct net_device *dev;
+	struct neighbour *n;
+	__be32 dipn;
+	u32 dip;
+	u16 rif;
+
+	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
+
+	if (!mlxsw_sp->rifs[rif]) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
+		return;
+	}
+
+	dipn = htonl(dip);
+	dev = mlxsw_sp->rifs[rif]->dev;
+	n = neigh_lookup(&arp_tbl, &dipn, dev);
+	if (!n) {
+		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
+			   &dip);
+		return;
+	}
+
+	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
+	neigh_event_send(n, NULL);
+	neigh_release(n);
+}
+
+static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
+						   char *rauhtd_pl,
+						   int rec_index)
+{
+	u8 num_entries;
+	int i;
+
+	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
+								rec_index);
+	/* Hardware starts counting at 0, so add 1. */
+	num_entries++;
+
+	/* Each record consists of several neighbour entries. */
+	for (i = 0; i < num_entries; i++) {
+		int ent_index;
+
+		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
+		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
+						       ent_index);
+	}
+
+}
+
+static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
+					      char *rauhtd_pl, int rec_index)
+{
+	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
+	case MLXSW_REG_RAUHTD_TYPE_IPV4:
+		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
+						       rec_index);
+		break;
+	case MLXSW_REG_RAUHTD_TYPE_IPV6:
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+
+static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+{
+	char *rauhtd_pl;
+	u8 num_rec;
+	int i, err;
+
+	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
+	if (!rauhtd_pl)
+		return -ENOMEM;
+
+	/* Make sure the neighbour's netdev isn't removed in the
+	 * process.
+	 */
+	rtnl_lock();
+	do {
+		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
+		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
+				      rauhtd_pl);
+		if (err) {
+			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
+			break;
+		}
+		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
+		for (i = 0; i < num_rec; i++)
+			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
+							  i);
+	} while (num_rec);
+	rtnl_unlock();
+
+	kfree(rauhtd_pl);
+	return err;
+}
+
+static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+
+	/* Take RTNL mutex here to prevent lists from changes */
+	rtnl_lock();
+	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
+			    nexthop_neighs_list_node) {
+		/* If this neigh have nexthops, make the kernel think this neigh
+		 * is active regardless of the traffic.
+		 */
+		if (!list_empty(&neigh_entry->nexthop_list))
+			neigh_event_send(neigh_entry->n, NULL);
+	}
+	rtnl_unlock();
+}
+
+static void
+mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+	unsigned long interval = mlxsw_sp->router.neighs_update.interval;
+
+	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
+			       msecs_to_jiffies(interval));
+}
+
+static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
+{
+	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
+						 router.neighs_update.dw.work);
+	int err;
+
+	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
+	if (err)
+		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
+
+	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
+
+	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
+}
+
+static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
+						 router.nexthop_probe_dw.work);
+
+	/* Iterate over nexthop neighbours, find those who are unresolved and
+	 * send arp on them. This solves the chicken-egg problem when
+	 * the nexthop wouldn't get offloaded until the neighbor is resolved
+	 * but it wouldn't get resolved ever in case traffic is flowing in HW
+	 * using different nexthop.
+	 *
+	 * Take RTNL mutex here to prevent lists from changes.
+	 */
+	rtnl_lock();
+	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
+			    nexthop_neighs_list_node) {
+		if (!(neigh_entry->n->nud_state & NUD_VALID) &&
+		    !list_empty(&neigh_entry->nexthop_list))
+			neigh_event_send(neigh_entry->n, NULL);
+	}
+	rtnl_unlock();
+
+	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
+			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
+}
+
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_neigh_entry *neigh_entry,
+			      bool removing);
+
+static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry =
+		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
+	struct neighbour *n = neigh_entry->n;
+	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char rauht_pl[MLXSW_REG_RAUHT_LEN];
+	struct net_device *dev;
+	bool entry_connected;
+	u8 nud_state;
+	bool updating;
+	bool removing;
+	bool adding;
+	u32 dip;
+	int err;
+
+	read_lock_bh(&n->lock);
+	dip = ntohl(*((__be32 *) n->primary_key));
+	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
+	nud_state = n->nud_state;
+	dev = n->dev;
+	read_unlock_bh(&n->lock);
+
+	entry_connected = nud_state & NUD_VALID;
+	adding = (!neigh_entry->offloaded) && entry_connected;
+	updating = neigh_entry->offloaded && entry_connected;
+	removing = neigh_entry->offloaded && !entry_connected;
+
+	if (adding || updating) {
+		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
+				      neigh_entry->rif,
+				      neigh_entry->ha, dip);
+		err = mlxsw_reg_write(mlxsw_sp->core,
+				      MLXSW_REG(rauht), rauht_pl);
+		if (err) {
+			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
+			neigh_entry->offloaded = false;
+		} else {
+			neigh_entry->offloaded = true;
+		}
+		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
+	} else if (removing) {
+		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
+				      neigh_entry->rif,
+				      neigh_entry->ha, dip);
+		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
+				      rauht_pl);
+		if (err) {
+			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
+			neigh_entry->offloaded = true;
+		} else {
+			neigh_entry->offloaded = false;
+		}
+		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
+	}
+
+	neigh_release(n);
+	mlxsw_sp_port_dev_put(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
+					  unsigned long event, void *ptr)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp;
+	unsigned long interval;
+	struct net_device *dev;
+	struct neigh_parms *p;
+	struct neighbour *n;
+	u32 dip;
+
+	switch (event) {
+	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+		p = ptr;
+
+		/* We don't care about changes in the default table. */
+		if (!p->dev || p->tbl != &arp_tbl)
+			return NOTIFY_DONE;
+
+		/* We are in atomic context and can't take RTNL mutex,
+		 * so use RCU variant to walk the device chain.
+		 */
+		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
+		if (!mlxsw_sp_port)
+			return NOTIFY_DONE;
+
+		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
+		mlxsw_sp->router.neighs_update.interval = interval;
+
+		mlxsw_sp_port_dev_put(mlxsw_sp_port);
+		break;
+	case NETEVENT_NEIGH_UPDATE:
+		n = ptr;
+		dev = n->dev;
+
+		if (n->tbl != &arp_tbl)
+			return NOTIFY_DONE;
+
+		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
+		if (!mlxsw_sp_port)
+			return NOTIFY_DONE;
+
+		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+		dip = ntohl(*((__be32 *) n->primary_key));
+		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
+							  &dip,
+							  sizeof(__be32),
+							  dev);
+		if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
+			mlxsw_sp_port_dev_put(mlxsw_sp_port);
+			return NOTIFY_DONE;
+		}
+		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;
+
+		/* Take a reference to ensure the neighbour won't be
+		 * destructed until we drop the reference in delayed
+		 * work.
+		 */
+		neigh_clone(n);
+		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
+			neigh_release(n);
+			mlxsw_sp_port_dev_put(mlxsw_sp_port);
+		}
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_router_netevent_event,
+};
+
+static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int err;
+
+	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
+			      &mlxsw_sp_neigh_ht_params);
+	if (err)
+		return err;
+
+	/* Initialize the polling interval according to the default
+	 * table.
+	 */
+	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
+
+	err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+	if (err)
+		goto err_register_netevent_notifier;
+
+	/* Create the delayed works for the activity_update */
+	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
+			  mlxsw_sp_router_neighs_update_work);
+	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
+			  mlxsw_sp_router_probe_unresolved_nexthops);
+	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
+	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
+	return 0;
+
+err_register_netevent_notifier:
+	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
+	return err;
+}
+
+static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
+	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
+	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
+}
+
+struct mlxsw_sp_nexthop {
+	struct list_head neigh_list_node; /* member of neigh entry list */
+	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
+						* this belongs to
+						*/
+	u8 should_offload:1, /* set indicates this neigh is connected and
+			      * should be put to KVD linear area of this group.
+			      */
+	   offloaded:1, /* set in case the neigh is actually put into
+			 * KVD linear area of this group.
+			 */
+	   update:1; /* set indicates that MAC of this neigh should be
+		      * updated in HW
+		      */
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+};
+
+struct mlxsw_sp_nexthop_group {
+	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
+	struct list_head fib_list; /* list of fib entries that use this group */
+	u8 adj_index_valid:1;
+	u32 adj_index;
+	u16 ecmp_size;
+	u16 count;
+	struct mlxsw_sp_nexthop nexthops[0];
+};
+
+static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
+					     struct mlxsw_sp_vr *vr,
+					     u32 adj_index, u16 ecmp_size,
+					     u32 new_adj_index,
+					     u16 new_ecmp_size)
+{
+	char raleu_pl[MLXSW_REG_RALEU_LEN];
+
+	mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id,
+			     adj_index, ecmp_size,
+			     new_adj_index, new_ecmp_size);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
+}
+
+static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
+					  struct mlxsw_sp_nexthop_group *nh_grp,
+					  u32 old_adj_index, u16 old_ecmp_size)
+{
+	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_vr *vr = NULL;
+	int err;
+
+	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+		if (vr == fib_entry->vr)
+			continue;
+		vr = fib_entry->vr;
+		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
+							old_adj_index,
+							old_ecmp_size,
+							nh_grp->adj_index,
+							nh_grp->ecmp_size);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				       struct mlxsw_sp_nexthop *nh)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+	char ratr_pl[MLXSW_REG_RATR_LEN];
+
+	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+			    true, adj_index, neigh_entry->rif);
+	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	u32 adj_index = nh_grp->adj_index; /* base */
+	struct mlxsw_sp_nexthop *nh;
+	int i;
+	int err;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+
+		if (!nh->should_offload) {
+			nh->offloaded = 0;
+			continue;
+		}
+
+		if (nh->update) {
+			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
+							  adj_index, nh);
+			if (err)
+				return err;
+			nh->update = 0;
+			nh->offloaded = 1;
+		}
+		adj_index++;
+	}
+	return 0;
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_fib_entry *fib_entry);
+
+static int
+mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	struct mlxsw_sp_fib_entry *fib_entry;
+	int err;
+
+	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static void
+mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	struct mlxsw_sp_nexthop *nh;
+	bool offload_change = false;
+	u32 adj_index;
+	u16 ecmp_size = 0;
+	bool old_adj_index_valid;
+	u32 old_adj_index;
+	u16 old_ecmp_size;
+	int ret;
+	int i;
+	int err;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+
+		if (nh->should_offload ^ nh->offloaded) {
+			offload_change = true;
+			if (nh->should_offload)
+				nh->update = 1;
+		}
+		if (nh->should_offload)
+			ecmp_size++;
+	}
+	if (!offload_change) {
+		/* Nothing was added or removed, so no need to reallocate. Just
+		 * update MAC on existing adjacency indexes.
+		 */
+		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
+		if (err) {
+			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+			goto set_trap;
+		}
+		return;
+	}
+	if (!ecmp_size)
+		/* No neigh of this group is connected so we just set
+		 * the trap and let everthing flow through kernel.
+		 */
+		goto set_trap;
+
+	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
+	if (ret < 0) {
+		/* We ran out of KVD linear space, just set the
+		 * trap and let everything flow through kernel.
+		 */
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
+		goto set_trap;
+	}
+	adj_index = ret;
+	old_adj_index_valid = nh_grp->adj_index_valid;
+	old_adj_index = nh_grp->adj_index;
+	old_ecmp_size = nh_grp->ecmp_size;
+	nh_grp->adj_index_valid = 1;
+	nh_grp->adj_index = adj_index;
+	nh_grp->ecmp_size = ecmp_size;
+	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
+	if (err) {
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+		goto set_trap;
+	}
+
+	if (!old_adj_index_valid) {
+		/* The trap was set for fib entries, so we have to call
+		 * fib entry update to unset it and use adjacency index.
+		 */
+		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+		if (err) {
+			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
+			goto set_trap;
+		}
+		return;
+	}
+
+	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
+					     old_adj_index, old_ecmp_size);
+	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
+	if (err) {
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
+		goto set_trap;
+	}
+	return;
+
+set_trap:
+	old_adj_index_valid = nh_grp->adj_index_valid;
+	nh_grp->adj_index_valid = 0;
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+		nh->offloaded = 0;
+	}
+	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+	if (err)
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
+	if (old_adj_index_valid)
+		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
+}
+
+static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
+					    bool removing)
+{
+	if (!removing && !nh->should_offload)
+		nh->should_offload = 1;
+	else if (removing && nh->offloaded)
+		nh->should_offload = 0;
+	nh->update = 1;
+}
+
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_neigh_entry *neigh_entry,
+			      bool removing)
+{
+	struct mlxsw_sp_nexthop *nh;
+
+	/* Take RTNL mutex here to prevent lists from changes */
+	rtnl_lock();
+	list_for_each_entry(nh, &neigh_entry->nexthop_list,
+			    neigh_list_node) {
+		__mlxsw_sp_nexthop_neigh_update(nh, removing);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+	}
+	rtnl_unlock();
+}
+
+static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_nexthop_group *nh_grp,
+				 struct mlxsw_sp_nexthop *nh,
+				 struct fib_nh *fib_nh)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry;
+	u32 gwip = ntohl(fib_nh->nh_gw);
+	struct net_device *dev = fib_nh->nh_dev;
+	struct neighbour *n;
+	u8 nud_state;
+
+	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
+						  sizeof(gwip), dev);
+	if (!neigh_entry) {
+		__be32 gwipn = htonl(gwip);
+
+		n = neigh_create(&arp_tbl, &gwipn, dev);
+		if (IS_ERR(n))
+			return PTR_ERR(n);
+		neigh_event_send(n, NULL);
+		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
+							  sizeof(gwip), dev);
+		if (!neigh_entry) {
+			neigh_release(n);
+			return -EINVAL;
+		}
+	} else {
+		/* Take a reference of neigh here ensuring that neigh would
+		 * not be detructed before the nexthop entry is finished.
+		 * The second branch takes the reference in neith_create()
+		 */
+		n = neigh_entry->n;
+		neigh_clone(n);
+	}
+
+	/* If that is the first nexthop connected to that neigh, add to
+	 * nexthop_neighs_list
+	 */
+	if (list_empty(&neigh_entry->nexthop_list))
+		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
+			      &mlxsw_sp->router.nexthop_neighs_list);
+
+	nh->nh_grp = nh_grp;
+	nh->neigh_entry = neigh_entry;
+	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
+	read_lock_bh(&n->lock);
+	nud_state = n->nud_state;
+	read_unlock_bh(&n->lock);
+	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));
+
+	return 0;
+}
+
+static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_nexthop *nh)
+{
+	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+
+	list_del(&nh->neigh_list_node);
+
+	/* If that is the last nexthop connected to that neigh, remove from
+	 * nexthop_neighs_list
+	 */
+	if (list_empty(&nh->neigh_entry->nexthop_list))
+		list_del(&nh->neigh_entry->nexthop_neighs_list_node);
+
+	neigh_release(neigh_entry->n);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp;
+	struct mlxsw_sp_nexthop *nh;
+	struct fib_nh *fib_nh;
+	size_t alloc_size;
+	int i;
+	int err;
+
+	alloc_size = sizeof(*nh_grp) +
+		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
+	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+	if (!nh_grp)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&nh_grp->fib_list);
+	nh_grp->count = fi->fib_nhs;
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+		fib_nh = &fi->fib_nh[i];
+		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
+		if (err)
+			goto err_nexthop_init;
+	}
+	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
+	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+	return nh_grp;
+
+err_nexthop_init:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+	kfree(nh_grp);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	struct mlxsw_sp_nexthop *nh;
+	int i;
+
+	list_del(&nh_grp->list);
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+	}
+	kfree(nh_grp);
+}
+
+static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
+				   struct fib_info *fi)
+{
+	int i;
+
+	for (i = 0; i < fi->fib_nhs; i++) {
+		struct fib_nh *fib_nh = &fi->fib_nh[i];
+		u32 gwip = ntohl(fib_nh->nh_gw);
+
+		if (memcmp(nh->neigh_entry->key.addr,
+			   &gwip, sizeof(u32)) == 0 &&
+		    nh->neigh_entry->key.dev == fib_nh->nh_dev)
+			return true;
+	}
+	return false;
+}
+
+static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
+					 struct fib_info *fi)
+{
+	int i;
+
+	if (nh_grp->count != fi->fib_nhs)
+		return false;
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+		if (!mlxsw_sp_nexthop_match(nh, fi))
+			return false;
+	}
+	return true;
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp;
+
+	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
+			    list) {
+		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
+			return nh_grp;
+	}
+	return NULL;
+}
+
+static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_fib_entry *fib_entry,
+				      struct fib_info *fi)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp;
+
+	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
+	if (!nh_grp) {
+		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
+		if (IS_ERR(nh_grp))
+			return PTR_ERR(nh_grp);
+	}
+	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
+	fib_entry->nh_group = nh_grp;
+	return 0;
+}
+
+static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+	list_del(&fib_entry->nexthop_group_node);
+	if (!list_empty(&nh_grp->fib_list))
+		return;
+	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	char rgcr_pl[MLXSW_REG_RGCR_LEN];
+
+	mlxsw_reg_rgcr_pack(rgcr_pl, true);
+	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+}
+
+static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	char rgcr_pl[MLXSW_REG_RGCR_LEN];
+
+	mlxsw_reg_rgcr_pack(rgcr_pl, false);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+}
+
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int err;
+
+	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
+	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
+	err = __mlxsw_sp_router_init(mlxsw_sp);
+	if (err)
+		return err;
+	mlxsw_sp_lpm_init(mlxsw_sp);
+	mlxsw_sp_vrs_init(mlxsw_sp);
+	return mlxsw_sp_neigh_init(mlxsw_sp);
+}
+
+void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp_neigh_fini(mlxsw_sp);
+	__mlxsw_sp_router_fini(mlxsw_sp);
+}
+
+static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
+					 struct mlxsw_sp_fib_entry *fib_entry,
+					 enum mlxsw_reg_ralue_op op)
+{
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+	u32 *p_dip = (u32 *) fib_entry->key.addr;
+	struct mlxsw_sp_vr *vr = fib_entry->vr;
+	enum mlxsw_reg_ralue_trap_action trap_action;
+	u16 trap_id = 0;
+	u32 adjacency_index = 0;
+	u16 ecmp_size = 0;
+
+	/* In case the nexthop group adjacency index is valid, use it
+	 * with provided ECMP size. Otherwise, setup trap and pass
+	 * traffic to kernel.
+	 */
+	if (fib_entry->nh_group->adj_index_valid) {
+		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
+		adjacency_index = fib_entry->nh_group->adj_index;
+		ecmp_size = fib_entry->nh_group->ecmp_size;
+	} else {
+		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
+		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
+	}
+
+	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+			      fib_entry->key.prefix_len, *p_dip);
+	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
+					adjacency_index, ecmp_size);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fib_entry *fib_entry,
+					enum mlxsw_reg_ralue_op op)
+{
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+	u32 *p_dip = (u32 *) fib_entry->key.addr;
+	struct mlxsw_sp_vr *vr = fib_entry->vr;
+
+	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+			      fib_entry->key.prefix_len, *p_dip);
+	mlxsw_reg_ralue_act_local_pack(ralue_pl,
+				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
+				       fib_entry->rif);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib_entry *fib_entry,
+				       enum mlxsw_reg_ralue_op op)
+{
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+	u32 *p_dip = (u32 *) fib_entry->key.addr;
+	struct mlxsw_sp_vr *vr = fib_entry->vr;
+
+	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+			      fib_entry->key.prefix_len, *p_dip);
+	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_fib_entry *fib_entry,
+				  enum mlxsw_reg_ralue_op op)
+{
+	switch (fib_entry->type) {
+	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
+		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
+	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
+		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
+	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
+		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
+	}
+	return -EINVAL;
+}
+
+static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_fib_entry *fib_entry,
+				 enum mlxsw_reg_ralue_op op)
+{
+	switch (fib_entry->vr->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
+	case MLXSW_SP_L3_PROTO_IPV6:
+		return -EINVAL;
+	}
+	return -EINVAL;
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_fib_entry *fib_entry)
+{
+	enum mlxsw_reg_ralue_op op;
+
+	op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE :
+				 MLXSW_REG_RALUE_OP_WRITE_UPDATE;
+	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
+}
+
+static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_fib_entry *fib_entry)
+{
+	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
+}
+
+struct mlxsw_sp_router_fib4_add_info {
+	struct switchdev_trans_item tritem;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_fib_entry *fib_entry;
+};
+
+static void mlxsw_sp_router_fib4_add_info_destroy(void const *data)
+{
+	const struct mlxsw_sp_router_fib4_add_info *info = data;
+	struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
+	struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
+
+	mlxsw_sp_fib_entry_destroy(fib_entry);
+	mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr);
+	kfree(info);
+}
+
+static int
+mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
+				const struct switchdev_obj_ipv4_fib *fib4,
+				struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct fib_info *fi = fib4->fi;
+
+	if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) {
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+		return 0;
+	}
+	if (fib4->type != RTN_UNICAST)
+		return -EINVAL;
+
+	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
+		struct mlxsw_sp_rif *r;
+
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev);
+		if (!r)
+			return -EINVAL;
+		fib_entry->rif = r->rif;
+		return 0;
+	}
+	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+	return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
+}
+
+static void
+mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_fib_entry *fib_entry)
+{
+	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
+		return;
+	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
+}
+
+static int
+mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
+				 const struct switchdev_obj_ipv4_fib *fib4,
+				 struct switchdev_trans *trans)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_router_fib4_add_info *info;
+	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_vr *vr;
+	int err;
+
+	vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id,
+			     MLXSW_SP_L3_PROTO_IPV4);
+	if (IS_ERR(vr))
+		return PTR_ERR(vr);
+
+	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst,
+					      sizeof(fib4->dst), fib4->dst_len);
+	if (!fib_entry) {
+		err = -ENOMEM;
+		goto err_fib_entry_create;
+	}
+	fib_entry->vr = vr;
+
+	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry);
+	if (err)
+		goto err_fib4_entry_init;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		err = -ENOMEM;
+		goto err_alloc_info;
+	}
+	info->mlxsw_sp = mlxsw_sp;
+	info->fib_entry = fib_entry;
+	switchdev_trans_item_enqueue(trans, info,
+				     mlxsw_sp_router_fib4_add_info_destroy,
+				     &info->tritem);
+	return 0;
+
+err_alloc_info:
+	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
+err_fib4_entry_init:
+	mlxsw_sp_fib_entry_destroy(fib_entry);
+err_fib_entry_create:
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+	return err;
+}
+
+static int
+mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
+				const struct switchdev_obj_ipv4_fib *fib4,
+				struct switchdev_trans *trans)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_router_fib4_add_info *info;
+	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_vr *vr;
+	int err;
+
+	info = switchdev_trans_item_dequeue(trans);
+	fib_entry = info->fib_entry;
+	kfree(info);
+
+	vr = fib_entry->vr;
+	err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry);
+	if (err)
+		goto err_fib_entry_insert;
+	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+	if (err)
+		goto err_fib_entry_add;
+	return 0;
+
+err_fib_entry_add:
+	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
+err_fib_entry_insert:
+	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
+	mlxsw_sp_fib_entry_destroy(fib_entry);
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+	return err;
+}
+
+int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
+			     const struct switchdev_obj_ipv4_fib *fib4,
+			     struct switchdev_trans *trans)
+{
+	if (switchdev_trans_ph_prepare(trans))
+		return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port,
+							fib4, trans);
+	return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port,
+					       fib4, trans);
+}
+
+int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			     const struct switchdev_obj_ipv4_fib *fib4)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_vr *vr;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4);
+	if (!vr) {
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n");
+		return -ENOENT;
+	}
+	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst,
+					      sizeof(fib4->dst), fib4->dst_len);
+	if (!fib_entry) {
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n");
+		return PTR_ERR(vr);
+	}
+	mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
+	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
+	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
+	mlxsw_sp_fib_entry_destroy(fib_entry);
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index a0c7376ee517..a1ad5e6bdfa8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -166,11 +166,6 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state);
 }
 
-static bool mlxsw_sp_vfid_is_vport_br(u16 vfid)
-{
-	return vfid >= MLXSW_SP_VFID_PORT_MAX;
-}
-
 static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				     u16 idx_begin, u16 idx_end, bool set,
 				     bool only_uc)
@@ -182,15 +177,10 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	char *sftr_pl;
 	int err;
 
-	if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+	if (mlxsw_sp_port_is_vport(mlxsw_sp_port))
 		table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID;
-		if (mlxsw_sp_vfid_is_vport_br(idx_begin))
-			local_port = mlxsw_sp_port->local_port;
-		else
-			local_port = MLXSW_PORT_CPU_PORT;
-	} else {
+	else
 		table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
-	}
 
 	sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
 	if (!sftr_pl)
@@ -384,18 +374,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
 	return err;
 }
 
-static struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp,
-					      u16 fid)
-{
-	struct mlxsw_sp_fid *f;
-
-	list_for_each_entry(f, &mlxsw_sp->fids, list)
-		if (f->fid == fid)
-			return f;
-
-	return NULL;
-}
-
 static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create)
 {
 	char sfmr_pl[MLXSW_REG_SFMR_LEN];
@@ -426,8 +404,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_alloc(u16 fid)
 	return f;
 }
 
-static struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp,
-						u16 fid)
+struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid)
 {
 	struct mlxsw_sp_fid *f;
 	int err;
@@ -462,13 +439,15 @@ err_fid_map:
 	return ERR_PTR(err);
 }
 
-static void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp,
-				 struct mlxsw_sp_fid *f)
+void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f)
 {
 	u16 fid = f->fid;
 
 	list_del(&f->list);
 
+	if (f->r)
+		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
+
 	kfree(f);
 
 	mlxsw_sp_fid_op(mlxsw_sp, fid, false);
@@ -633,25 +612,6 @@ err_port_allow_untagged_set:
 	return err;
 }
 
-static int mlxsw_sp_port_add_vids(struct net_device *dev, u16 vid_begin,
-				  u16 vid_end)
-{
-	u16 vid;
-	int err;
-
-	for (vid = vid_begin; vid <= vid_end; vid++) {
-		err = mlxsw_sp_port_add_vid(dev, 0, vid);
-		if (err)
-			goto err_port_add_vid;
-	}
-	return 0;
-
-err_port_add_vid:
-	for (vid--; vid >= vid_begin; vid--)
-		mlxsw_sp_port_kill_vid(dev, 0, vid);
-	return err;
-}
-
 static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				     u16 vid_begin, u16 vid_end, bool is_member,
 				     bool untagged)
@@ -681,12 +641,8 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	u16 vid, old_pvid;
 	int err;
 
-	/* In case this is invoked with BRIDGE_FLAGS_SELF and port is
-	 * not bridged, then packets ingressing through the port with
-	 * the specified VIDs will be directed to CPU.
-	 */
 	if (!mlxsw_sp_port->bridged)
-		return mlxsw_sp_port_add_vids(dev, vid_begin, vid_end);
+		return -EINVAL;
 
 	err = mlxsw_sp_port_fid_join(mlxsw_sp_port, vid_begin, vid_end);
 	if (err) {
@@ -776,9 +732,10 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
 			MLXSW_REG_SFD_OP_WRITE_REMOVE;
 }
 
-static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				   const char *mac, u16 fid, bool adding,
-				   bool dynamic)
+static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				     const char *mac, u16 fid, bool adding,
+				     enum mlxsw_reg_sfd_rec_action action,
+				     bool dynamic)
 {
 	char *sfd_pl;
 	int err;
@@ -789,14 +746,29 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 
 	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
 	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
-			      mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
-			      local_port);
+			      mac, fid, action, local_port);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
 	kfree(sfd_pl);
 
 	return err;
 }
 
+static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				   const char *mac, u16 fid, bool adding,
+				   bool dynamic)
+{
+	return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding,
+					 MLXSW_REG_SFD_REC_ACTION_NOP, dynamic);
+}
+
+int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
+			bool adding)
+{
+	return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding,
+					 MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER,
+					 false);
+}
+
 static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
 				       const char *mac, u16 fid, u16 lag_vid,
 				       bool adding, bool dynamic)
@@ -1001,6 +973,11 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
 					      SWITCHDEV_OBJ_PORT_VLAN(obj),
 					      trans);
 		break;
+	case SWITCHDEV_OBJ_ID_IPV4_FIB:
+		err = mlxsw_sp_router_fib4_add(mlxsw_sp_port,
+					       SWITCHDEV_OBJ_IPV4_FIB(obj),
+					       trans);
+		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port,
 						   SWITCHDEV_OBJ_PORT_FDB(obj),
@@ -1019,21 +996,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
 	return err;
 }
 
-static int mlxsw_sp_port_kill_vids(struct net_device *dev, u16 vid_begin,
-				   u16 vid_end)
-{
-	u16 vid;
-	int err;
-
-	for (vid = vid_begin; vid <= vid_end; vid++) {
-		err = mlxsw_sp_port_kill_vid(dev, 0, vid);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 				     u16 vid_begin, u16 vid_end, bool init)
 {
@@ -1041,12 +1003,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 	u16 vid, pvid;
 	int err;
 
-	/* In case this is invoked with BRIDGE_FLAGS_SELF and port is
-	 * not bridged, then prevent packets ingressing through the
-	 * port with the specified VIDs from being trapped to CPU.
-	 */
 	if (!init && !mlxsw_sp_port->bridged)
-		return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end);
+		return -EINVAL;
 
 	err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
 					false, false);
@@ -1165,6 +1123,10 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev,
 		err = mlxsw_sp_port_vlans_del(mlxsw_sp_port,
 					      SWITCHDEV_OBJ_PORT_VLAN(obj));
 		break;
+	case SWITCHDEV_OBJ_ID_IPV4_FIB:
+		err = mlxsw_sp_router_fib4_del(mlxsw_sp_port,
+					       SWITCHDEV_OBJ_IPV4_FIB(obj));
+		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port,
 						   SWITCHDEV_OBJ_PORT_FDB(obj));
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 3842eab9449a..25f658b3849a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -316,7 +316,10 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
 		}
 	}
 	mlxsw_sx_txhdr_construct(skb, &tx_info);
-	len = skb->len;
+	/* TX header is consumed by HW on the way so we shouldn't count its
+	 * bytes as being sent.
+	 */
+	len = skb->len - MLXSW_TXHDR_LEN;
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
 	 */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 53a9550be75e..470d7696e9fe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -54,6 +54,11 @@ enum {
 	MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32,
 	MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33,
 	MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
+	MLXSW_TRAP_ID_ARPBC = 0x50,
+	MLXSW_TRAP_ID_ARPUC = 0x51,
+	MLXSW_TRAP_ID_IP2ME = 0x5F,
+	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
 
 	MLXSW_TRAP_ID_MAX = 0x1FF
 };
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 7066954c39d6..0a26b11ca8f6 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1151,7 +1151,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
 			enc28j60_phy_read(priv, PHIR);
 		}
 		/* TX complete handler */
-		if ((intflags & EIR_TXIF) != 0) {
+		if (((intflags & EIR_TXIF) != 0) &&
+		    ((intflags & EIR_TXERIF) == 0)) {
 			bool err = false;
 			loop++;
 			if (netif_msg_intr(priv))
@@ -1203,7 +1204,7 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
 					enc28j60_tx_clear(ndev, true);
 			} else
 				enc28j60_tx_clear(ndev, true);
-			locked_reg_bfclr(priv, EIR, EIR_TXERIF);
+			locked_reg_bfclr(priv, EIR, EIR_TXERIF | EIR_TXIF);
 		}
 		/* RX Error handler */
 		if ((intflags & EIR_RXERIF) != 0) {
@@ -1238,6 +1239,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
  */
 static void enc28j60_hw_tx(struct enc28j60_net *priv)
 {
+	BUG_ON(!priv->tx_skb);
+
 	if (netif_msg_tx_queued(priv))
 		printk(KERN_DEBUG DRV_NAME
 			": Tx Packet Len:%d\n", priv->tx_skb->len);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index e744acc18ef4..690635660195 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -63,7 +63,7 @@
 #define NFP_NET_POLL_TIMEOUT	5
 
 /* Bar allocation */
-#define NFP_NET_CRTL_BAR	0
+#define NFP_NET_CTRL_BAR	0
 #define NFP_NET_Q0_BAR		2
 #define NFP_NET_Q1_BAR		4	/* OBSOLETE */
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 2195ed3053da..88678c172b19 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1845,13 +1845,14 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
 }
 
 /**
- * nfp_net_write_mac_addr() - Write mac address to device registers
+ * nfp_net_write_mac_addr() - Write mac address to the device control BAR
  * @nn:      NFP Net device to reconfigure
- * @mac:     Six-byte MAC address to be written
  *
- * We do a bit of byte swapping dance because firmware is LE.
+ * Writes the MAC address from the netdev to the device control BAR.  Does not
+ * perform the required reconfig.  We do a bit of byte swapping dance because
+ * firmware is LE.
  */
-static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac)
+static void nfp_net_write_mac_addr(struct nfp_net *nn)
 {
 	nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
 		  get_unaligned_be32(nn->netdev->dev_addr));
@@ -1952,7 +1953,7 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
 		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
 
-	nfp_net_write_mac_addr(nn, nn->netdev->dev_addr);
+	nfp_net_write_mac_addr(nn);
 
 	nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
 	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
@@ -2015,7 +2016,7 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 
 	netif_tx_wake_all_queues(nn->netdev);
 
-	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 	nfp_net_read_link_status(nn);
 }
 
@@ -2044,7 +2045,7 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
 	if (err)
 		goto err_free_exn;
-	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 
 	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
 			       GFP_KERNEL);
@@ -2133,7 +2134,7 @@ static void nfp_net_close_stack(struct nfp_net *nn)
 {
 	unsigned int r;
 
-	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
@@ -2577,7 +2578,7 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev,
 		return;
 
 	idx = nfp_net_find_vxlan_idx(nn, ti->port);
-	if (!nn->vxlan_usecnt[idx] || idx == -ENOSPC)
+	if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
 		return;
 
 	if (!--nn->vxlan_usecnt[idx])
@@ -2739,7 +2740,7 @@ int nfp_net_netdev_init(struct net_device *netdev)
 	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
 	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
 
-	nfp_net_write_mac_addr(nn, nn->netdev->dev_addr);
+	nfp_net_write_mac_addr(nn);
 
 	/* Set default MTU and Freelist buffer size */
 	if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index ccfef1f17627..7d7933d00b8f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -605,6 +605,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
 
 static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.get_drvinfo		= nfp_net_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
 	.get_ringparam		= nfp_net_get_ringparam,
 	.set_ringparam		= nfp_net_set_ringparam,
 	.get_strings		= nfp_net_get_strings,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index e2b22b8a20f1..37abef016a0a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -124,11 +124,11 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 	 * first NFP_NET_CFG_BAR_SZ of the BAR.  This keeps the code
 	 * the identical for PF and VF drivers.
 	 */
-	ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CRTL_BAR),
+	ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CTRL_BAR),
 				   NFP_NET_CFG_BAR_SZ);
 	if (!ctrl_bar) {
 		dev_err(&pdev->dev,
-			"Failed to map resource %d\n", NFP_NET_CRTL_BAR);
+			"Failed to map resource %d\n", NFP_NET_CTRL_BAR);
 		err = -EIO;
 		goto err_pci_regions;
 	}
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index b1ce7aaa8f8b..4d4ecba0aad9 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -425,7 +425,6 @@ struct netdata_local {
 	unsigned int		last_tx_idx;
 	unsigned int		num_used_tx_buffs;
 	struct mii_bus		*mii_bus;
-	struct phy_device	*phy_dev;
 	struct clk		*clk;
 	dma_addr_t		dma_buff_base_p;
 	void			*dma_buff_base_v;
@@ -750,7 +749,7 @@ static int lpc_mdio_reset(struct mii_bus *bus)
 static void lpc_handle_link_change(struct net_device *ndev)
 {
 	struct netdata_local *pldat = netdev_priv(ndev);
-	struct phy_device *phydev = pldat->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
 	unsigned long flags;
 
 	bool status_change = false;
@@ -814,7 +813,6 @@ static int lpc_mii_probe(struct net_device *ndev)
 	pldat->link = 0;
 	pldat->speed = 0;
 	pldat->duplex = -1;
-	pldat->phy_dev = phydev;
 
 	phy_attached_info(phydev);
 
@@ -1048,8 +1046,8 @@ static int lpc_eth_close(struct net_device *ndev)
 	napi_disable(&pldat->napi);
 	netif_stop_queue(ndev);
 
-	if (pldat->phy_dev)
-		phy_stop(pldat->phy_dev);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 
 	spin_lock_irqsave(&pldat->lock, flags);
 	__lpc_eth_reset(pldat);
@@ -1185,8 +1183,7 @@ static void lpc_eth_set_multicast_list(struct net_device *ndev)
 
 static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 {
-	struct netdata_local *pldat = netdev_priv(ndev);
-	struct phy_device *phydev = pldat->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (!netif_running(ndev))
 		return -EINVAL;
@@ -1207,14 +1204,14 @@ static int lpc_eth_open(struct net_device *ndev)
 	__lpc_eth_clock_enable(pldat, true);
 
 	/* Suspended PHY makes LPC ethernet core block, so resume now */
-	phy_resume(pldat->phy_dev);
+	phy_resume(ndev->phydev);
 
 	/* Reset and initialize */
 	__lpc_eth_reset(pldat);
 	__lpc_eth_init(pldat);
 
 	/* schedule a link state check */
-	phy_start(pldat->phy_dev);
+	phy_start(ndev->phydev);
 	netif_start_queue(ndev);
 	napi_enable(&pldat->napi);
 
@@ -1247,37 +1244,13 @@ static void lpc_eth_ethtool_setmsglevel(struct net_device *ndev, u32 level)
 	pldat->msg_enable = level;
 }
 
-static int lpc_eth_ethtool_getsettings(struct net_device *ndev,
-	struct ethtool_cmd *cmd)
-{
-	struct netdata_local *pldat = netdev_priv(ndev);
-	struct phy_device *phydev = pldat->phy_dev;
-
-	if (!phydev)
-		return -EOPNOTSUPP;
-
-	return phy_ethtool_gset(phydev, cmd);
-}
-
-static int lpc_eth_ethtool_setsettings(struct net_device *ndev,
-	struct ethtool_cmd *cmd)
-{
-	struct netdata_local *pldat = netdev_priv(ndev);
-	struct phy_device *phydev = pldat->phy_dev;
-
-	if (!phydev)
-		return -EOPNOTSUPP;
-
-	return phy_ethtool_sset(phydev, cmd);
-}
-
 static const struct ethtool_ops lpc_eth_ethtool_ops = {
 	.get_drvinfo	= lpc_eth_ethtool_getdrvinfo,
-	.get_settings	= lpc_eth_ethtool_getsettings,
-	.set_settings	= lpc_eth_ethtool_setsettings,
 	.get_msglevel	= lpc_eth_ethtool_getmsglevel,
 	.set_msglevel	= lpc_eth_ethtool_setmsglevel,
 	.get_link	= ethtool_op_get_link,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 static const struct net_device_ops lpc_netdev_ops = {
@@ -1460,7 +1433,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	netdev_info(ndev, "LPC mac at 0x%08x irq %d\n",
 	       res->start, ndev->irq);
 
-	phydev = pldat->phy_dev;
+	phydev = ndev->phydev;
 
 	device_init_wakeup(&pdev->dev, 1);
 	device_set_wakeup_enable(&pdev->dev, 0);
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 9a63df1184f1..35e53771533f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -489,8 +489,8 @@ struct qed_dev {
 
 	u32				int_mode;
 	enum qed_coalescing_mode	int_coalescing_mode;
-	u8				rx_coalesce_usecs;
-	u8				tx_coalesce_usecs;
+	u16				rx_coalesce_usecs;
+	u16				tx_coalesce_usecs;
 
 	/* Start Bar offset of first hwfn */
 	void __iomem			*regview;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index d121a8bf6b20..a12c6caa6c66 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -72,6 +72,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->mtu			= cpu_to_le16(p_params->mtu);
 	p_ramrod->inner_vlan_removal_en	= p_params->remove_inner_vlan;
 	p_ramrod->drop_ttl0_en		= p_params->drop_ttl0;
+	p_ramrod->untagged		= p_params->only_untagged;
 
 	SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1);
 	SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1);
@@ -247,10 +248,6 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn,
 		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_DROP_ALL,
 			  !!(accept_filter & QED_ACCEPT_NONE));
 
-		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL,
-			  (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) &&
-			   !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED)));
-
 		SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_DROP_ALL,
 			  !!(accept_filter & QED_ACCEPT_NONE));
 
@@ -1756,7 +1753,8 @@ static int qed_start_vport(struct qed_dev *cdev,
 			   start.vport_id, start.mtu);
 	}
 
-	qed_reset_vport_stats(cdev);
+	if (params->clear_stats)
+		qed_reset_vport_stats(cdev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index e32ee57cdfee..1f13abb5c316 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1088,6 +1088,7 @@ static int qed_get_port_type(u32 media_type)
 	case MEDIA_SFPP_10G_FIBER:
 	case MEDIA_SFP_1G_FIBER:
 	case MEDIA_XFP_FIBER:
+	case MEDIA_MODULE_FIBER:
 	case MEDIA_KR:
 		port_type = PORT_FIBRE;
 		break;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index ad9bf5c85c3f..97ffeae262bb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -213,19 +213,15 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
 	SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
 		  DQ_XCM_CORE_SPQ_PROD_CMD);
 	db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
-
-	/* validate producer is up to-date */
-	rmb();
-
 	db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain));
 
-	/* do not reorder */
-	barrier();
+	/* make sure the SPQE is updated before the doorbell */
+	wmb();
 
 	DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db);
 
 	/* make sure doorbell is rang */
-	mmiowb();
+	wmb();
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
 		   "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n",
@@ -620,7 +616,9 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 
 			*p_en2 = *p_ent;
 
-			kfree(p_ent);
+			/* EBLOCK responsible to free the allocated p_ent */
+			if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK)
+				kfree(p_ent);
 
 			p_ent = p_en2;
 		}
@@ -755,6 +753,15 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		 * Thus, after gaining the answer perform the cleanup here.
 		 */
 		rc = qed_spq_block(p_hwfn, p_ent, fw_return_code);
+
+		if (p_ent->queue == &p_spq->unlimited_pending) {
+			/* This is an allocated p_ent which does not need to
+			 * return to pool.
+			 */
+			kfree(p_ent);
+			return rc;
+		}
+
 		if (rc)
 			goto spq_post_fail2;
 
@@ -850,8 +857,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 		found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
 					fw_return_code);
 
-	if (found->comp_mode != QED_SPQ_MODE_EBLOCK)
-		/* EBLOCK is responsible for freeing its own entry */
+	if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
+	    (found->queue == &p_spq->unlimited_pending))
+		/* EBLOCK  is responsible for returning its own entry into the
+		 * free list, unless it originally added the entry into the
+		 * unlimited pending list.
+		 */
 		qed_spq_return_entry(p_hwfn, found);
 
 	/* Attempt to post pending requests */
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 1441c8f6d414..02b06d4e40ae 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -24,7 +24,7 @@
 #include <linux/qed/qed_eth_if.h>
 
 #define QEDE_MAJOR_VERSION		8
-#define QEDE_MINOR_VERSION		7
+#define QEDE_MINOR_VERSION		10
 #define QEDE_REVISION_VERSION		1
 #define QEDE_ENGINEERING_VERSION	20
 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "."	\
@@ -143,6 +143,8 @@ struct qede_dev {
 	struct mutex			qede_lock;
 	u32				state; /* Protected by qede_lock */
 	u16				rx_buf_size;
+	u32				rx_copybreak;
+
 	/* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */
 #define ETH_OVERHEAD			(ETH_HLEN + 8 + 8)
 	/* Max supported alignment is 256 (8 shift)
@@ -235,6 +237,7 @@ struct qede_rx_queue {
 
 	u64			rx_hw_errors;
 	u64			rx_alloc_errors;
+	u64			rx_ip_frags;
 };
 
 union db_prod {
@@ -332,6 +335,7 @@ void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev,
 #define NUM_TX_BDS_MIN		128
 #define NUM_TX_BDS_DEF		NUM_TX_BDS_MAX
 
+#define QEDE_MIN_PKT_LEN	64
 #define QEDE_RX_HDR_SIZE	256
 #define	for_each_rss(i) for (i = 0; i < edev->num_rss; i++)
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 6228482bf7f0..f8492cac9290 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -37,6 +37,7 @@ static const struct {
 } qede_rqstats_arr[] = {
 	QEDE_RQSTAT(rx_hw_errors),
 	QEDE_RQSTAT(rx_alloc_errors),
+	QEDE_RQSTAT(rx_ip_frags),
 };
 
 #define QEDE_NUM_RQSTATS ARRAY_SIZE(qede_rqstats_arr)
@@ -430,11 +431,13 @@ static int qede_get_coalesce(struct net_device *dev,
 			     struct ethtool_coalesce *coal)
 {
 	struct qede_dev *edev = netdev_priv(dev);
+	u16 rxc, txc;
 
 	memset(coal, 0, sizeof(struct ethtool_coalesce));
-	edev->ops->common->get_coalesce(edev->cdev,
-					(u16 *)&coal->rx_coalesce_usecs,
-					(u16 *)&coal->tx_coalesce_usecs);
+	edev->ops->common->get_coalesce(edev->cdev, &rxc, &txc);
+
+	coal->rx_coalesce_usecs = rxc;
+	coal->tx_coalesce_usecs = txc;
 
 	return 0;
 }
@@ -1182,6 +1185,48 @@ static void qede_self_test(struct net_device *dev,
 	}
 }
 
+static int qede_set_tunable(struct net_device *dev,
+			    const struct ethtool_tunable *tuna,
+			    const void *data)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	u32 val;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		val = *(u32 *)data;
+		if (val < QEDE_MIN_PKT_LEN || val > QEDE_RX_HDR_SIZE) {
+			DP_VERBOSE(edev, QED_MSG_DEBUG,
+				   "Invalid rx copy break value, range is [%u, %u]",
+				   QEDE_MIN_PKT_LEN, QEDE_RX_HDR_SIZE);
+			return -EINVAL;
+		}
+
+		edev->rx_copybreak = *(u32 *)data;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int qede_get_tunable(struct net_device *dev,
+			    const struct ethtool_tunable *tuna, void *data)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = edev->rx_copybreak;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 static const struct ethtool_ops qede_ethtool_ops = {
 	.get_settings = qede_get_settings,
 	.set_settings = qede_set_settings,
@@ -1210,6 +1255,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
 	.get_channels = qede_get_channels,
 	.set_channels = qede_set_channels,
 	.self_test = qede_self_test,
+	.get_tunable = qede_get_tunable,
+	.set_tunable = qede_set_tunable,
 };
 
 static const struct ethtool_ops qede_vf_ethtool_ops = {
@@ -1232,6 +1279,8 @@ static const struct ethtool_ops qede_vf_ethtool_ops = {
 	.set_rxfh = qede_set_rxfh,
 	.get_channels = qede_get_channels,
 	.set_channels = qede_set_channels,
+	.get_tunable = qede_get_tunable,
+	.set_tunable = qede_set_tunable,
 };
 
 void qede_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 2972742c6adb..91e7bb0b85c8 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -485,6 +485,24 @@ static bool qede_pkt_req_lin(struct qede_dev *edev, struct sk_buff *skb,
 }
 #endif
 
+static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
+{
+	/* wmb makes sure that the BDs data is updated before updating the
+	 * producer, otherwise FW may read old data from the BDs.
+	 */
+	wmb();
+	barrier();
+	writel(txq->tx_db.raw, txq->doorbell_addr);
+
+	/* mmiowb is needed to synchronize doorbell writes from more than one
+	 * processor. It guarantees that the write arrives to the device before
+	 * the queue lock is released and another start_xmit is called (possibly
+	 * on another CPU). Without this barrier, the next doorbell can bypass
+	 * this doorbell. This is applicable to IA64/Altix systems.
+	 */
+	mmiowb();
+}
+
 /* Main transmit function */
 static
 netdev_tx_t qede_start_xmit(struct sk_buff *skb,
@@ -543,6 +561,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 	if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
 		DP_NOTICE(edev, "SKB mapping failed\n");
 		qede_free_failed_tx_pkt(edev, txq, first_bd, 0, false);
+		qede_update_tx_producer(txq);
 		return NETDEV_TX_OK;
 	}
 	nbd++;
@@ -657,6 +676,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 		if (rc) {
 			qede_free_failed_tx_pkt(edev, txq, first_bd, nbd,
 						data_split);
+			qede_update_tx_producer(txq);
 			return NETDEV_TX_OK;
 		}
 
@@ -681,6 +701,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 		if (rc) {
 			qede_free_failed_tx_pkt(edev, txq, first_bd, nbd,
 						data_split);
+			qede_update_tx_producer(txq);
 			return NETDEV_TX_OK;
 		}
 	}
@@ -701,20 +722,8 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 	txq->tx_db.data.bd_prod =
 		cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
 
-	/* wmb makes sure that the BDs data is updated before updating the
-	 * producer, otherwise FW may read old data from the BDs.
-	 */
-	wmb();
-	barrier();
-	writel(txq->tx_db.raw, txq->doorbell_addr);
-
-	/* mmiowb is needed to synchronize doorbell writes from more than one
-	 * processor. It guarantees that the write arrives to the device before
-	 * the queue lock is released and another start_xmit is called (possibly
-	 * on another CPU). Without this barrier, the next doorbell can bypass
-	 * this doorbell. This is applicable to IA64/Altix systems.
-	 */
-	mmiowb();
+	if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq))
+		qede_update_tx_producer(txq);
 
 	if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
 		      < (MAX_SKB_FRAGS + 1))) {
@@ -1348,6 +1357,20 @@ static u8 qede_check_csum(u16 flag)
 		return qede_check_tunn_csum(flag);
 }
 
+static bool qede_pkt_is_ip_fragmented(struct eth_fast_path_rx_reg_cqe *cqe,
+				      u16 flag)
+{
+	u8 tun_pars_flg = cqe->tunnel_pars_flags.flags;
+
+	if ((tun_pars_flg & (ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK <<
+			     ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT)) ||
+	    (flag & (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
+		     PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT)))
+		return true;
+
+	return false;
+}
+
 static int qede_rx_int(struct qede_fastpath *fp, int budget)
 {
 	struct qede_dev *edev = fp->edev;
@@ -1426,6 +1449,12 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 
 		csum_flag = qede_check_csum(parse_flag);
 		if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
+			if (qede_pkt_is_ip_fragmented(&cqe->fast_path_regular,
+						      parse_flag)) {
+				rxq->rx_ip_frags++;
+				goto alloc_skb;
+			}
+
 			DP_NOTICE(edev,
 				  "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n",
 				  sw_comp_cons, parse_flag);
@@ -1434,6 +1463,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 			goto next_cqe;
 		}
 
+alloc_skb:
 		skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
 		if (unlikely(!skb)) {
 			DP_NOTICE(edev,
@@ -1444,7 +1474,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 		}
 
 		/* Copy data into SKB */
-		if (len + pad <= QEDE_RX_HDR_SIZE) {
+		if (len + pad <= edev->rx_copybreak) {
 			memcpy(skb_put(skb, len),
 			       page_address(data) + pad +
 				sw_rx_data->page_offset, len);
@@ -1576,56 +1606,49 @@ next_cqe: /* don't consume bd rx buffer */
 
 static int qede_poll(struct napi_struct *napi, int budget)
 {
-	int work_done = 0;
 	struct qede_fastpath *fp = container_of(napi, struct qede_fastpath,
-						 napi);
+						napi);
 	struct qede_dev *edev = fp->edev;
+	int rx_work_done = 0;
+	u8 tc;
 
-	while (1) {
-		u8 tc;
-
-		for (tc = 0; tc < edev->num_tc; tc++)
-			if (qede_txq_has_work(&fp->txqs[tc]))
-				qede_tx_int(edev, &fp->txqs[tc]);
-
-		if (qede_has_rx_work(fp->rxq)) {
-			work_done += qede_rx_int(fp, budget - work_done);
-
-			/* must not complete if we consumed full budget */
-			if (work_done >= budget)
-				break;
-		}
+	for (tc = 0; tc < edev->num_tc; tc++)
+		if (qede_txq_has_work(&fp->txqs[tc]))
+			qede_tx_int(edev, &fp->txqs[tc]);
+
+	rx_work_done = qede_has_rx_work(fp->rxq) ?
+			qede_rx_int(fp, budget) : 0;
+	if (rx_work_done < budget) {
+		qed_sb_update_sb_idx(fp->sb_info);
+		/* *_has_*_work() reads the status block,
+		 * thus we need to ensure that status block indices
+		 * have been actually read (qed_sb_update_sb_idx)
+		 * prior to this check (*_has_*_work) so that
+		 * we won't write the "newer" value of the status block
+		 * to HW (if there was a DMA right after
+		 * qede_has_rx_work and if there is no rmb, the memory
+		 * reading (qed_sb_update_sb_idx) may be postponed
+		 * to right before *_ack_sb). In this case there
+		 * will never be another interrupt until there is
+		 * another update of the status block, while there
+		 * is still unhandled work.
+		 */
+		rmb();
 
 		/* Fall out from the NAPI loop if needed */
-		if (!(qede_has_rx_work(fp->rxq) || qede_has_tx_work(fp))) {
-			qed_sb_update_sb_idx(fp->sb_info);
-			/* *_has_*_work() reads the status block,
-			 * thus we need to ensure that status block indices
-			 * have been actually read (qed_sb_update_sb_idx)
-			 * prior to this check (*_has_*_work) so that
-			 * we won't write the "newer" value of the status block
-			 * to HW (if there was a DMA right after
-			 * qede_has_rx_work and if there is no rmb, the memory
-			 * reading (qed_sb_update_sb_idx) may be postponed
-			 * to right before *_ack_sb). In this case there
-			 * will never be another interrupt until there is
-			 * another update of the status block, while there
-			 * is still unhandled work.
-			 */
-			rmb();
-
-			if (!(qede_has_rx_work(fp->rxq) ||
-			      qede_has_tx_work(fp))) {
-				napi_complete(napi);
-				/* Update and reenable interrupts */
-				qed_sb_ack(fp->sb_info, IGU_INT_ENABLE,
-					   1 /*update*/);
-				break;
-			}
+		if (!(qede_has_rx_work(fp->rxq) ||
+		      qede_has_tx_work(fp))) {
+			napi_complete(napi);
+
+			/* Update and reenable interrupts */
+			qed_sb_ack(fp->sb_info, IGU_INT_ENABLE,
+				   1 /*update*/);
+		} else {
+			rx_work_done = budget;
 		}
 	}
 
-	return work_done;
+	return rx_work_done;
 }
 
 static irqreturn_t qede_msix_fp_int(int irq, void *fp_cookie)
@@ -2496,6 +2519,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
 	mutex_init(&edev->qede_lock);
+	edev->rx_copybreak = QEDE_RX_HDR_SIZE;
 
 	DP_INFO(edev, "Ending successfully qede probe\n");
 
@@ -3222,7 +3246,7 @@ static int qede_stop_queues(struct qede_dev *edev)
 	return rc;
 }
 
-static int qede_start_queues(struct qede_dev *edev)
+static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 {
 	int rc, tc, i;
 	int vlan_removal_en = 1;
@@ -3453,6 +3477,7 @@ out:
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
+	QEDE_LOAD_RELOAD,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
@@ -3491,7 +3516,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 		goto err3;
 	DP_INFO(edev, "Setup IRQs succeeded\n");
 
-	rc = qede_start_queues(edev);
+	rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD);
 	if (rc)
 		goto err4;
 	DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n");
@@ -3546,7 +3571,7 @@ void qede_reload(struct qede_dev *edev,
 	if (func)
 		func(edev, args);
 
-	qede_load(edev, QEDE_LOAD_NORMAL);
+	qede_load(edev, QEDE_LOAD_RELOAD);
 
 	mutex_lock(&edev->qede_lock);
 	qede_config_rx_mode(edev->ndev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 7bd6f25b4625..87c642d3b075 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -772,6 +772,8 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	tx_ring->tx_stats.tx_bytes += skb->len;
 	tx_ring->tx_stats.xmit_called++;
 
+	/* Ensure writes are complete before HW fetches Tx descriptors */
+	wmb();
 	qlcnic_update_cmd_producer(tx_ring);
 
 	return NETDEV_TX_OK;
@@ -2220,7 +2222,7 @@ void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *sds_ring)
 	if (!opcode)
 		return;
 
-	ring = QLCNIC_FETCH_RING_ID(qlcnic_83xx_hndl(sts_data[0]));
+	ring = QLCNIC_FETCH_RING_ID(sts_data[0]);
 	qlcnic_83xx_process_rcv_diag(adapter, ring, sts_data);
 	desc = &sds_ring->desc_head[consumer];
 	desc->status_desc_data[0] = cpu_to_le64(STATUS_OWNER_PHANTOM);
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index 7a7a395d0512..cb29ee24cf1b 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -4,7 +4,7 @@
  * Copyright (C) 2004 Sten Wang <sten.wang@rdc.com.tw>
  * Copyright (C) 2007
  *	Daniel Gimpelevich <daniel@gimpelevich.san-francisco.ca.us>
- * Copyright (C) 2007-2012 Florian Fainelli <florian@openwrt.org>
+ * Copyright (C) 2007-2012 Florian Fainelli <f.fainelli@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -48,8 +48,8 @@
 #include <asm/processor.h>
 
 #define DRV_NAME	"r6040"
-#define DRV_VERSION	"0.28"
-#define DRV_RELDATE	"07Oct2011"
+#define DRV_VERSION	"0.29"
+#define DRV_RELDATE	"04Jul2016"
 
 /* Time in jiffies before concluding the transmitter is hung. */
 #define TX_TIMEOUT	(6000 * HZ / 1000)
@@ -162,7 +162,7 @@
 
 MODULE_AUTHOR("Sten Wang <sten.wang@rdc.com.tw>,"
 	"Daniel Gimpelevich <daniel@gimpelevich.san-francisco.ca.us>,"
-	"Florian Fainelli <florian@openwrt.org>");
+	"Florian Fainelli <f.fainelli@gmail.com>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("RDC R6040 NAPI PCI FastEthernet driver");
 MODULE_VERSION(DRV_VERSION " " DRV_RELDATE);
@@ -614,10 +614,15 @@ static void r6040_tx(struct net_device *dev)
 		if (descptr->status & DSC_OWNER_MAC)
 			break; /* Not complete */
 		skb_ptr = descptr->skb_ptr;
+
+		/* Statistic Counter */
+		dev->stats.tx_packets++;
+		dev->stats.tx_bytes += skb_ptr->len;
+
 		pci_unmap_single(priv->pdev, le32_to_cpu(descptr->buf),
 			skb_ptr->len, PCI_DMA_TODEVICE);
 		/* Free buffer */
-		dev_kfree_skb_irq(skb_ptr);
+		dev_kfree_skb(skb_ptr);
 		descptr->skb_ptr = NULL;
 		/* To next descriptor */
 		descptr = descptr->vndescp;
@@ -638,12 +643,15 @@ static int r6040_poll(struct napi_struct *napi, int budget)
 	void __iomem *ioaddr = priv->base;
 	int work_done;
 
+	r6040_tx(dev);
+
 	work_done = r6040_rx(dev, budget);
 
 	if (work_done < budget) {
-		napi_complete(napi);
-		/* Enable RX interrupt */
-		iowrite16(ioread16(ioaddr + MIER) | RX_INTS, ioaddr + MIER);
+		napi_complete_done(napi, work_done);
+		/* Enable RX/TX interrupt */
+		iowrite16(ioread16(ioaddr + MIER) | RX_INTS | TX_INTS,
+			  ioaddr + MIER);
 	}
 	return work_done;
 }
@@ -670,7 +678,7 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id)
 	}
 
 	/* RX interrupt request */
-	if (status & RX_INTS) {
+	if (status & (RX_INTS | TX_INTS)) {
 		if (status & RX_NO_DESC) {
 			/* RX descriptor unavailable */
 			dev->stats.rx_dropped++;
@@ -681,15 +689,11 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id)
 
 		if (likely(napi_schedule_prep(&lp->napi))) {
 			/* Mask off RX interrupt */
-			misr &= ~RX_INTS;
-			__napi_schedule(&lp->napi);
+			misr &= ~(RX_INTS | TX_INTS);
+			__napi_schedule_irqoff(&lp->napi);
 		}
 	}
 
-	/* TX interrupt request */
-	if (status & TX_INTS)
-		r6040_tx(dev);
-
 	/* Restore RDC MAC interrupt */
 	iowrite16(misr, ioaddr + MIER);
 
@@ -810,6 +814,9 @@ static netdev_tx_t r6040_start_xmit(struct sk_buff *skb,
 	void __iomem *ioaddr = lp->base;
 	unsigned long flags;
 
+	if (skb_put_padto(skb, ETH_ZLEN) < 0)
+		return NETDEV_TX_OK;
+
 	/* Critical Section */
 	spin_lock_irqsave(&lp->lock, flags);
 
@@ -821,17 +828,10 @@ static netdev_tx_t r6040_start_xmit(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
-	/* Statistic Counter */
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
 	/* Set TX descriptor & Transmit it */
 	lp->tx_free_desc--;
 	descptr = lp->tx_insert_ptr;
-	if (skb->len < ETH_ZLEN)
-		descptr->len = ETH_ZLEN;
-	else
-		descptr->len = skb->len;
-
+	descptr->len = skb->len;
 	descptr->skb_ptr = skb;
 	descptr->buf = cpu_to_le32(pci_map_single(lp->pdev,
 		skb->data, skb->len, PCI_DMA_TODEVICE));
@@ -840,7 +840,8 @@ static netdev_tx_t r6040_start_xmit(struct sk_buff *skb,
 	skb_tx_timestamp(skb);
 
 	/* Trigger the MAC to check the TX descriptor */
-	iowrite16(TM2TX, ioaddr + MTPR);
+	if (!skb->xmit_more || netif_queue_stopped(dev))
+		iowrite16(TM2TX, ioaddr + MTPR);
 	lp->tx_insert_ptr = descptr->vndescp;
 
 	/* If no tx resource, stop */
@@ -1001,14 +1002,8 @@ static void r6040_adjust_link(struct net_device *dev)
 		lp->old_duplex = phydev->duplex;
 	}
 
-	if (status_changed) {
-		pr_info("%s: link %s", dev->name, phydev->link ?
-			"UP" : "DOWN");
-		if (phydev->link)
-			pr_cont(" - %d/%s", phydev->speed,
-			DUPLEX_FULL == phydev->duplex ? "full" : "half");
-		pr_cont("\n");
-	}
+	if (status_changed)
+		phy_print_status(phydev);
 }
 
 static int r6040_mii_probe(struct net_device *dev)
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 28b775e5a9ad..f0b09b05ed3f 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1996,7 +1996,8 @@ static int rocker_port_change_proto_down(struct net_device *dev,
 	return 0;
 }
 
-static void rocker_port_neigh_destroy(struct neighbour *n)
+static void rocker_port_neigh_destroy(struct net_device *dev,
+				      struct neighbour *n)
 {
 	struct rocker_port *rocker_port = netdev_priv(n->dev);
 	int err;
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 133e9e35be9e..4c83739d158f 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -104,7 +104,8 @@ int efx_farch_test_registers(struct efx_nic *efx,
 			     const struct efx_farch_register_test *regs,
 			     size_t n_regs)
 {
-	unsigned address = 0, i, j;
+	unsigned address = 0;
+	int i, j;
 	efx_oword_t mask, imask, original, reg, buf;
 
 	for (i = 0; i < n_regs; ++i) {
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 18ac52ded696..726b80f45906 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2195,6 +2195,12 @@ static void smc_release_datacs(struct platform_device *pdev, struct net_device *
 	}
 }
 
+static const struct acpi_device_id smc91x_acpi_match[] = {
+	{ "LNRO0003", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, smc91x_acpi_match);
+
 #if IS_BUILTIN(CONFIG_OF)
 static const struct of_device_id smc91x_match[] = {
 	{ .compatible = "smsc,lan91c94", },
@@ -2274,7 +2280,6 @@ static int smc_drv_probe(struct platform_device *pdev)
 #if IS_BUILTIN(CONFIG_OF)
 	match = of_match_device(of_match_ptr(smc91x_match), &pdev->dev);
 	if (match) {
-		struct device_node *np = pdev->dev.of_node;
 		u32 val;
 
 		/* Optional pwrdwn GPIO configured? */
@@ -2300,7 +2305,8 @@ static int smc_drv_probe(struct platform_device *pdev)
 			usleep_range(750, 1000);
 
 		/* Combination of IO widths supported, default to 16-bit */
-		if (!of_property_read_u32(np, "reg-io-width", &val)) {
+		if (!device_property_read_u32(&pdev->dev, "reg-io-width",
+					      &val)) {
 			if (val & 1)
 				lp->cfg.flags |= SMC91X_USE_8BIT;
 			if ((val == 0) || (val & 2))
@@ -2478,7 +2484,8 @@ static struct platform_driver smc_driver = {
 	.driver		= {
 		.name	= CARDNAME,
 		.pm	= &smc_drv_pm_ops,
-		.of_match_table = of_match_ptr(smc91x_match),
+		.of_match_table   = of_match_ptr(smc91x_match),
+		.acpi_match_table = smc91x_acpi_match,
 	},
 };
 
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 8af25563f627..ca3134540d2d 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -114,9 +114,7 @@ struct smsc911x_data {
 	/* spinlock to ensure register accesses are serialised */
 	spinlock_t dev_lock;
 
-	struct phy_device *phy_dev;
 	struct mii_bus *mii_bus;
-	int phy_irq[PHY_MAX_ADDR];
 	unsigned int using_extphy;
 	int last_duplex;
 	int last_carrier;
@@ -834,7 +832,7 @@ static int smsc911x_phy_reset(struct smsc911x_data *pdata)
 static int smsc911x_phy_loopbacktest(struct net_device *dev)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
-	struct phy_device *phy_dev = pdata->phy_dev;
+	struct phy_device *phy_dev = dev->phydev;
 	int result = -EIO;
 	unsigned int i, val;
 	unsigned long flags;
@@ -904,7 +902,8 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev)
 
 static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata)
 {
-	struct phy_device *phy_dev = pdata->phy_dev;
+	struct net_device *ndev = pdata->dev;
+	struct phy_device *phy_dev = ndev->phydev;
 	u32 afc = smsc911x_reg_read(pdata, AFC_CFG);
 	u32 flow;
 	unsigned long flags;
@@ -945,7 +944,7 @@ static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata)
 static void smsc911x_phy_adjust_link(struct net_device *dev)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
-	struct phy_device *phy_dev = pdata->phy_dev;
+	struct phy_device *phy_dev = dev->phydev;
 	unsigned long flags;
 	int carrier;
 
@@ -1038,7 +1037,6 @@ static int smsc911x_mii_probe(struct net_device *dev)
 			      SUPPORTED_Asym_Pause);
 	phydev->advertising = phydev->supported;
 
-	pdata->phy_dev = phydev;
 	pdata->last_duplex = -1;
 	pdata->last_carrier = -1;
 
@@ -1073,7 +1071,6 @@ static int smsc911x_mii_init(struct platform_device *pdev,
 	pdata->mii_bus->priv = pdata;
 	pdata->mii_bus->read = smsc911x_mii_read;
 	pdata->mii_bus->write = smsc911x_mii_write;
-	memcpy(pdata->mii_bus->irq, pdata->phy_irq, sizeof(pdata->mii_bus));
 
 	pdata->mii_bus->parent = &pdev->dev;
 
@@ -1340,9 +1337,11 @@ static void smsc911x_rx_multicast_update_workaround(struct smsc911x_data *pdata)
 
 static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata)
 {
+	struct net_device *ndev = pdata->dev;
+	struct phy_device *phy_dev = ndev->phydev;
 	int rc = 0;
 
-	if (!pdata->phy_dev)
+	if (!phy_dev)
 		return rc;
 
 	/* If the internal PHY is in General Power-Down mode, all, except the
@@ -1352,7 +1351,7 @@ static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata)
 	 * In that case, clear the bit 0.11, so the PHY powers up and we can
 	 * access to the phy registers.
 	 */
-	rc = phy_read(pdata->phy_dev, MII_BMCR);
+	rc = phy_read(phy_dev, MII_BMCR);
 	if (rc < 0) {
 		SMSC_WARN(pdata, drv, "Failed reading PHY control reg");
 		return rc;
@@ -1362,7 +1361,7 @@ static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata)
 	 * disable the general power down-mode.
 	 */
 	if (rc & BMCR_PDOWN) {
-		rc = phy_write(pdata->phy_dev, MII_BMCR, rc & ~BMCR_PDOWN);
+		rc = phy_write(phy_dev, MII_BMCR, rc & ~BMCR_PDOWN);
 		if (rc < 0) {
 			SMSC_WARN(pdata, drv, "Failed writing PHY control reg");
 			return rc;
@@ -1376,12 +1375,14 @@ static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata)
 
 static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata)
 {
+	struct net_device *ndev = pdata->dev;
+	struct phy_device *phy_dev = ndev->phydev;
 	int rc = 0;
 
-	if (!pdata->phy_dev)
+	if (!phy_dev)
 		return rc;
 
-	rc = phy_read(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS);
+	rc = phy_read(phy_dev, MII_LAN83C185_CTRL_STATUS);
 
 	if (rc < 0) {
 		SMSC_WARN(pdata, drv, "Failed reading PHY control reg");
@@ -1391,7 +1392,7 @@ static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata)
 	/* Only disable if energy detect mode is already enabled */
 	if (rc & MII_LAN83C185_EDPWRDOWN) {
 		/* Disable energy detect mode for this SMSC Transceivers */
-		rc = phy_write(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS,
+		rc = phy_write(phy_dev, MII_LAN83C185_CTRL_STATUS,
 			       rc & (~MII_LAN83C185_EDPWRDOWN));
 
 		if (rc < 0) {
@@ -1407,12 +1408,14 @@ static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata)
 
 static int smsc911x_phy_enable_energy_detect(struct smsc911x_data *pdata)
 {
+	struct net_device *ndev = pdata->dev;
+	struct phy_device *phy_dev = ndev->phydev;
 	int rc = 0;
 
-	if (!pdata->phy_dev)
+	if (!phy_dev)
 		return rc;
 
-	rc = phy_read(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS);
+	rc = phy_read(phy_dev, MII_LAN83C185_CTRL_STATUS);
 
 	if (rc < 0) {
 		SMSC_WARN(pdata, drv, "Failed reading PHY control reg");
@@ -1422,7 +1425,7 @@ static int smsc911x_phy_enable_energy_detect(struct smsc911x_data *pdata)
 	/* Only enable if energy detect mode is already disabled */
 	if (!(rc & MII_LAN83C185_EDPWRDOWN)) {
 		/* Enable energy detect mode for this SMSC Transceivers */
-		rc = phy_write(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS,
+		rc = phy_write(phy_dev, MII_LAN83C185_CTRL_STATUS,
 			       rc | MII_LAN83C185_EDPWRDOWN);
 
 		if (rc < 0) {
@@ -1519,7 +1522,7 @@ static int smsc911x_open(struct net_device *dev)
 	unsigned int intcfg;
 
 	/* if the phy is not yet registered, retry later*/
-	if (!pdata->phy_dev) {
+	if (!dev->phydev) {
 		SMSC_WARN(pdata, hw, "phy_dev is NULL");
 		return -EAGAIN;
 	}
@@ -1610,7 +1613,7 @@ static int smsc911x_open(struct net_device *dev)
 	pdata->last_carrier = -1;
 
 	/* Bring the PHY up */
-	phy_start(pdata->phy_dev);
+	phy_start(dev->phydev);
 
 	temp = smsc911x_reg_read(pdata, HW_CFG);
 	/* Preserve TX FIFO size and external PHY configuration */
@@ -1665,8 +1668,8 @@ static int smsc911x_stop(struct net_device *dev)
 	smsc911x_tx_update_txcounters(dev);
 
 	/* Bring the PHY down */
-	if (pdata->phy_dev)
-		phy_stop(pdata->phy_dev);
+	if (dev->phydev)
+		phy_stop(dev->phydev);
 
 	SMSC_TRACE(pdata, ifdown, "Interface stopped");
 	return 0;
@@ -1906,30 +1909,10 @@ static int smsc911x_set_mac_address(struct net_device *dev, void *p)
 /* Standard ioctls for mii-tool */
 static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct smsc911x_data *pdata = netdev_priv(dev);
-
-	if (!netif_running(dev) || !pdata->phy_dev)
+	if (!netif_running(dev) || !dev->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(pdata->phy_dev, ifr, cmd);
-}
-
-static int
-smsc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct smsc911x_data *pdata = netdev_priv(dev);
-
-	cmd->maxtxpkt = 1;
-	cmd->maxrxpkt = 1;
-	return phy_ethtool_gset(pdata->phy_dev, cmd);
-}
-
-static int
-smsc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct smsc911x_data *pdata = netdev_priv(dev);
-
-	return phy_ethtool_sset(pdata->phy_dev, cmd);
+	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 static void smsc911x_ethtool_getdrvinfo(struct net_device *dev,
@@ -1943,9 +1926,7 @@ static void smsc911x_ethtool_getdrvinfo(struct net_device *dev,
 
 static int smsc911x_ethtool_nwayreset(struct net_device *dev)
 {
-	struct smsc911x_data *pdata = netdev_priv(dev);
-
-	return phy_start_aneg(pdata->phy_dev);
+	return phy_start_aneg(dev->phydev);
 }
 
 static u32 smsc911x_ethtool_getmsglevel(struct net_device *dev)
@@ -1971,7 +1952,7 @@ smsc911x_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs,
 			 void *buf)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
-	struct phy_device *phy_dev = pdata->phy_dev;
+	struct phy_device *phy_dev = dev->phydev;
 	unsigned long flags;
 	unsigned int i;
 	unsigned int j = 0;
@@ -2117,8 +2098,6 @@ static int smsc911x_ethtool_set_eeprom(struct net_device *dev,
 }
 
 static const struct ethtool_ops smsc911x_ethtool_ops = {
-	.get_settings = smsc911x_ethtool_getsettings,
-	.set_settings = smsc911x_ethtool_setsettings,
 	.get_link = ethtool_op_get_link,
 	.get_drvinfo = smsc911x_ethtool_getdrvinfo,
 	.nway_reset = smsc911x_ethtool_nwayreset,
@@ -2130,6 +2109,8 @@ static const struct ethtool_ops smsc911x_ethtool_ops = {
 	.get_eeprom = smsc911x_ethtool_get_eeprom,
 	.set_eeprom = smsc911x_ethtool_set_eeprom,
 	.get_ts_info = ethtool_op_get_ts_info,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 static const struct net_device_ops smsc911x_netdev_ops = {
@@ -2310,12 +2291,11 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
 	pdata = netdev_priv(dev);
 	BUG_ON(!pdata);
 	BUG_ON(!pdata->ioaddr);
-	BUG_ON(!pdata->phy_dev);
+	BUG_ON(!dev->phydev);
 
 	SMSC_TRACE(pdata, ifdown, "Stopping driver");
 
-	phy_disconnect(pdata->phy_dev);
-	pdata->phy_dev = NULL;
+	phy_disconnect(dev->phydev);
 	mdiobus_unregister(pdata->mii_bus);
 	mdiobus_free(pdata->mii_bus);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 0fb362d5a722..44b630cd1755 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -11,11 +11,12 @@ obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
 obj-$(CONFIG_DWMAC_MESON)	+= dwmac-meson.o
 obj-$(CONFIG_DWMAC_ROCKCHIP)	+= dwmac-rk.o
-obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-socfpga.o
+obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)		+= dwmac-sti.o
 obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
 obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
 stmmac-platform-objs:= stmmac_platform.o
+dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
 
 obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
 stmmac-pci-objs:= stmmac_pci.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
new file mode 100644
index 000000000000..2920e2ee3864
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -0,0 +1,274 @@
+/* Copyright Altera Corporation (C) 2016. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Tien Hock Loh <thloh@altera.com>
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+#include "altr_tse_pcs.h"
+
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII	0
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII		BIT(1)
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII		BIT(2)
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH		2
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK			GENMASK(1, 0)
+
+#define TSE_PCS_CONTROL_AN_EN_MASK			BIT(12)
+#define TSE_PCS_CONTROL_REG				0x00
+#define TSE_PCS_CONTROL_RESTART_AN_MASK			BIT(9)
+#define TSE_PCS_IF_MODE_REG				0x28
+#define TSE_PCS_LINK_TIMER_0_REG			0x24
+#define TSE_PCS_LINK_TIMER_1_REG			0x26
+#define TSE_PCS_SIZE					0x40
+#define TSE_PCS_STATUS_AN_COMPLETED_MASK		BIT(5)
+#define TSE_PCS_STATUS_LINK_MASK			0x0004
+#define TSE_PCS_STATUS_REG				0x02
+#define TSE_PCS_SGMII_SPEED_1000			BIT(3)
+#define TSE_PCS_SGMII_SPEED_100				BIT(2)
+#define TSE_PCS_SGMII_SPEED_10				0x0
+#define TSE_PCS_SW_RST_MASK				0x8000
+#define TSE_PCS_PARTNER_ABILITY_REG			0x0A
+#define TSE_PCS_PARTNER_DUPLEX_FULL			0x1000
+#define TSE_PCS_PARTNER_DUPLEX_HALF			0x0000
+#define TSE_PCS_PARTNER_DUPLEX_MASK			0x1000
+#define TSE_PCS_PARTNER_SPEED_MASK			GENMASK(11, 10)
+#define TSE_PCS_PARTNER_SPEED_1000			BIT(11)
+#define TSE_PCS_PARTNER_SPEED_100			BIT(10)
+#define TSE_PCS_PARTNER_SPEED_10			0x0000
+#define TSE_PCS_PARTNER_SPEED_1000			BIT(11)
+#define TSE_PCS_PARTNER_SPEED_100			BIT(10)
+#define TSE_PCS_PARTNER_SPEED_10			0x0000
+#define TSE_PCS_SGMII_SPEED_MASK			GENMASK(3, 2)
+#define TSE_PCS_SGMII_LINK_TIMER_0			0x0D40
+#define TSE_PCS_SGMII_LINK_TIMER_1			0x0003
+#define TSE_PCS_SW_RESET_TIMEOUT			100
+#define TSE_PCS_USE_SGMII_AN_MASK			BIT(2)
+#define TSE_PCS_USE_SGMII_ENA				BIT(1)
+
+#define SGMII_ADAPTER_CTRL_REG				0x00
+#define SGMII_ADAPTER_DISABLE				0x0001
+#define SGMII_ADAPTER_ENABLE				0x0000
+
+#define AUTONEGO_LINK_TIMER				20
+
+static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs)
+{
+	int counter = 0;
+	u16 val;
+
+	val = readw(base + TSE_PCS_CONTROL_REG);
+	val |= TSE_PCS_SW_RST_MASK;
+	writew(val, base + TSE_PCS_CONTROL_REG);
+
+	while (counter < TSE_PCS_SW_RESET_TIMEOUT) {
+		val = readw(base + TSE_PCS_CONTROL_REG);
+		val &= TSE_PCS_SW_RST_MASK;
+		if (val == 0)
+			break;
+		counter++;
+		udelay(1);
+	}
+	if (counter >= TSE_PCS_SW_RESET_TIMEOUT) {
+		dev_err(pcs->dev, "PCS could not get out of sw reset\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
+{
+	int ret = 0;
+
+	writew(TSE_PCS_USE_SGMII_ENA, base + TSE_PCS_IF_MODE_REG);
+
+	writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG);
+	writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG);
+
+	ret = tse_pcs_reset(base, pcs);
+	if (ret == 0)
+		writew(SGMII_ADAPTER_ENABLE,
+		       pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+
+	return ret;
+}
+
+static void pcs_link_timer_callback(unsigned long data)
+{
+	u16 val = 0;
+	struct tse_pcs *pcs = (struct tse_pcs *)data;
+	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
+	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
+
+	val = readw(tse_pcs_base + TSE_PCS_STATUS_REG);
+	val &= TSE_PCS_STATUS_LINK_MASK;
+
+	if (val != 0) {
+		dev_dbg(pcs->dev, "Adapter: Link is established\n");
+		writew(SGMII_ADAPTER_ENABLE,
+		       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+	} else {
+		mod_timer(&pcs->aneg_link_timer, jiffies +
+			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+	}
+}
+
+static void auto_nego_timer_callback(unsigned long data)
+{
+	u16 val = 0;
+	u16 speed = 0;
+	u16 duplex = 0;
+	struct tse_pcs *pcs = (struct tse_pcs *)data;
+	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
+	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
+
+	val = readw(tse_pcs_base + TSE_PCS_STATUS_REG);
+	val &= TSE_PCS_STATUS_AN_COMPLETED_MASK;
+
+	if (val != 0) {
+		dev_dbg(pcs->dev, "Adapter: Auto Negotiation is completed\n");
+		val = readw(tse_pcs_base + TSE_PCS_PARTNER_ABILITY_REG);
+		speed = val & TSE_PCS_PARTNER_SPEED_MASK;
+		duplex = val & TSE_PCS_PARTNER_DUPLEX_MASK;
+
+		if (speed == TSE_PCS_PARTNER_SPEED_10 &&
+		    duplex == TSE_PCS_PARTNER_DUPLEX_FULL)
+			dev_dbg(pcs->dev,
+				"Adapter: Link Partner is Up - 10/Full\n");
+		else if (speed == TSE_PCS_PARTNER_SPEED_100 &&
+			 duplex == TSE_PCS_PARTNER_DUPLEX_FULL)
+			dev_dbg(pcs->dev,
+				"Adapter: Link Partner is Up - 100/Full\n");
+		else if (speed == TSE_PCS_PARTNER_SPEED_1000 &&
+			 duplex == TSE_PCS_PARTNER_DUPLEX_FULL)
+			dev_dbg(pcs->dev,
+				"Adapter: Link Partner is Up - 1000/Full\n");
+		else if (speed == TSE_PCS_PARTNER_SPEED_10 &&
+			 duplex == TSE_PCS_PARTNER_DUPLEX_HALF)
+			dev_err(pcs->dev,
+				"Adapter does not support Half Duplex\n");
+		else if (speed == TSE_PCS_PARTNER_SPEED_100 &&
+			 duplex == TSE_PCS_PARTNER_DUPLEX_HALF)
+			dev_err(pcs->dev,
+				"Adapter does not support Half Duplex\n");
+		else if (speed == TSE_PCS_PARTNER_SPEED_1000 &&
+			 duplex == TSE_PCS_PARTNER_DUPLEX_HALF)
+			dev_err(pcs->dev,
+				"Adapter does not support Half Duplex\n");
+		else
+			dev_err(pcs->dev,
+				"Adapter: Invalid Partner Speed and Duplex\n");
+
+		if (duplex == TSE_PCS_PARTNER_DUPLEX_FULL &&
+		    (speed == TSE_PCS_PARTNER_SPEED_10 ||
+		     speed == TSE_PCS_PARTNER_SPEED_100 ||
+		     speed == TSE_PCS_PARTNER_SPEED_1000))
+			writew(SGMII_ADAPTER_ENABLE,
+			       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+	} else {
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val |= TSE_PCS_CONTROL_RESTART_AN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
+
+		tse_pcs_reset(tse_pcs_base, pcs);
+		mod_timer(&pcs->aneg_link_timer, jiffies +
+			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+	}
+}
+
+static void aneg_link_timer_callback(unsigned long data)
+{
+	struct tse_pcs *pcs = (struct tse_pcs *)data;
+
+	if (pcs->autoneg == AUTONEG_ENABLE)
+		auto_nego_timer_callback(data);
+	else if (pcs->autoneg == AUTONEG_DISABLE)
+		pcs_link_timer_callback(data);
+}
+
+void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
+			   unsigned int speed)
+{
+	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
+	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
+	u32 val;
+
+	writew(SGMII_ADAPTER_ENABLE,
+	       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+
+	pcs->autoneg = phy_dev->autoneg;
+
+	if (phy_dev->autoneg == AUTONEG_ENABLE) {
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val |= TSE_PCS_CONTROL_AN_EN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
+
+		val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
+		val |= TSE_PCS_USE_SGMII_AN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
+
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val |= TSE_PCS_CONTROL_RESTART_AN_MASK;
+
+		tse_pcs_reset(tse_pcs_base, pcs);
+
+		setup_timer(&pcs->aneg_link_timer,
+			    aneg_link_timer_callback, (unsigned long)pcs);
+		mod_timer(&pcs->aneg_link_timer, jiffies +
+			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+	} else if (phy_dev->autoneg == AUTONEG_DISABLE) {
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val &= ~TSE_PCS_CONTROL_AN_EN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
+
+		val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
+		val &= ~TSE_PCS_USE_SGMII_AN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
+
+		val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
+		val &= ~TSE_PCS_SGMII_SPEED_MASK;
+
+		switch (speed) {
+		case 1000:
+			val |= TSE_PCS_SGMII_SPEED_1000;
+			break;
+		case 100:
+			val |= TSE_PCS_SGMII_SPEED_100;
+			break;
+		case 10:
+			val |= TSE_PCS_SGMII_SPEED_10;
+			break;
+		default:
+			return;
+		}
+		writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
+
+		tse_pcs_reset(tse_pcs_base, pcs);
+
+		setup_timer(&pcs->aneg_link_timer,
+			    aneg_link_timer_callback, (unsigned long)pcs);
+		mod_timer(&pcs->aneg_link_timer, jiffies +
+			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+	}
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
new file mode 100644
index 000000000000..2f5882450b06
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
@@ -0,0 +1,36 @@
+/* Copyright Altera Corporation (C) 2016. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Tien Hock Loh <thloh@altera.com>
+ */
+
+#ifndef __TSE_PCS_H__
+#define __TSE_PCS_H__
+
+#include <linux/phy.h>
+#include <linux/timer.h>
+
+struct tse_pcs {
+	struct device *dev;
+	void __iomem *tse_pcs_base;
+	void __iomem *sgmii_adapter_base;
+	struct timer_list aneg_link_timer;
+	int autoneg;
+};
+
+int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs);
+void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
+			   unsigned int speed);
+
+#endif /* __TSE_PCS_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index f13499fa1f58..edd20c3b2b3d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -27,6 +27,11 @@
 #include "stmmac.h"
 #include "stmmac_platform.h"
 
+#include "altr_tse_pcs.h"
+
+#define SGMII_ADAPTER_CTRL_REG                          0x00
+#define SGMII_ADAPTER_DISABLE                           0x0001
+
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
@@ -52,35 +57,46 @@ struct socfpga_dwmac {
 	struct reset_control *stmmac_rst;
 	void __iomem *splitter_base;
 	bool f2h_ptp_ref_clk;
+	struct tse_pcs pcs;
 };
 
 static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
 {
 	struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
 	void __iomem *splitter_base = dwmac->splitter_base;
+	void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base;
+	void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base;
+	struct device *dev = dwmac->dev;
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct phy_device *phy_dev = ndev->phydev;
 	u32 val;
 
-	if (!splitter_base)
-		return;
-
-	val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG);
-	val &= ~EMAC_SPLITTER_CTRL_SPEED_MASK;
-
-	switch (speed) {
-	case 1000:
-		val |= EMAC_SPLITTER_CTRL_SPEED_1000;
-		break;
-	case 100:
-		val |= EMAC_SPLITTER_CTRL_SPEED_100;
-		break;
-	case 10:
-		val |= EMAC_SPLITTER_CTRL_SPEED_10;
-		break;
-	default:
-		return;
+	if ((tse_pcs_base) && (sgmii_adapter_base))
+		writew(SGMII_ADAPTER_DISABLE,
+		       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+
+	if (splitter_base) {
+		val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG);
+		val &= ~EMAC_SPLITTER_CTRL_SPEED_MASK;
+
+		switch (speed) {
+		case 1000:
+			val |= EMAC_SPLITTER_CTRL_SPEED_1000;
+			break;
+		case 100:
+			val |= EMAC_SPLITTER_CTRL_SPEED_100;
+			break;
+		case 10:
+			val |= EMAC_SPLITTER_CTRL_SPEED_10;
+			break;
+		default:
+			return;
+		}
+		writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
 	}
 
-	writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
+	if (tse_pcs_base && sgmii_adapter_base)
+		tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
 }
 
 static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev)
@@ -88,9 +104,12 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
 	struct device_node *np = dev->of_node;
 	struct regmap *sys_mgr_base_addr;
 	u32 reg_offset, reg_shift;
-	int ret;
-	struct device_node *np_splitter;
+	int ret, index;
+	struct device_node *np_splitter = NULL;
+	struct device_node *np_sgmii_adapter = NULL;
 	struct resource res_splitter;
+	struct resource res_tse_pcs;
+	struct resource res_sgmii_adapter;
 
 	dwmac->interface = of_get_phy_mode(np);
 
@@ -128,6 +147,66 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
 		}
 	}
 
+	np_sgmii_adapter = of_parse_phandle(np,
+					    "altr,gmii-to-sgmii-converter", 0);
+	if (np_sgmii_adapter) {
+		index = of_property_match_string(np_sgmii_adapter, "reg-names",
+						 "hps_emac_interface_splitter_avalon_slave");
+
+		if (index >= 0) {
+			if (of_address_to_resource(np_sgmii_adapter, index,
+						   &res_splitter)) {
+				dev_err(dev,
+					"%s: ERROR: missing emac splitter address\n",
+					__func__);
+				return -EINVAL;
+			}
+
+			dwmac->splitter_base =
+			    devm_ioremap_resource(dev, &res_splitter);
+
+			if (IS_ERR(dwmac->splitter_base))
+				return PTR_ERR(dwmac->splitter_base);
+		}
+
+		index = of_property_match_string(np_sgmii_adapter, "reg-names",
+						 "gmii_to_sgmii_adapter_avalon_slave");
+
+		if (index >= 0) {
+			if (of_address_to_resource(np_sgmii_adapter, index,
+						   &res_sgmii_adapter)) {
+				dev_err(dev,
+					"%s: ERROR: failed mapping adapter\n",
+					__func__);
+				return -EINVAL;
+			}
+
+			dwmac->pcs.sgmii_adapter_base =
+			    devm_ioremap_resource(dev, &res_sgmii_adapter);
+
+			if (IS_ERR(dwmac->pcs.sgmii_adapter_base))
+				return PTR_ERR(dwmac->pcs.sgmii_adapter_base);
+		}
+
+		index = of_property_match_string(np_sgmii_adapter, "reg-names",
+						 "eth_tse_control_port");
+
+		if (index >= 0) {
+			if (of_address_to_resource(np_sgmii_adapter, index,
+						   &res_tse_pcs)) {
+				dev_err(dev,
+					"%s: ERROR: failed mapping tse control port\n",
+					__func__);
+				return -EINVAL;
+			}
+
+			dwmac->pcs.tse_pcs_base =
+			    devm_ioremap_resource(dev, &res_tse_pcs);
+
+			if (IS_ERR(dwmac->pcs.tse_pcs_base))
+				return PTR_ERR(dwmac->pcs.tse_pcs_base);
+		}
+	}
 	dwmac->reg_offset = reg_offset;
 	dwmac->reg_shift = reg_shift;
 	dwmac->sys_mgr_base_addr = sys_mgr_base_addr;
@@ -151,6 +230,7 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
 		break;
 	case PHY_INTERFACE_MODE_MII:
 	case PHY_INTERFACE_MODE_GMII:
+	case PHY_INTERFACE_MODE_SGMII:
 		val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
 		break;
 	default:
@@ -191,6 +271,12 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
 	 */
 	if (dwmac->stmmac_rst)
 		reset_control_deassert(dwmac->stmmac_rst);
+	if (phymode == PHY_INTERFACE_MODE_SGMII) {
+		if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
+			dev_err(dwmac->dev, "Unable to initialize TSE PCS");
+			return -EINVAL;
+		}
+	}
 
 	return 0;
 }
@@ -225,6 +311,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 	plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+
 	if (!ret) {
 		struct net_device *ndev = platform_get_drvdata(pdev);
 		struct stmmac_priv *stpriv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index aab777c1ba33..c23ccabc2d8a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2819,7 +2819,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 				priv->tx_path_in_lpi_mode = true;
 			if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
 				priv->tx_path_in_lpi_mode = false;
-			if (status & CORE_IRQ_MTL_RX_OVERFLOW)
+			if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr)
 				priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
 							priv->rx_tail_addr,
 							STMMAC_CHAN0);
diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index c14fa91c825f..fc1ea8083f1f 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
@@ -46,7 +46,6 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
-#include <linux/version.h>
 
 #include <linux/device.h>
 #include <linux/bitrev.h>
@@ -2878,7 +2877,7 @@ static int dwceqos_probe(struct platform_device *pdev)
 		ret = of_phy_register_fixed_link(lp->pdev->dev.of_node);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "invalid fixed-link");
-			goto err_out_unregister_netdev;
+			goto err_out_unregister_clk_notifier;
 		}
 
 		lp->phy_node = of_node_get(lp->pdev->dev.of_node);
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 6d0c5a0c34c6..1a93a1f28433 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2556,8 +2556,6 @@ static int cpsw_probe(struct platform_device *pdev)
 clean_ale_ret:
 	cpsw_ale_destroy(priv->ale);
 clean_dma_ret:
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
@@ -2585,8 +2583,6 @@ static int cpsw_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 
 	cpsw_ale_destroy(priv->ale);
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 	pm_runtime_disable(&pdev->dev);
 	device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index f56d66e6ec15..c6c54659f8d4 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -348,7 +348,6 @@ struct emac_priv {
 	u32 rx_addr_type;
 	const char *phy_id;
 	struct device_node *phy_node;
-	struct phy_device *phydev;
 	spinlock_t lock;
 	/*platform specific members*/
 	void (*int_enable) (void);
@@ -486,43 +485,6 @@ static void emac_get_drvinfo(struct net_device *ndev,
 }
 
 /**
- * emac_get_settings - Get EMAC settings
- * @ndev: The DaVinci EMAC network adapter
- * @ecmd: ethtool command
- *
- * Executes ethool get command
- *
- */
-static int emac_get_settings(struct net_device *ndev,
-			     struct ethtool_cmd *ecmd)
-{
-	struct emac_priv *priv = netdev_priv(ndev);
-	if (priv->phydev)
-		return phy_ethtool_gset(priv->phydev, ecmd);
-	else
-		return -EOPNOTSUPP;
-
-}
-
-/**
- * emac_set_settings - Set EMAC settings
- * @ndev: The DaVinci EMAC network adapter
- * @ecmd: ethtool command
- *
- * Executes ethool set command
- *
- */
-static int emac_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
-{
-	struct emac_priv *priv = netdev_priv(ndev);
-	if (priv->phydev)
-		return phy_ethtool_sset(priv->phydev, ecmd);
-	else
-		return -EOPNOTSUPP;
-
-}
-
-/**
  * emac_get_coalesce - Get interrupt coalesce settings for this device
  * @ndev : The DaVinci EMAC network adapter
  * @coal : ethtool coalesce settings structure
@@ -625,12 +587,12 @@ static int emac_set_coalesce(struct net_device *ndev,
  */
 static const struct ethtool_ops ethtool_ops = {
 	.get_drvinfo = emac_get_drvinfo,
-	.get_settings = emac_get_settings,
-	.set_settings = emac_set_settings,
 	.get_link = ethtool_op_get_link,
 	.get_coalesce = emac_get_coalesce,
 	.set_coalesce =  emac_set_coalesce,
 	.get_ts_info = ethtool_op_get_ts_info,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 /**
@@ -651,8 +613,8 @@ static void emac_update_phystatus(struct emac_priv *priv)
 	mac_control = emac_read(EMAC_MACCONTROL);
 	cur_duplex = (mac_control & EMAC_MACCONTROL_FULLDUPLEXEN) ?
 			DUPLEX_FULL : DUPLEX_HALF;
-	if (priv->phydev)
-		new_duplex = priv->phydev->duplex;
+	if (ndev->phydev)
+		new_duplex = ndev->phydev->duplex;
 	else
 		new_duplex = DUPLEX_FULL;
 
@@ -1454,7 +1416,7 @@ static void emac_poll_controller(struct net_device *ndev)
 static void emac_adjust_link(struct net_device *ndev)
 {
 	struct emac_priv *priv = netdev_priv(ndev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = ndev->phydev;
 	unsigned long flags;
 	int new_state = 0;
 
@@ -1483,7 +1445,7 @@ static void emac_adjust_link(struct net_device *ndev)
 	}
 	if (new_state) {
 		emac_update_phystatus(priv);
-		phy_print_status(priv->phydev);
+		phy_print_status(ndev->phydev);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -1505,15 +1467,13 @@ static void emac_adjust_link(struct net_device *ndev)
  */
 static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd)
 {
-	struct emac_priv *priv = netdev_priv(ndev);
-
 	if (!(netif_running(ndev)))
 		return -EINVAL;
 
 	/* TODO: Add phy read and write and private statistics get feature */
 
-	if (priv->phydev)
-		return phy_mii_ioctl(priv->phydev, ifrq, cmd);
+	if (ndev->phydev)
+		return phy_mii_ioctl(ndev->phydev, ifrq, cmd);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1542,6 +1502,7 @@ static int emac_dev_open(struct net_device *ndev)
 	int res_num = 0, irq_num = 0;
 	int i = 0;
 	struct emac_priv *priv = netdev_priv(ndev);
+	struct phy_device *phydev = NULL;
 
 	ret = pm_runtime_get_sync(&priv->pdev->dev);
 	if (ret < 0) {
@@ -1607,12 +1568,10 @@ static int emac_dev_open(struct net_device *ndev)
 
 	cpdma_ctlr_start(priv->dma);
 
-	priv->phydev = NULL;
-
 	if (priv->phy_node) {
-		priv->phydev = of_phy_connect(ndev, priv->phy_node,
-					      &emac_adjust_link, 0, 0);
-		if (!priv->phydev) {
+		phydev = of_phy_connect(ndev, priv->phy_node,
+					&emac_adjust_link, 0, 0);
+		if (!phydev) {
 			dev_err(emac_dev, "could not connect to phy %s\n",
 				priv->phy_node->full_name);
 			ret = -ENODEV;
@@ -1621,7 +1580,7 @@ static int emac_dev_open(struct net_device *ndev)
 	}
 
 	/* use the first phy on the bus if pdata did not give us a phy id */
-	if (!priv->phydev && !priv->phy_id) {
+	if (!phydev && !priv->phy_id) {
 		struct device *phy;
 
 		phy = bus_find_device(&mdio_bus_type, NULL, NULL,
@@ -1630,16 +1589,15 @@ static int emac_dev_open(struct net_device *ndev)
 			priv->phy_id = dev_name(phy);
 	}
 
-	if (!priv->phydev && priv->phy_id && *priv->phy_id) {
-		priv->phydev = phy_connect(ndev, priv->phy_id,
-					   &emac_adjust_link,
-					   PHY_INTERFACE_MODE_MII);
+	if (!phydev && priv->phy_id && *priv->phy_id) {
+		phydev = phy_connect(ndev, priv->phy_id,
+				     &emac_adjust_link,
+				     PHY_INTERFACE_MODE_MII);
 
-		if (IS_ERR(priv->phydev)) {
+		if (IS_ERR(phydev)) {
 			dev_err(emac_dev, "could not connect to phy %s\n",
 				priv->phy_id);
-			ret = PTR_ERR(priv->phydev);
-			priv->phydev = NULL;
+			ret = PTR_ERR(phydev);
 			goto err;
 		}
 
@@ -1647,10 +1605,10 @@ static int emac_dev_open(struct net_device *ndev)
 		priv->speed = 0;
 		priv->duplex = ~0;
 
-		phy_attached_info(priv->phydev);
+		phy_attached_info(phydev);
 	}
 
-	if (!priv->phydev) {
+	if (!phydev) {
 		/* No PHY , fix the link, speed and duplex settings */
 		dev_notice(emac_dev, "no phy, defaulting to 100/full\n");
 		priv->link = 1;
@@ -1665,8 +1623,8 @@ static int emac_dev_open(struct net_device *ndev)
 	if (netif_msg_drv(priv))
 		dev_notice(emac_dev, "DaVinci EMAC: Opened %s\n", ndev->name);
 
-	if (priv->phydev)
-		phy_start(priv->phydev);
+	if (phydev)
+		phy_start(phydev);
 
 	return 0;
 
@@ -1717,8 +1675,8 @@ static int emac_dev_stop(struct net_device *ndev)
 	cpdma_ctlr_stop(priv->dma);
 	emac_write(EMAC_SOFTRESET, 1);
 
-	if (priv->phydev)
-		phy_disconnect(priv->phydev);
+	if (ndev->phydev)
+		phy_disconnect(ndev->phydev);
 
 	/* Free IRQ */
 	while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, i))) {
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 0a15acc075b3..11213a38c795 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -462,7 +462,7 @@ static void tile_tx_timestamp(struct sk_buff *skb, int instance)
 	if (unlikely((shtx->tx_flags & SKBTX_HW_TSTAMP) != 0)) {
 		struct mpipe_data *md = &mpipe_data[instance];
 		struct skb_shared_hwtstamps shhwtstamps;
-		struct timespec ts;
+		struct timespec64 ts;
 
 		shtx->tx_flags |= SKBTX_IN_PROGRESS;
 		gxio_mpipe_get_timestamp(&md->context, &ts);
@@ -886,9 +886,9 @@ static struct ptp_clock_info ptp_mpipe_caps = {
 /* Sync mPIPE's timestamp up with Linux system time and register PTP clock. */
 static void register_ptp_clock(struct net_device *dev, struct mpipe_data *md)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 
-	getnstimeofday(&ts);
+	ktime_get_ts64(&ts);
 	gxio_mpipe_set_timestamp(&md->context, &ts);
 
 	mutex_init(&md->ptp_lock);
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 5138407941cf..7f127dc1b7ba 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -171,7 +171,6 @@ struct port {
 	struct npe *npe;
 	struct net_device *netdev;
 	struct napi_struct napi;
-	struct phy_device *phydev;
 	struct eth_plat_info *plat;
 	buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
 	struct desc *desc_tab;	/* coherent */
@@ -562,7 +561,7 @@ static void ixp4xx_mdio_remove(void)
 static void ixp4xx_adjust_link(struct net_device *dev)
 {
 	struct port *port = netdev_priv(dev);
-	struct phy_device *phydev = port->phydev;
+	struct phy_device *phydev = dev->phydev;
 
 	if (!phydev->link) {
 		if (port->speed) {
@@ -976,8 +975,6 @@ static void eth_set_mcast_list(struct net_device *dev)
 
 static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 {
-	struct port *port = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
@@ -988,7 +985,7 @@ static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 			return hwtstamp_get(dev, req);
 	}
 
-	return phy_mii_ioctl(port->phydev, req, cmd);
+	return phy_mii_ioctl(dev->phydev, req, cmd);
 }
 
 /* ethtool support */
@@ -1005,22 +1002,9 @@ static void ixp4xx_get_drvinfo(struct net_device *dev,
 	strlcpy(info->bus_info, "internal", sizeof(info->bus_info));
 }
 
-static int ixp4xx_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct port *port = netdev_priv(dev);
-	return phy_ethtool_gset(port->phydev, cmd);
-}
-
-static int ixp4xx_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct port *port = netdev_priv(dev);
-	return phy_ethtool_sset(port->phydev, cmd);
-}
-
 static int ixp4xx_nway_reset(struct net_device *dev)
 {
-	struct port *port = netdev_priv(dev);
-	return phy_start_aneg(port->phydev);
+	return phy_start_aneg(dev->phydev);
 }
 
 int ixp46x_phc_index = -1;
@@ -1054,11 +1038,11 @@ static int ixp4xx_get_ts_info(struct net_device *dev,
 
 static const struct ethtool_ops ixp4xx_ethtool_ops = {
 	.get_drvinfo = ixp4xx_get_drvinfo,
-	.get_settings = ixp4xx_get_settings,
-	.set_settings = ixp4xx_set_settings,
 	.nway_reset = ixp4xx_nway_reset,
 	.get_link = ethtool_op_get_link,
 	.get_ts_info = ixp4xx_get_ts_info,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 
@@ -1259,7 +1243,7 @@ static int eth_open(struct net_device *dev)
 	}
 
 	port->speed = 0;	/* force "link up" message */
-	phy_start(port->phydev);
+	phy_start(dev->phydev);
 
 	for (i = 0; i < ETH_ALEN; i++)
 		__raw_writel(dev->dev_addr[i], &port->regs->hw_addr[i]);
@@ -1380,7 +1364,7 @@ static int eth_close(struct net_device *dev)
 		printk(KERN_CRIT "%s: unable to disable loopback\n",
 		       dev->name);
 
-	phy_stop(port->phydev);
+	phy_stop(dev->phydev);
 
 	if (!ports_open)
 		qmgr_disable_irq(TXDONE_QUEUE);
@@ -1405,6 +1389,7 @@ static int eth_init_one(struct platform_device *pdev)
 	struct port *port;
 	struct net_device *dev;
 	struct eth_plat_info *plat = dev_get_platdata(&pdev->dev);
+	struct phy_device *phydev = NULL;
 	u32 regs_phys;
 	char phy_id[MII_BUS_ID_SIZE + 3];
 	int err;
@@ -1466,14 +1451,14 @@ static int eth_init_one(struct platform_device *pdev)
 
 	snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
 		mdio_bus->id, plat->phy);
-	port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link,
-				   PHY_INTERFACE_MODE_MII);
-	if (IS_ERR(port->phydev)) {
-		err = PTR_ERR(port->phydev);
+	phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link,
+			     PHY_INTERFACE_MODE_MII);
+	if (IS_ERR(phydev)) {
+		err = PTR_ERR(phydev);
 		goto err_free_mem;
 	}
 
-	port->phydev->irq = PHY_POLL;
+	phydev->irq = PHY_POLL;
 
 	if ((err = register_netdev(dev)))
 		goto err_phy_dis;
@@ -1484,7 +1469,7 @@ static int eth_init_one(struct platform_device *pdev)
 	return 0;
 
 err_phy_dis:
-	phy_disconnect(port->phydev);
+	phy_disconnect(phydev);
 err_free_mem:
 	npe_port_tab[NPE_ID(port->id)] = NULL;
 	release_resource(port->mem_res);
@@ -1498,10 +1483,11 @@ err_free:
 static int eth_remove_one(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
+	struct phy_device *phydev = dev->phydev;
 	struct port *port = netdev_priv(dev);
 
 	unregister_netdev(dev);
-	phy_disconnect(port->phydev);
+	phy_disconnect(phydev);
 	npe_port_tab[NPE_ID(port->id)] = NULL;
 	npe_release(port->npe);
 	release_resource(port->mem_res);
diff --git a/drivers/net/fddi/skfp/Makefile b/drivers/net/fddi/skfp/Makefile
index b0be0234abf6..a957a1c7e5ba 100644
--- a/drivers/net/fddi/skfp/Makefile
+++ b/drivers/net/fddi/skfp/Makefile
@@ -17,4 +17,4 @@ skfp-objs :=  skfddi.o    hwmtm.o    fplustm.o  smt.o      cfm.o     \
 #   projects. To keep the source common for all those drivers (and
 #   thus simplify fixes to it), please do not clean it up!
 
-ccflags-y := -Idrivers/net/skfp -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes
+ccflags-y := -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index aa61708bea69..3c20e87bb761 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -922,8 +922,8 @@ tx_error:
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
 		dev->stats.tx_carrier_errors++;
-	else
-		dev->stats.tx_errors++;
+
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -1012,8 +1012,8 @@ tx_error:
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
 		dev->stats.tx_carrier_errors++;
-	else
-		dev->stats.tx_errors++;
+
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 #endif
@@ -1036,12 +1036,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
+	struct geneve_dev *geneve = netdev_priv(dev);
 	/* The max_mtu calculation does not take account of GENEVE
 	 * options, to avoid excluding potentially valid
 	 * configurations.
 	 */
-	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
-		- dev->hard_header_len;
+	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
+	if (geneve->remote.sa.sa_family == AF_INET6)
+		max_mtu -= sizeof(struct ipv6hdr);
+	else
+		max_mtu -= sizeof(struct iphdr);
 
 	if (new_mtu < 68)
 		return -EINVAL;
@@ -1129,9 +1134,9 @@ static struct device_type geneve_type = {
 	.name = "geneve",
 };
 
-/* Calls the ndo_add_udp_enc_port of the caller in order to
+/* Calls the ndo_udp_tunnel_add of the caller in order to
  * supply the listening GENEVE udp ports. Callers are expected
- * to implement the ndo_add_udp_enc_port.
+ * to implement the ndo_udp_tunnel_add.
  */
 static void geneve_push_rx_ports(struct net_device *dev)
 {
@@ -1463,6 +1468,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 {
 	struct nlattr *tb[IFLA_MAX + 1];
 	struct net_device *dev;
+	LIST_HEAD(list_kill);
 	int err;
 
 	memset(tb, 0, sizeof(tb));
@@ -1474,8 +1480,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	err = geneve_configure(net, dev, &geneve_remote_unspec,
 			       0, 0, 0, 0, htons(dst_port), true,
 			       GENEVE_F_UDP_ZERO_CSUM6_RX);
-	if (err)
-		goto err;
+	if (err) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
 
 	/* openvswitch users expect packet sizes to be unrestricted,
 	 * so set the largest MTU we can.
@@ -1484,10 +1492,15 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	if (err)
 		goto err;
 
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0)
+		goto err;
+
 	return dev;
 
  err:
-	free_netdev(dev);
+	geneve_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 6909c322de4e..20e09174ff62 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1128,6 +1128,39 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
 	}
 }
 
+static void netvsc_process_raw_pkt(struct hv_device *device,
+				   struct vmbus_channel *channel,
+				   struct netvsc_device *net_device,
+				   struct net_device *ndev,
+				   u64 request_id,
+				   struct vmpacket_descriptor *desc)
+{
+	struct nvsp_message *nvmsg;
+
+	nvmsg = (struct nvsp_message *)((unsigned long)
+		desc + (desc->offset8 << 3));
+
+	switch (desc->type) {
+	case VM_PKT_COMP:
+		netvsc_send_completion(net_device, channel, device, desc);
+		break;
+
+	case VM_PKT_DATA_USING_XFER_PAGES:
+		netvsc_receive(net_device, channel, device, desc);
+		break;
+
+	case VM_PKT_DATA_INBAND:
+		netvsc_receive_inband(device, net_device, nvmsg);
+		break;
+
+	default:
+		netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
+			   desc->type, request_id);
+		break;
+	}
+}
+
+
 void netvsc_channel_cb(void *context)
 {
 	int ret;
@@ -1140,7 +1173,7 @@ void netvsc_channel_cb(void *context)
 	unsigned char *buffer;
 	int bufferlen = NETVSC_PACKET_SIZE;
 	struct net_device *ndev;
-	struct nvsp_message *nvmsg;
+	bool need_to_commit = false;
 
 	if (channel->primary_channel != NULL)
 		device = channel->primary_channel->device_obj;
@@ -1154,39 +1187,36 @@ void netvsc_channel_cb(void *context)
 	buffer = get_per_channel_state(channel);
 
 	do {
+		desc = get_next_pkt_raw(channel);
+		if (desc != NULL) {
+			netvsc_process_raw_pkt(device,
+					       channel,
+					       net_device,
+					       ndev,
+					       desc->trans_id,
+					       desc);
+
+			put_pkt_raw(channel, desc);
+			need_to_commit = true;
+			continue;
+		}
+		if (need_to_commit) {
+			need_to_commit = false;
+			commit_rd_index(channel);
+		}
+
 		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
 					   &bytes_recvd, &request_id);
 		if (ret == 0) {
 			if (bytes_recvd > 0) {
 				desc = (struct vmpacket_descriptor *)buffer;
-				nvmsg = (struct nvsp_message *)((unsigned long)
-					 desc + (desc->offset8 << 3));
-				switch (desc->type) {
-				case VM_PKT_COMP:
-					netvsc_send_completion(net_device,
-								channel,
-								device, desc);
-					break;
-
-				case VM_PKT_DATA_USING_XFER_PAGES:
-					netvsc_receive(net_device, channel,
-						       device, desc);
-					break;
-
-				case VM_PKT_DATA_INBAND:
-					netvsc_receive_inband(device,
-							      net_device,
-							      nvmsg);
-					break;
-
-				default:
-					netdev_err(ndev,
-						   "unhandled packet type %d, "
-						   "tid %llx len %d\n",
-						   desc->type, request_id,
-						   bytes_recvd);
-					break;
-				}
+				netvsc_process_raw_pkt(device,
+						       channel,
+						       net_device,
+						       ndev,
+						       request_id,
+						       desc);
+
 
 			} else {
 				/*
diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index 52c9051f3b95..1056ed142411 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -366,11 +366,7 @@ static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
 	struct atusb *atusb = hw->priv;
 	int ret;
 
-	/* This implicitly sets the CCA (Clear Channel Assessment) mode to 0,
-	 * "Mode 3a, Carrier sense OR energy above threshold".
-	 * We should probably make this configurable. @@@
-	 */
-	ret = atusb_write_reg(atusb, RG_PHY_CC_CCA, channel);
+	ret = atusb_write_subreg(atusb, SR_CHANNEL, channel);
 	if (ret < 0)
 		return ret;
 	msleep(1);	/* @@@ ugly synchronization */
diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c
index 860d4aed8274..0becf0ac3926 100644
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c
@@ -112,6 +112,12 @@ static void fakelb_hw_stop(struct ieee802154_hw *hw)
 	write_unlock_bh(&fakelb_ifup_phys_lock);
 }
 
+static int
+fakelb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+	return 0;
+}
+
 static const struct ieee802154_ops fakelb_ops = {
 	.owner = THIS_MODULE,
 	.xmit_async = fakelb_hw_xmit,
@@ -119,6 +125,7 @@ static const struct ieee802154_ops fakelb_ops = {
 	.set_channel = fakelb_hw_channel,
 	.start = fakelb_hw_start,
 	.stop = fakelb_hw_stop,
+	.set_promiscuous_mode = fakelb_set_promiscuous_mode,
 };
 
 /* Number of dummy devices to be set up by this module. */
@@ -174,6 +181,7 @@ static int fakelb_add_one(struct device *dev)
 	hw->phy->current_channel = 13;
 	phy->channel = hw->phy->current_channel;
 
+	hw->flags = IEEE802154_HW_PROMISCUOUS;
 	hw->parent = dev;
 
 	err = ieee802154_register_hw(hw);
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index f446db828561..7b131f8e4093 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -1054,6 +1054,8 @@ static irqreturn_t mrf24j40_isr(int irq, void *data)
 	disable_irq_nosync(irq);
 
 	devrec->irq_buf[0] = MRF24J40_READSHORT(REG_INTSTAT);
+	devrec->irq_buf[1] = 0;
+
 	/* Read the interrupt status */
 	ret = spi_async(devrec->spi, &devrec->irq_msg);
 	if (ret) {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 47ee2c840b55..8bcd78f94966 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -605,12 +605,41 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
 	dev_put(dev);
 }
 
+static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
+					     unsigned char **iv,
+					     struct scatterlist **sg)
+{
+	size_t size, iv_offset, sg_offset;
+	struct aead_request *req;
+	void *tmp;
+
+	size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm);
+	iv_offset = size;
+	size += GCM_AES_IV_LEN;
+
+	size = ALIGN(size, __alignof__(struct scatterlist));
+	sg_offset = size;
+	size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
+
+	tmp = kmalloc(size, GFP_ATOMIC);
+	if (!tmp)
+		return NULL;
+
+	*iv = (unsigned char *)(tmp + iv_offset);
+	*sg = (struct scatterlist *)(tmp + sg_offset);
+	req = tmp;
+
+	aead_request_set_tfm(req, tfm);
+
+	return req;
+}
+
 static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	int ret;
-	struct scatterlist sg[MAX_SKB_FRAGS + 1];
-	unsigned char iv[GCM_AES_IV_LEN];
+	struct scatterlist *sg;
+	unsigned char *iv;
 	struct ethhdr *eth;
 	struct macsec_eth_header *hh;
 	size_t unprotected_len;
@@ -668,8 +697,6 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 	macsec_fill_sectag(hh, secy, pn);
 	macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN);
 
-	macsec_fill_iv(iv, secy->sci, pn);
-
 	skb_put(skb, secy->icv_len);
 
 	if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) {
@@ -684,13 +711,15 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 		return ERR_PTR(-EINVAL);
 	}
 
-	req = aead_request_alloc(tx_sa->key.tfm, GFP_ATOMIC);
+	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg);
 	if (!req) {
 		macsec_txsa_put(tx_sa);
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	macsec_fill_iv(iv, secy->sci, pn);
+
 	sg_init_table(sg, MAX_SKB_FRAGS + 1);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
@@ -861,7 +890,6 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err)
 out:
 	macsec_rxsa_put(rx_sa);
 	dev_put(dev);
-	return;
 }
 
 static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
@@ -871,8 +899,8 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 				      struct macsec_secy *secy)
 {
 	int ret;
-	struct scatterlist sg[MAX_SKB_FRAGS + 1];
-	unsigned char iv[GCM_AES_IV_LEN];
+	struct scatterlist *sg;
+	unsigned char *iv;
 	struct aead_request *req;
 	struct macsec_eth_header *hdr;
 	u16 icv_len = secy->icv_len;
@@ -882,7 +910,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC);
+	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg);
 	if (!req) {
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
@@ -1234,7 +1262,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
 	struct crypto_aead *tfm;
 	int ret;
 
-	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
 	if (!tfm || IS_ERR(tfm))
 		return NULL;
 
@@ -2612,6 +2640,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 		u64_stats_update_begin(&secy_stats->syncp);
 		secy_stats->stats.OutPktsUntagged++;
 		u64_stats_update_end(&secy_stats->syncp);
+		skb->dev = macsec->real_dev;
 		len = skb->len;
 		ret = dev_queue_xmit(skb);
 		count_tx(dev, ret, len);
@@ -3361,6 +3390,7 @@ static void __exit macsec_exit(void)
 	genl_unregister_family(&macsec_fam);
 	rtnl_link_unregister(&macsec_link_ops);
 	unregister_netdevice_notifier(&macsec_notifier);
+	rcu_barrier();
 }
 
 module_init(macsec_init);
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 2afa61b51d41..91177a4a32ad 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -57,6 +57,7 @@
 
 /* PHY CTRL bits */
 #define DP83867_PHYCR_FIFO_DEPTH_SHIFT		14
+#define DP83867_PHYCR_FIFO_DEPTH_MASK		(3 << 14)
 
 /* RGMIIDCTL bits */
 #define DP83867_RGMII_TX_CLK_DELAY_SHIFT	4
@@ -133,8 +134,8 @@ static int dp83867_of_init(struct phy_device *phydev)
 static int dp83867_config_init(struct phy_device *phydev)
 {
 	struct dp83867_private *dp83867;
-	int ret;
-	u16 val, delay;
+	int ret, val;
+	u16 delay;
 
 	if (!phydev->priv) {
 		dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
@@ -151,8 +152,12 @@ static int dp83867_config_init(struct phy_device *phydev)
 	}
 
 	if (phy_interface_is_rgmii(phydev)) {
-		ret = phy_write(phydev, MII_DP83867_PHYCTRL,
-			(dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT));
+		val = phy_read(phydev, MII_DP83867_PHYCTRL);
+		if (val < 0)
+			return val;
+		val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK;
+		val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT);
+		ret = phy_write(phydev, MII_DP83867_PHYCTRL, val);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index b376ada83598..c649c101bbab 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -24,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/gpio.h>
 #include <linux/seqlock.h>
+#include <linux/idr.h>
 
 #include "swphy.h"
 
@@ -189,6 +190,8 @@ err_regs:
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
+static DEFINE_IDA(phy_fixed_ida);
+
 static void fixed_phy_del(int phy_addr)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
@@ -200,14 +203,12 @@ static void fixed_phy_del(int phy_addr)
 			if (gpio_is_valid(fp->link_gpio))
 				gpio_free(fp->link_gpio);
 			kfree(fp);
+			ida_simple_remove(&phy_fixed_ida, phy_addr);
 			return;
 		}
 	}
 }
 
-static int phy_fixed_addr;
-static DEFINE_SPINLOCK(phy_fixed_addr_lock);
-
 struct phy_device *fixed_phy_register(unsigned int irq,
 				      struct fixed_phy_status *status,
 				      int link_gpio,
@@ -222,17 +223,15 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 		return ERR_PTR(-EPROBE_DEFER);
 
 	/* Get the next available PHY address, up to PHY_MAX_ADDR */
-	spin_lock(&phy_fixed_addr_lock);
-	if (phy_fixed_addr == PHY_MAX_ADDR) {
-		spin_unlock(&phy_fixed_addr_lock);
-		return ERR_PTR(-ENOSPC);
-	}
-	phy_addr = phy_fixed_addr++;
-	spin_unlock(&phy_fixed_addr_lock);
+	phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL);
+	if (phy_addr < 0)
+		return ERR_PTR(phy_addr);
 
 	ret = fixed_phy_add(irq, phy_addr, status, link_gpio);
-	if (ret < 0)
+	if (ret < 0) {
+		ida_simple_remove(&phy_fixed_ida, phy_addr);
 		return ERR_PTR(ret);
+	}
 
 	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
 	if (IS_ERR(phy)) {
@@ -337,6 +336,7 @@ static void __exit fixed_mdio_bus_exit(void)
 		list_del(&fp->node);
 		kfree(fp);
 	}
+	ida_destroy(&phy_fixed_ida);
 }
 module_exit(fixed_mdio_bus_exit);
 
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 280e8795b463..ec2c1eee6405 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -285,6 +285,48 @@ static int marvell_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int m88e1111_config_aneg(struct phy_device *phydev)
+{
+	int err;
+
+	/* The Marvell PHY has an errata which requires
+	 * that certain registers get written in order
+	 * to restart autonegotiation
+	 */
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+	err = marvell_set_polarity(phydev, phydev->mdix);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, MII_M1111_PHY_LED_CONTROL,
+			MII_M1111_PHY_LED_DIRECT);
+	if (err < 0)
+		return err;
+
+	err = genphy_config_aneg(phydev);
+	if (err < 0)
+		return err;
+
+	if (phydev->autoneg != AUTONEG_ENABLE) {
+		int bmcr;
+
+		/* A write to speed/duplex bits (that is performed by
+		 * genphy_config_aneg() call above) must be followed by
+		 * a software reset. Otherwise, the write has no effect.
+		 */
+		bmcr = phy_read(phydev, MII_BMCR);
+		if (bmcr < 0)
+			return bmcr;
+
+		err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_OF_MDIO
 /*
  * Set and/or override some configuration registers based on the
@@ -407,15 +449,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
-
-	phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
-	phy_write(phydev, MII_88E1121_PHY_LED_CTRL, MII_88E1121_PHY_LED_DEF);
-	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
-
-	err = genphy_config_aneg(phydev);
-
-	return err;
+	return genphy_config_aneg(phydev);
 }
 
 static int m88e1318_config_aneg(struct phy_device *phydev)
@@ -636,6 +670,28 @@ static int m88e1111_config_init(struct phy_device *phydev)
 	return phy_write(phydev, MII_BMCR, BMCR_RESET);
 }
 
+static int m88e1121_config_init(struct phy_device *phydev)
+{
+	int err, oldpage;
+
+	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
+
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
+	if (err < 0)
+		return err;
+
+	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
+	err = phy_write(phydev, MII_88E1121_PHY_LED_CTRL,
+			MII_88E1121_PHY_LED_DEF);
+	if (err < 0)
+		return err;
+
+	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
+
+	/* Set marvell,reg-init configuration from device tree */
+	return marvell_config_init(phydev);
+}
+
 static int m88e1510_config_init(struct phy_device *phydev)
 {
 	int err;
@@ -668,7 +724,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
 			return err;
 	}
 
-	return marvell_config_init(phydev);
+	return m88e1121_config_init(phydev);
 }
 
 static int m88e1118_config_aneg(struct phy_device *phydev)
@@ -1161,7 +1217,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &m88e1111_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1111_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -1196,7 +1252,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &marvell_config_init,
+		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1215,7 +1271,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &marvell_config_init,
+		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1318_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 2e21e9366f76..b62c4aaee40b 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev)
 	 * in all capable mode before using it.
 	 */
 	if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) {
-		int timeout = 50000;
-
-		/* set "all capable" mode and reset the phy */
+		/* set "all capable" mode */
 		rc |= MII_LAN83C185_MODE_ALL;
 		phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc);
-		phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-		/* wait end of reset (max 500 ms) */
-		do {
-			udelay(10);
-			if (timeout-- == 0)
-				return -1;
-			rc = phy_read(phydev, MII_BMCR);
-		} while (rc & BMCR_RESET);
 	}
-	return 0;
+
+	/* reset the phy */
+	return genphy_soft_reset(phydev);
 }
 
 static int lan911x_config_init(struct phy_device *phydev)
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 0a1bb8387d96..a380649bf6b5 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1203,8 +1203,10 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_dev_open;
 	}
 
+	netif_addr_lock_bh(dev);
 	dev_uc_sync_multiple(port_dev, dev);
 	dev_mc_sync_multiple(port_dev, dev);
+	netif_addr_unlock_bh(dev);
 
 	err = vlan_vids_add_by_dev(port_dev, dev);
 	if (err) {
@@ -2000,6 +2002,8 @@ static const struct net_device_ops team_netdev_ops = {
 	.ndo_add_slave		= team_add_slave,
 	.ndo_del_slave		= team_del_slave,
 	.ndo_fix_features	= team_fix_features,
+	.ndo_neigh_construct	= netdev_default_l2upper_neigh_construct,
+	.ndo_neigh_destroy	= netdev_default_l2upper_neigh_destroy,
 	.ndo_change_carrier     = team_change_carrier,
 	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4884802e0af1..9c8b5bc2b9d8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -71,6 +71,7 @@
 #include <net/sock.h>
 #include <linux/seq_file.h>
 #include <linux/uio.h>
+#include <linux/skb_array.h>
 
 #include <asm/uaccess.h>
 
@@ -167,6 +168,7 @@ struct tun_file {
 	};
 	struct list_head next;
 	struct tun_struct *detached;
+	struct skb_array tx_array;
 };
 
 struct tun_flow_entry {
@@ -515,7 +517,11 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 
 static void tun_queue_purge(struct tun_file *tfile)
 {
-	skb_queue_purge(&tfile->sk.sk_receive_queue);
+	struct sk_buff *skb;
+
+	while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
+		kfree_skb(skb);
+
 	skb_queue_purge(&tfile->sk.sk_error_queue);
 }
 
@@ -560,6 +566,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 			    tun->dev->reg_state == NETREG_REGISTERED)
 				unregister_netdevice(tun->dev);
 		}
+		if (tun)
+			skb_array_cleanup(&tfile->tx_array);
 		sock_put(&tfile->sk);
 	}
 }
@@ -613,6 +621,7 @@ static void tun_detach_all(struct net_device *dev)
 static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
 {
 	struct tun_file *tfile = file->private_data;
+	struct net_device *dev = tun->dev;
 	int err;
 
 	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
@@ -642,6 +651,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
 		if (!err)
 			goto out;
 	}
+
+	if (!tfile->detached &&
+	    skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	tfile->queue_index = tun->numqueues;
 	tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
 	rcu_assign_pointer(tfile->tun, tun);
@@ -891,8 +907,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset(skb);
 
-	/* Enqueue packet */
-	skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
+	if (skb_array_produce(&tfile->tx_array, skb))
+		goto drop;
 
 	/* Notify and wake up reader process */
 	if (tfile->flags & TUN_FASYNC)
@@ -1107,7 +1123,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, sk_sleep(sk), wait);
 
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_array_empty(&tfile->tx_array))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sock_writeable(sk) ||
@@ -1426,22 +1442,63 @@ done:
 	return total;
 }
 
+static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
+				     int *err)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sk_buff *skb = NULL;
+	int error = 0;
+
+	skb = skb_array_consume(&tfile->tx_array);
+	if (skb)
+		goto out;
+	if (noblock) {
+		error = -EAGAIN;
+		goto out;
+	}
+
+	add_wait_queue(&tfile->wq.wait, &wait);
+	current->state = TASK_INTERRUPTIBLE;
+
+	while (1) {
+		skb = skb_array_consume(&tfile->tx_array);
+		if (skb)
+			break;
+		if (signal_pending(current)) {
+			error = -ERESTARTSYS;
+			break;
+		}
+		if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) {
+			error = -EFAULT;
+			break;
+		}
+
+		schedule();
+	}
+
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&tfile->wq.wait, &wait);
+
+out:
+	*err = error;
+	return skb;
+}
+
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   struct iov_iter *to,
 			   int noblock)
 {
 	struct sk_buff *skb;
 	ssize_t ret;
-	int peeked, err, off = 0;
+	int err;
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
 	if (!iov_iter_count(to))
 		return 0;
 
-	/* Read frames from queue */
-	skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0,
-				  &peeked, &off, &err);
+	/* Read frames from ring */
+	skb = tun_ring_recv(tfile, noblock, &err);
 	if (!skb)
 		return err;
 
@@ -1574,8 +1631,25 @@ out:
 	return ret;
 }
 
+static int tun_peek_len(struct socket *sock)
+{
+	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+	struct tun_struct *tun;
+	int ret = 0;
+
+	tun = __tun_get(tfile);
+	if (!tun)
+		return 0;
+
+	ret = skb_array_peek_len(&tfile->tx_array);
+	tun_put(tun);
+
+	return ret;
+}
+
 /* Ops structure to mimic raw sockets with tun */
 static const struct proto_ops tun_socket_ops = {
+	.peek_len = tun_peek_len,
 	.sendmsg = tun_sendmsg,
 	.recvmsg = tun_recvmsg,
 };
@@ -2397,6 +2471,56 @@ static const struct ethtool_ops tun_ethtool_ops = {
 	.get_ts_info	= ethtool_op_get_ts_info,
 };
 
+static int tun_queue_resize(struct tun_struct *tun)
+{
+	struct net_device *dev = tun->dev;
+	struct tun_file *tfile;
+	struct skb_array **arrays;
+	int n = tun->numqueues + tun->numdisabled;
+	int ret, i;
+
+	arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
+	if (!arrays)
+		return -ENOMEM;
+
+	for (i = 0; i < tun->numqueues; i++) {
+		tfile = rtnl_dereference(tun->tfiles[i]);
+		arrays[i] = &tfile->tx_array;
+	}
+	list_for_each_entry(tfile, &tun->disabled, next)
+		arrays[i++] = &tfile->tx_array;
+
+	ret = skb_array_resize_multiple(arrays, n,
+					dev->tx_queue_len, GFP_KERNEL);
+
+	kfree(arrays);
+	return ret;
+}
+
+static int tun_device_event(struct notifier_block *unused,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct tun_struct *tun = netdev_priv(dev);
+
+	if (dev->rtnl_link_ops != &tun_link_ops)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_CHANGE_TX_QUEUE_LEN:
+		if (tun_queue_resize(tun))
+			return NOTIFY_BAD;
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block tun_notifier_block __read_mostly = {
+	.notifier_call	= tun_device_event,
+};
 
 static int __init tun_init(void)
 {
@@ -2416,6 +2540,8 @@ static int __init tun_init(void)
 		pr_err("Can't register misc device %d\n", TUN_MINOR);
 		goto err_misc;
 	}
+
+	register_netdevice_notifier(&tun_notifier_block);
 	return  0;
 err_misc:
 	rtnl_link_unregister(&tun_link_ops);
@@ -2427,6 +2553,7 @@ static void tun_cleanup(void)
 {
 	misc_deregister(&tun_miscdev);
 	rtnl_link_unregister(&tun_link_ops);
+	unregister_netdevice_notifier(&tun_notifier_block);
 }
 
 /* Get an underlying socket object from tun file.  Returns error unless file is
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 53759c315b97..877c9516e781 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -854,6 +854,13 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	if (cdc_ncm_init(dev))
 		goto error2;
 
+	/* Some firmwares need a pause here or they will silently fail
+	 * to set up the interface properly.  This value was decided
+	 * empirically on a Sierra Wireless MC7455 running 02.08.02.00
+	 * firmware.
+	 */
+	usleep_range(10000, 20000);
+
 	/* configure data interface */
 	temp = usb_set_interface(dev->udev, iface_no, data_altsetting);
 	if (temp) {
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 11178f9dc838..168a8e2e15c8 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -31,7 +31,7 @@
 #define NETNEXT_VERSION		"08"
 
 /* Information for net */
-#define NET_VERSION		"3"
+#define NET_VERSION		"5"
 
 #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -116,6 +116,7 @@
 #define USB_TX_DMA		0xd434
 #define USB_TOLERANCE		0xd490
 #define USB_LPM_CTRL		0xd41a
+#define USB_BMU_RESET		0xd4b0
 #define USB_UPS_CTRL		0xd800
 #define USB_MISC_0		0xd81a
 #define USB_POWER_CUT		0xd80a
@@ -338,6 +339,10 @@
 #define TEST_MODE_DISABLE	0x00000001
 #define TX_SIZE_ADJUST1		0x00000100
 
+/* USB_BMU_RESET */
+#define BMU_RESET_EP_IN		0x01
+#define BMU_RESET_EP_OUT	0x02
+
 /* USB_UPS_CTRL */
 #define POWER_CUT		0x0100
 
@@ -620,6 +625,7 @@ struct r8152 {
 		int (*eee_set)(struct r8152 *, struct ethtool_eee *);
 		bool (*in_nway)(struct r8152 *);
 		void (*hw_phy_cfg)(struct r8152 *);
+		void (*autosuspend_en)(struct r8152 *tp, bool enable);
 	} rtl_ops;
 
 	int intr_interval;
@@ -2173,7 +2179,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 static void r8153_set_rx_early_size(struct r8152 *tp)
 {
 	u32 mtu = tp->netdev->mtu;
-	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4;
+	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
 
 	ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
 }
@@ -2407,9 +2413,6 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
 	if (enable) {
 		u32 ocp_data;
 
-		r8153_u1u2en(tp, false);
-		r8153_u2p3en(tp, false);
-
 		__rtl_set_wol(tp, WAKE_ANY);
 
 		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
@@ -2420,30 +2423,30 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
 
 		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
 	} else {
+		u32 ocp_data;
+
 		__rtl_set_wol(tp, tp->saved_wolopts);
-		r8153_u2p3en(tp, true);
-		r8153_u1u2en(tp, true);
-	}
-}
 
-static void rtl_phy_reset(struct r8152 *tp)
-{
-	u16 data;
-	int i;
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
 
-	data = r8152_mdio_read(tp, MII_BMCR);
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
+		ocp_data &= ~LINK_OFF_WAKE_EN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
 
-	/* don't reset again before the previous one complete */
-	if (data & BMCR_RESET)
-		return;
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
+	}
+}
 
-	data |= BMCR_RESET;
-	r8152_mdio_write(tp, MII_BMCR, data);
+static void rtl8153_runtime_enable(struct r8152 *tp, bool enable)
+{
+	rtl_runtime_suspend_enable(tp, enable);
 
-	for (i = 0; i < 50; i++) {
-		msleep(20);
-		if ((r8152_mdio_read(tp, MII_BMCR) & BMCR_RESET) == 0)
-			break;
+	if (enable) {
+		r8153_u1u2en(tp, false);
+		r8153_u2p3en(tp, false);
+	} else {
+		r8153_u2p3en(tp, true);
+		r8153_u1u2en(tp, true);
 	}
 }
 
@@ -2460,6 +2463,17 @@ static void r8153_teredo_off(struct r8152 *tp)
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0);
 }
 
+static void rtl_reset_bmu(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_RESET);
+	ocp_data &= ~(BMU_RESET_EP_IN | BMU_RESET_EP_OUT);
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+	ocp_data |= BMU_RESET_EP_IN | BMU_RESET_EP_OUT;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+}
+
 static void r8152_aldps_en(struct r8152 *tp, bool enable)
 {
 	if (enable) {
@@ -2681,6 +2695,7 @@ static void r8153_first_init(struct r8152 *tp)
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 
 	rtl8152_nic_reset(tp);
+	rtl_reset_bmu(tp);
 
 	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
 	ocp_data &= ~NOW_IS_OOB;
@@ -2742,6 +2757,7 @@ static void r8153_enter_oob(struct r8152 *tp)
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
 
 	rtl_disable(tp);
+	rtl_reset_bmu(tp);
 
 	for (i = 0; i < 1000; i++) {
 		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
@@ -2803,6 +2819,7 @@ static void rtl8153_disable(struct r8152 *tp)
 {
 	r8153_aldps_en(tp, false);
 	rtl_disable(tp);
+	rtl_reset_bmu(tp);
 	r8153_aldps_en(tp, true);
 	usb_enable_lpm(tp->udev);
 }
@@ -2812,7 +2829,6 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
 	u16 bmcr, anar, gbcr;
 	int ret = 0;
 
-	cancel_delayed_work_sync(&tp->schedule);
 	anar = r8152_mdio_read(tp, MII_ADVERTISE);
 	anar &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
 		  ADVERTISE_100HALF | ADVERTISE_100FULL);
@@ -3031,9 +3047,6 @@ static void rtl_work_func_t(struct work_struct *work)
 	    netif_carrier_ok(tp->netdev))
 		napi_schedule(&tp->napi);
 
-	if (test_and_clear_bit(PHY_RESET, &tp->flags))
-		rtl_phy_reset(tp);
-
 	mutex_unlock(&tp->control);
 
 out1:
@@ -3097,8 +3110,6 @@ static int rtl8152_open(struct net_device *netdev)
 	if (res)
 		goto out;
 
-	netif_carrier_off(netdev);
-
 	res = usb_autopm_get_interface(tp->intf);
 	if (res < 0) {
 		free_all_mem(tp);
@@ -3400,15 +3411,11 @@ static void r8153_init(struct r8152 *tp)
 	r8153_power_cut_en(tp, false);
 	r8153_u1u2en(tp, true);
 
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ALDPS_SPDWN_RATIO);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, EEE_SPDWN_RATIO);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3,
-		       PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN |
-		       U1U2_SPDWN_EN | L1_SPDWN_EN);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4,
-		       PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN |
-		       TP100_SPDWN_EN | TP500_SPDWN_EN | TP1000_SPDWN_EN |
-		       EEE_SPDWN_EN);
+	/* MAC clock speed down */
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
 
 	r8153_enable_eee(tp);
 	r8153_aldps_en(tp, true);
@@ -3515,7 +3522,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 		napi_disable(&tp->napi);
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
 			rtl_stop_rx(tp);
-			rtl_runtime_suspend_enable(tp, true);
+			tp->rtl_ops.autosuspend_en(tp, true);
 		} else {
 			cancel_delayed_work_sync(&tp->schedule);
 			tp->rtl_ops.down(tp);
@@ -3542,7 +3549,7 @@ static int rtl8152_resume(struct usb_interface *intf)
 
 	if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
-			rtl_runtime_suspend_enable(tp, false);
+			tp->rtl_ops.autosuspend_en(tp, false);
 			clear_bit(SELECTIVE_SUSPEND, &tp->flags);
 			napi_disable(&tp->napi);
 			set_bit(WORK_ENABLE, &tp->flags);
@@ -3557,7 +3564,7 @@ static int rtl8152_resume(struct usb_interface *intf)
 		usb_submit_urb(tp->intr_urb, GFP_KERNEL);
 	} else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
 		if (tp->netdev->flags & IFF_UP)
-			rtl_runtime_suspend_enable(tp, false);
+			tp->rtl_ops.autosuspend_en(tp, false);
 		clear_bit(SELECTIVE_SUSPEND, &tp->flags);
 	}
 
@@ -4143,6 +4150,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->eee_set		= r8152_set_eee;
 		ops->in_nway		= rtl8152_in_nway;
 		ops->hw_phy_cfg		= r8152b_hw_phy_cfg;
+		ops->autosuspend_en	= rtl_runtime_suspend_enable;
 		break;
 
 	case RTL_VER_03:
@@ -4159,6 +4167,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->eee_set		= r8153_set_eee;
 		ops->in_nway		= rtl8153_in_nway;
 		ops->hw_phy_cfg		= r8153_hw_phy_cfg;
+		ops->autosuspend_en	= rtl8153_runtime_enable;
 		break;
 
 	default:
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 61ba46404937..6086a0163249 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -395,8 +395,11 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu)
 	dev->hard_mtu = net->mtu + net->hard_header_len;
 	if (dev->rx_urb_size == old_hard_mtu) {
 		dev->rx_urb_size = dev->hard_mtu;
-		if (dev->rx_urb_size > old_rx_urb_size)
+		if (dev->rx_urb_size > old_rx_urb_size) {
+			usbnet_pause_rx(dev);
 			usbnet_unlink_rx_urbs(dev);
+			usbnet_resume_rx(dev);
+		}
 	}
 
 	/* max qlen depend on hard_mtu and rx_urb_size */
@@ -1508,8 +1511,9 @@ static void usbnet_bh (unsigned long param)
 	} else if (netif_running (dev->net) &&
 		   netif_device_present (dev->net) &&
 		   netif_carrier_ok(dev->net) &&
-		   !timer_pending (&dev->delay) &&
-		   !test_bit (EVENT_RX_HALT, &dev->flags)) {
+		   !timer_pending(&dev->delay) &&
+		   !test_bit(EVENT_RX_PAUSED, &dev->flags) &&
+		   !test_bit(EVENT_RX_HALT, &dev->flags)) {
 		int	temp = dev->rxq.qlen;
 
 		if (temp < RX_QLEN(dev)) {
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index b3762822b653..1ce7420322ee 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -779,6 +779,25 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 	return rc;
 }
 
+static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
+				      struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+
+	nf_reset(skb);
+
+	if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0)
+		skb = NULL;    /* kfree_skb(skb) handled by nf code */
+
+	return skb;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* neighbor handling is done with actual device; do not want
  * to flip skb->dev for those ndisc packets. This really fails
@@ -899,6 +918,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
 	if (need_strict)
 		vrf_ip6_input_dst(skb, vrf_dev, orig_iif);
 
+	skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev);
 out:
 	return skb;
 }
@@ -929,6 +949,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
 	dev_queue_xmit_nit(skb, vrf_dev);
 	skb_pull(skb, skb->mac_len);
 
+	skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
 out:
 	return skb;
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index abb9cd2df9e9..da4e3d6632f6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2475,9 +2475,9 @@ static struct device_type vxlan_type = {
 	.name = "vxlan",
 };
 
-/* Calls the ndo_add_udp_enc_port of the caller in order to
+/* Calls the ndo_udp_tunnel_add of the caller in order to
  * supply the listening VXLAN udp ports. Callers are expected
- * to implement the ndo_add_udp_enc_port.
+ * to implement the ndo_udp_tunnel_add.
  */
 static void vxlan_push_rx_ports(struct net_device *dev)
 {
@@ -2899,30 +2899,6 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	return 0;
 }
 
-struct net_device *vxlan_dev_create(struct net *net, const char *name,
-				    u8 name_assign_type, struct vxlan_config *conf)
-{
-	struct nlattr *tb[IFLA_MAX+1];
-	struct net_device *dev;
-	int err;
-
-	memset(&tb, 0, sizeof(tb));
-
-	dev = rtnl_create_link(net, name, name_assign_type,
-			       &vxlan_link_ops, tb);
-	if (IS_ERR(dev))
-		return dev;
-
-	err = vxlan_dev_configure(net, dev, conf);
-	if (err < 0) {
-		free_netdev(dev);
-		return ERR_PTR(err);
-	}
-
-	return dev;
-}
-EXPORT_SYMBOL_GPL(vxlan_dev_create);
-
 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
@@ -3215,6 +3191,40 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.get_link_net	= vxlan_get_link_net,
 };
 
+struct net_device *vxlan_dev_create(struct net *net, const char *name,
+				    u8 name_assign_type,
+				    struct vxlan_config *conf)
+{
+	struct nlattr *tb[IFLA_MAX + 1];
+	struct net_device *dev;
+	int err;
+
+	memset(&tb, 0, sizeof(tb));
+
+	dev = rtnl_create_link(net, name, name_assign_type,
+			       &vxlan_link_ops, tb);
+	if (IS_ERR(dev))
+		return dev;
+
+	err = vxlan_dev_configure(net, dev, conf);
+	if (err < 0) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
+
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0) {
+		LIST_HEAD(list_kill);
+
+		vxlan_dellink(dev, &list_kill);
+		unregister_netdevice_many(&list_kill);
+		return ERR_PTR(err);
+	}
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(vxlan_dev_create);
+
 static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
 					     struct net_device *dev)
 {
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index d4b7a168f7c0..ebc12c521fe0 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3858,12 +3858,16 @@ void __ath10k_scan_finish(struct ath10k *ar)
 		break;
 	case ATH10K_SCAN_RUNNING:
 	case ATH10K_SCAN_ABORTING:
-		if (!ar->scan.is_roc)
-			ieee80211_scan_completed(ar->hw,
-						 (ar->scan.state ==
-						  ATH10K_SCAN_ABORTING));
-		else if (ar->scan.roc_notify)
+		if (!ar->scan.is_roc) {
+			struct cfg80211_scan_info info = {
+				.aborted = (ar->scan.state ==
+					    ATH10K_SCAN_ABORTING),
+			};
+
+			ieee80211_scan_completed(ar->hw, &info);
+		} else if (ar->scan.roc_notify) {
 			ieee80211_remain_on_channel_expired(ar->hw);
+		}
 		/* fall through */
 	case ATH10K_SCAN_STARTING:
 		ar->scan.state = ATH10K_SCAN_IDLE;
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 4e11ba06f089..ef5b40ef6d67 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -859,7 +859,11 @@ void ath6kl_cfg80211_disconnect_event(struct ath6kl_vif *vif, u8 reason,
 	struct ath6kl *ar = vif->ar;
 
 	if (vif->scan_req) {
-		cfg80211_scan_done(vif->scan_req, true);
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		cfg80211_scan_done(vif->scan_req, &info);
 		vif->scan_req = NULL;
 	}
 
@@ -1069,6 +1073,9 @@ static int ath6kl_cfg80211_scan(struct wiphy *wiphy,
 void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted)
 {
 	struct ath6kl *ar = vif->ar;
+	struct cfg80211_scan_info info = {
+		.aborted = aborted,
+	};
 	int i;
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: status%s\n", __func__,
@@ -1089,7 +1096,7 @@ void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted)
 	}
 
 out:
-	cfg80211_scan_done(vif->scan_req, aborted);
+	cfg80211_scan_done(vif->scan_req, &info);
 	vif->scan_req = NULL;
 }
 
@@ -3614,7 +3621,11 @@ void ath6kl_cfg80211_vif_stop(struct ath6kl_vif *vif, bool wmi_ready)
 	}
 
 	if (vif->scan_req) {
-		cfg80211_scan_done(vif->scan_req, true);
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		cfg80211_scan_done(vif->scan_req, &info);
 		vif->scan_req = NULL;
 	}
 
diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c
index e56bafcf5864..57e26a640477 100644
--- a/drivers/net/wireless/ath/ath9k/channel.c
+++ b/drivers/net/wireless/ath/ath9k/channel.c
@@ -960,6 +960,9 @@ void ath_roc_complete(struct ath_softc *sc, enum ath_roc_complete_reason reason)
 void ath_scan_complete(struct ath_softc *sc, bool abort)
 {
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
+	struct cfg80211_scan_info info = {
+		.aborted = abort,
+	};
 
 	if (abort)
 		ath_dbg(common, CHAN_CTX, "HW scan aborted\n");
@@ -969,7 +972,7 @@ void ath_scan_complete(struct ath_softc *sc, bool abort)
 	sc->offchannel.scan_req = NULL;
 	sc->offchannel.scan_vif = NULL;
 	sc->offchannel.state = ATH_OFFCHANNEL_IDLE;
-	ieee80211_scan_completed(sc->hw, abort);
+	ieee80211_scan_completed(sc->hw, &info);
 	clear_bit(ATH_OP_SCANNING, &common->op_flags);
 	spin_lock_bh(&sc->chan_lock);
 	if (test_bit(ATH_OP_MULTI_CHANNEL, &common->op_flags))
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 62bf9331bd7f..f0e1175fb76a 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -1369,7 +1369,11 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy,
 	mutex_lock(&wil->mutex);
 	started = wil_p2p_stop_discovery(wil);
 	if (started && wil->scan_request) {
-		cfg80211_scan_done(wil->scan_request, 1);
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		cfg80211_scan_done(wil->scan_request, &info);
 		wil->scan_request = NULL;
 		wil->radio_wdev = wil->wdev;
 	}
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 8e31d755bbee..4bc92e54984a 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -850,10 +850,14 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 	mutex_unlock(&wil->wmi_mutex);
 
 	if (wil->scan_request) {
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
 		wil_dbg_misc(wil, "Abort scan_request 0x%p\n",
 			     wil->scan_request);
 		del_timer_sync(&wil->scan_timer);
-		cfg80211_scan_done(wil->scan_request, true);
+		cfg80211_scan_done(wil->scan_request, &info);
 		wil->scan_request = NULL;
 	}
 
@@ -1049,10 +1053,14 @@ int __wil_down(struct wil6210_priv *wil)
 	(void)wil_p2p_stop_discovery(wil);
 
 	if (wil->scan_request) {
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
 		wil_dbg_misc(wil, "Abort scan_request 0x%p\n",
 			     wil->scan_request);
 		del_timer_sync(&wil->scan_timer);
-		cfg80211_scan_done(wil->scan_request, true);
+		cfg80211_scan_done(wil->scan_request, &info);
 		wil->scan_request = NULL;
 	}
 
diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c
index 213b8259638c..e0f8aa0ebfac 100644
--- a/drivers/net/wireless/ath/wil6210/p2p.c
+++ b/drivers/net/wireless/ath/wil6210/p2p.c
@@ -252,8 +252,12 @@ void wil_p2p_search_expired(struct work_struct *work)
 	mutex_unlock(&wil->mutex);
 
 	if (started) {
+		struct cfg80211_scan_info info = {
+			.aborted = false,
+		};
+
 		mutex_lock(&wil->p2p_wdev_mutex);
-		cfg80211_scan_done(wil->scan_request, 0);
+		cfg80211_scan_done(wil->scan_request, &info);
 		wil->scan_request = NULL;
 		wil->radio_wdev = wil->wdev;
 		mutex_unlock(&wil->p2p_wdev_mutex);
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index b80c5d850e1e..4d92541913c0 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -426,15 +426,17 @@ static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id,
 {
 	if (wil->scan_request) {
 		struct wmi_scan_complete_event *data = d;
-		bool aborted = (data->status != WMI_SCAN_SUCCESS);
+		struct cfg80211_scan_info info = {
+			.aborted = (data->status != WMI_SCAN_SUCCESS),
+		};
 
 		wil_dbg_wmi(wil, "SCAN_COMPLETE(0x%08x)\n", data->status);
 		wil_dbg_misc(wil, "Complete scan_request 0x%p aborted %d\n",
-			     wil->scan_request, aborted);
+			     wil->scan_request, info.aborted);
 
 		del_timer_sync(&wil->scan_timer);
 		mutex_lock(&wil->p2p_wdev_mutex);
-		cfg80211_scan_done(wil->scan_request, aborted);
+		cfg80211_scan_done(wil->scan_request, &info);
 		wil->radio_wdev = wil->wdev;
 		mutex_unlock(&wil->p2p_wdev_mutex);
 		wil->scan_request = NULL;
diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c
index 7c108047fb46..0e180677c7fc 100644
--- a/drivers/net/wireless/atmel/at76c50x-usb.c
+++ b/drivers/net/wireless/atmel/at76c50x-usb.c
@@ -1922,6 +1922,9 @@ static void at76_dwork_hw_scan(struct work_struct *work)
 {
 	struct at76_priv *priv = container_of(work, struct at76_priv,
 					      dwork_hw_scan.work);
+	struct cfg80211_scan_info info = {
+		.aborted = false,
+	};
 	int ret;
 
 	if (priv->device_unplugged)
@@ -1948,7 +1951,7 @@ static void at76_dwork_hw_scan(struct work_struct *work)
 
 	mutex_unlock(&priv->mtx);
 
-	ieee80211_scan_completed(priv->hw, false);
+	ieee80211_scan_completed(priv->hw, &info);
 
 	ieee80211_wake_queues(priv->hw);
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 6e6066a3ff4f..b8f7dec212e5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -775,9 +775,13 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg,
 		if (!aborted)
 			cfg80211_sched_scan_results(cfg_to_wiphy(cfg));
 	} else if (scan_request) {
+		struct cfg80211_scan_info info = {
+			.aborted = aborted,
+		};
+
 		brcmf_dbg(SCAN, "ESCAN Completed scan: %s\n",
 			  aborted ? "Aborted" : "Done");
-		cfg80211_scan_done(scan_request, aborted);
+		cfg80211_scan_done(scan_request, &info);
 	}
 	if (!test_and_clear_bit(BRCMF_SCAN_STATUS_BUSY, &cfg->scan_status))
 		brcmf_dbg(SCAN, "Scan complete, probably P2P scan\n");
diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
index eb24b9241bb2..140b6ea8f7cc 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.c
+++ b/drivers/net/wireless/intel/iwlegacy/common.c
@@ -1305,10 +1305,14 @@ il_send_scan_abort(struct il_priv *il)
 static void
 il_complete_scan(struct il_priv *il, bool aborted)
 {
+	struct cfg80211_scan_info info = {
+		.aborted = aborted,
+	};
+
 	/* check if scan was requested from mac80211 */
 	if (il->scan_request) {
 		D_SCAN("Complete scan in mac80211\n");
-		ieee80211_scan_completed(il->hw, aborted);
+		ieee80211_scan_completed(il->hw, &info);
 	}
 
 	il->scan_vif = NULL;
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/scan.c b/drivers/net/wireless/intel/iwlwifi/dvm/scan.c
index d01766f16175..17e6a32384d3 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/scan.c
@@ -94,10 +94,14 @@ static int iwl_send_scan_abort(struct iwl_priv *priv)
 
 static void iwl_complete_scan(struct iwl_priv *priv, bool aborted)
 {
+	struct cfg80211_scan_info info = {
+		.aborted = aborted,
+	};
+
 	/* check if scan was requested from mac80211 */
 	if (priv->scan_request) {
 		IWL_DEBUG_SCAN(priv, "Complete scan in mac80211\n");
-		ieee80211_scan_completed(priv->hw, aborted);
+		ieee80211_scan_completed(priv->hw, &info);
 	}
 
 	priv->scan_type = IWL_SCAN_NORMAL;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index fb25d9e41912..dac120f8861b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -391,13 +391,16 @@ void iwl_mvm_rx_lmac_scan_complete_notif(struct iwl_mvm *mvm,
 		ieee80211_sched_scan_stopped(mvm->hw);
 		mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_DISABLED;
 	} else if (mvm->scan_status & IWL_MVM_SCAN_REGULAR) {
+		struct cfg80211_scan_info info = {
+			.aborted = aborted,
+		};
+
 		IWL_DEBUG_SCAN(mvm, "Regular scan %s, EBS status %s (FW)\n",
 			       aborted ? "aborted" : "completed",
 			       iwl_mvm_ebs_status_str(scan_notif->ebs_status));
 
 		mvm->scan_status &= ~IWL_MVM_SCAN_REGULAR;
-		ieee80211_scan_completed(mvm->hw,
-				scan_notif->status == IWL_SCAN_OFFLOAD_ABORTED);
+		ieee80211_scan_completed(mvm->hw, &info);
 		iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN);
 		cancel_delayed_work(&mvm->scan_timeout_dwork);
 	} else {
@@ -1432,7 +1435,11 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm,
 
 	/* if the scan is already stopping, we don't need to notify mac80211 */
 	if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_REGULAR) {
-		ieee80211_scan_completed(mvm->hw, aborted);
+		struct cfg80211_scan_info info = {
+			.aborted = aborted,
+		};
+
+		ieee80211_scan_completed(mvm->hw, &info);
 		iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN);
 		cancel_delayed_work(&mvm->scan_timeout_dwork);
 	} else if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_SCHED) {
@@ -1566,7 +1573,11 @@ void iwl_mvm_report_scan_aborted(struct iwl_mvm *mvm)
 
 		uid = iwl_mvm_scan_uid_by_status(mvm, IWL_MVM_SCAN_REGULAR);
 		if (uid >= 0) {
-			ieee80211_scan_completed(mvm->hw, true);
+			struct cfg80211_scan_info info = {
+				.aborted = true,
+			};
+
+			ieee80211_scan_completed(mvm->hw, &info);
 			mvm->scan_uid_status[uid] = 0;
 		}
 		uid = iwl_mvm_scan_uid_by_status(mvm, IWL_MVM_SCAN_SCHED);
@@ -1587,8 +1598,13 @@ void iwl_mvm_report_scan_aborted(struct iwl_mvm *mvm)
 				mvm->scan_uid_status[i] = 0;
 		}
 	} else {
-		if (mvm->scan_status & IWL_MVM_SCAN_REGULAR)
-			ieee80211_scan_completed(mvm->hw, true);
+		if (mvm->scan_status & IWL_MVM_SCAN_REGULAR) {
+			struct cfg80211_scan_info info = {
+				.aborted = true,
+			};
+
+			ieee80211_scan_completed(mvm->hw, &info);
+		}
 
 		/* Sched scan will be restarted by mac80211 in
 		 * restart_hw, so do not report if FW is about to be
@@ -1631,8 +1647,13 @@ out:
 		 */
 		iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN);
 		cancel_delayed_work(&mvm->scan_timeout_dwork);
-		if (notify)
-			ieee80211_scan_completed(mvm->hw, true);
+		if (notify) {
+			struct cfg80211_scan_info info = {
+				.aborted = true,
+			};
+
+			ieee80211_scan_completed(mvm->hw, &info);
+		}
 	} else if (notify) {
 		ieee80211_sched_scan_stopped(mvm->hw);
 		mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_DISABLED;
diff --git a/drivers/net/wireless/intersil/orinoco/scan.c b/drivers/net/wireless/intersil/orinoco/scan.c
index d0ceb06c72d0..6d1d084854fb 100644
--- a/drivers/net/wireless/intersil/orinoco/scan.c
+++ b/drivers/net/wireless/intersil/orinoco/scan.c
@@ -237,7 +237,11 @@ void orinoco_add_hostscan_results(struct orinoco_private *priv,
 
  scan_abort:
 	if (priv->scan_request) {
-		cfg80211_scan_done(priv->scan_request, abort);
+		struct cfg80211_scan_info info = {
+			.aborted = abort,
+		};
+
+		cfg80211_scan_done(priv->scan_request, &info);
 		priv->scan_request = NULL;
 	}
 }
@@ -245,7 +249,11 @@ void orinoco_add_hostscan_results(struct orinoco_private *priv,
 void orinoco_scan_done(struct orinoco_private *priv, bool abort)
 {
 	if (priv->scan_request) {
-		cfg80211_scan_done(priv->scan_request, abort);
+		struct cfg80211_scan_info info = {
+			.aborted = abort,
+		};
+
+		cfg80211_scan_done(priv->scan_request, &info);
 		priv->scan_request = NULL;
 	}
 }
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index a1e28a4fd658..8c35ac838fce 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -41,8 +41,6 @@ MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211");
 MODULE_LICENSE("GPL");
 
-static u32 wmediumd_portid;
-
 static int radios = 2;
 module_param(radios, int, 0444);
 MODULE_PARM_DESC(radios, "Number of simulated radios");
@@ -252,12 +250,13 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 	cp->magic = 0;
 }
 
-static unsigned int hwsim_net_id;
+static int hwsim_net_id;
 
 static int hwsim_netgroup;
 
 struct hwsim_net {
 	int netgroup;
+	u32 wmediumd;
 };
 
 static inline int hwsim_net_get_netgroup(struct net *net)
@@ -274,6 +273,20 @@ static inline void hwsim_net_set_netgroup(struct net *net)
 	hwsim_net->netgroup = hwsim_netgroup++;
 }
 
+static inline u32 hwsim_net_get_wmediumd(struct net *net)
+{
+	struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
+
+	return hwsim_net->wmediumd;
+}
+
+static inline void hwsim_net_set_wmediumd(struct net *net, u32 portid)
+{
+	struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
+
+	hwsim_net->wmediumd = portid;
+}
+
 static struct class *hwsim_class;
 
 static struct net_device *hwsim_mon; /* global monitor netdev */
@@ -444,10 +457,6 @@ static const struct ieee80211_iface_limit hwsim_if_limits[] = {
 	{ .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE) }
 };
 
-static const struct ieee80211_iface_limit hwsim_if_dfs_limits[] = {
-	{ .max = 8, .types = BIT(NL80211_IFTYPE_AP) },
-};
-
 static const struct ieee80211_iface_combination hwsim_if_comb[] = {
 	{
 		.limits = hwsim_if_limits,
@@ -455,18 +464,12 @@ static const struct ieee80211_iface_combination hwsim_if_comb[] = {
 		.n_limits = ARRAY_SIZE(hwsim_if_limits) - 1,
 		.max_interfaces = 2048,
 		.num_different_channels = 1,
-	},
-	{
-		.limits = hwsim_if_dfs_limits,
-		.n_limits = ARRAY_SIZE(hwsim_if_dfs_limits),
-		.max_interfaces = 8,
-		.num_different_channels = 1,
 		.radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) |
 				       BIT(NL80211_CHAN_WIDTH_20) |
 				       BIT(NL80211_CHAN_WIDTH_40) |
 				       BIT(NL80211_CHAN_WIDTH_80) |
 				       BIT(NL80211_CHAN_WIDTH_160),
-	}
+	},
 };
 
 static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = {
@@ -475,18 +478,12 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = {
 		.n_limits = ARRAY_SIZE(hwsim_if_limits),
 		.max_interfaces = 2048,
 		.num_different_channels = 1,
-	},
-	{
-		.limits = hwsim_if_dfs_limits,
-		.n_limits = ARRAY_SIZE(hwsim_if_dfs_limits),
-		.max_interfaces = 8,
-		.num_different_channels = 1,
 		.radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) |
 				       BIT(NL80211_CHAN_WIDTH_20) |
 				       BIT(NL80211_CHAN_WIDTH_40) |
 				       BIT(NL80211_CHAN_WIDTH_80) |
 				       BIT(NL80211_CHAN_WIDTH_160),
-	}
+	},
 };
 
 static spinlock_t hwsim_radio_lock;
@@ -552,6 +549,8 @@ struct mac80211_hwsim_data {
 
 	/* group shared by radios created in the same netns */
 	int netgroup;
+	/* wmediumd portid responsible for netgroup of this radio */
+	u32 wmediumd;
 
 	int power_level;
 
@@ -983,6 +982,29 @@ static bool hwsim_ps_rx_ok(struct mac80211_hwsim_data *data,
 	return true;
 }
 
+static int hwsim_unicast_netgroup(struct mac80211_hwsim_data *data,
+				  struct sk_buff *skb, int portid)
+{
+	struct net *net;
+	bool found = false;
+	int res = -ENOENT;
+
+	rcu_read_lock();
+	for_each_net_rcu(net) {
+		if (data->netgroup == hwsim_net_get_netgroup(net)) {
+			res = genlmsg_unicast(net, skb, portid);
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	if (!found)
+		nlmsg_free(skb);
+
+	return res;
+}
+
 static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
 				       struct sk_buff *my_skb,
 				       int dst_portid)
@@ -1062,7 +1084,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
 		goto nla_put_failure;
 
 	genlmsg_end(skb, msg_head);
-	if (genlmsg_unicast(&init_net, skb, dst_portid))
+	if (hwsim_unicast_netgroup(data, skb, dst_portid))
 		goto err_free_txskb;
 
 	/* Enqueue the packet */
@@ -1355,7 +1377,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
 	mac80211_hwsim_monitor_rx(hw, skb, channel);
 
 	/* wmediumd mode check */
-	_portid = ACCESS_ONCE(wmediumd_portid);
+	_portid = ACCESS_ONCE(data->wmediumd);
 
 	if (_portid)
 		return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
@@ -1451,7 +1473,8 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 				    struct sk_buff *skb,
 				    struct ieee80211_channel *chan)
 {
-	u32 _pid = ACCESS_ONCE(wmediumd_portid);
+	struct mac80211_hwsim_data *data = hw->priv;
+	u32 _pid = ACCESS_ONCE(data->wmediumd);
 
 	if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) {
 		struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb);
@@ -1918,8 +1941,12 @@ static void hw_scan_work(struct work_struct *work)
 
 	mutex_lock(&hwsim->mutex);
 	if (hwsim->scan_chan_idx >= req->n_channels) {
+		struct cfg80211_scan_info info = {
+			.aborted = false,
+		};
+
 		wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n");
-		ieee80211_scan_completed(hwsim->hw, false);
+		ieee80211_scan_completed(hwsim->hw, &info);
 		hwsim->hw_scan_request = NULL;
 		hwsim->hw_scan_vif = NULL;
 		hwsim->tmp_chan = NULL;
@@ -2004,13 +2031,16 @@ static void mac80211_hwsim_cancel_hw_scan(struct ieee80211_hw *hw,
 					  struct ieee80211_vif *vif)
 {
 	struct mac80211_hwsim_data *hwsim = hw->priv;
+	struct cfg80211_scan_info info = {
+		.aborted = true,
+	};
 
 	wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n");
 
 	cancel_delayed_work_sync(&hwsim->hw_scan);
 
 	mutex_lock(&hwsim->mutex);
-	ieee80211_scan_completed(hwsim->hw, true);
+	ieee80211_scan_completed(hwsim->hw, &info);
 	hwsim->tmp_chan = NULL;
 	hwsim->hw_scan_request = NULL;
 	hwsim->hw_scan_vif = NULL;
@@ -2448,13 +2478,14 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 		hw->wiphy->max_scan_ssids = 255;
 		hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
 		hw->wiphy->max_remain_on_channel_duration = 1000;
-		/* For channels > 1 DFS is not allowed */
-		hw->wiphy->n_iface_combinations = 1;
 		hw->wiphy->iface_combinations = &data->if_combination;
 		if (param->p2p_device)
 			data->if_combination = hwsim_if_comb_p2p_dev[0];
 		else
 			data->if_combination = hwsim_if_comb[0];
+		hw->wiphy->n_iface_combinations = 1;
+		/* For channels > 1 DFS is not allowed */
+		data->if_combination.radar_detect_widths = 0;
 		data->if_combination.num_different_channels = data->channels;
 	} else if (param->p2p_device) {
 		hw->wiphy->iface_combinations = hwsim_if_comb_p2p_dev;
@@ -2796,6 +2827,20 @@ static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr)
 	return data;
 }
 
+static void hwsim_register_wmediumd(struct net *net, u32 portid)
+{
+	struct mac80211_hwsim_data *data;
+
+	hwsim_net_set_wmediumd(net, portid);
+
+	spin_lock_bh(&hwsim_radio_lock);
+	list_for_each_entry(data, &hwsim_radios, list) {
+		if (data->netgroup == hwsim_net_get_netgroup(net))
+			data->wmediumd = portid;
+	}
+	spin_unlock_bh(&hwsim_radio_lock);
+}
+
 static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
 					   struct genl_info *info)
 {
@@ -2811,9 +2856,6 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
 	int i;
 	bool found = false;
 
-	if (info->snd_portid != wmediumd_portid)
-		return -EINVAL;
-
 	if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] ||
 	    !info->attrs[HWSIM_ATTR_FLAGS] ||
 	    !info->attrs[HWSIM_ATTR_COOKIE] ||
@@ -2829,6 +2871,12 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
 	if (!data2)
 		goto out;
 
+	if (hwsim_net_get_netgroup(genl_info_net(info)) != data2->netgroup)
+		goto out;
+
+	if (info->snd_portid != data2->wmediumd)
+		goto out;
+
 	/* look for the skb matching the cookie passed back from user */
 	skb_queue_walk_safe(&data2->pending, skb, tmp) {
 		u64 skb_cookie;
@@ -2892,9 +2940,6 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 	void *frame_data;
 	struct sk_buff *skb = NULL;
 
-	if (info->snd_portid != wmediumd_portid)
-		return -EINVAL;
-
 	if (!info->attrs[HWSIM_ATTR_ADDR_RECEIVER] ||
 	    !info->attrs[HWSIM_ATTR_FRAME] ||
 	    !info->attrs[HWSIM_ATTR_RX_RATE] ||
@@ -2920,6 +2965,12 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 	if (!data2)
 		goto out;
 
+	if (hwsim_net_get_netgroup(genl_info_net(info)) != data2->netgroup)
+		goto out;
+
+	if (info->snd_portid != data2->wmediumd)
+		goto out;
+
 	/* check if radio is configured properly */
 
 	if (data2->idle || !data2->started)
@@ -2966,6 +3017,7 @@ out:
 static int hwsim_register_received_nl(struct sk_buff *skb_2,
 				      struct genl_info *info)
 {
+	struct net *net = genl_info_net(info);
 	struct mac80211_hwsim_data *data;
 	int chans = 1;
 
@@ -2982,10 +3034,10 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2,
 	if (chans > 1)
 		return -EOPNOTSUPP;
 
-	if (wmediumd_portid)
+	if (hwsim_net_get_wmediumd(net))
 		return -EBUSY;
 
-	wmediumd_portid = info->snd_portid;
+	hwsim_register_wmediumd(net, info->snd_portid);
 
 	printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, "
 	       "switching to wmediumd mode with pid %d\n", info->snd_portid);
@@ -3152,7 +3204,7 @@ static const struct genl_ops hwsim_ops[] = {
 		.cmd = HWSIM_CMD_REGISTER,
 		.policy = hwsim_genl_policy,
 		.doit = hwsim_register_received_nl,
-		.flags = GENL_ADMIN_PERM,
+		.flags = GENL_UNS_ADMIN_PERM,
 	},
 	{
 		.cmd = HWSIM_CMD_FRAME,
@@ -3218,10 +3270,10 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb,
 
 	remove_user_radios(notify->portid);
 
-	if (notify->portid == wmediumd_portid) {
+	if (notify->portid == hwsim_net_get_wmediumd(notify->net)) {
 		printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink"
 		       " socket, switching to perfect channel medium\n");
-		wmediumd_portid = 0;
+		hwsim_register_wmediumd(notify->net, 0);
 	}
 	return NOTIFY_DONE;
 
diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c
index 776b44bfd93a..ea4802446618 100644
--- a/drivers/net/wireless/marvell/libertas/cfg.c
+++ b/drivers/net/wireless/marvell/libertas/cfg.c
@@ -796,10 +796,15 @@ void lbs_scan_done(struct lbs_private *priv)
 {
 	WARN_ON(!priv->scan_req);
 
-	if (priv->internal_scan)
+	if (priv->internal_scan) {
 		kfree(priv->scan_req);
-	else
-		cfg80211_scan_done(priv->scan_req, false);
+	} else {
+		struct cfg80211_scan_info info = {
+			.aborted = false,
+		};
+
+		cfg80211_scan_done(priv->scan_req, &info);
+	}
 
 	priv->scan_req = NULL;
 }
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 0e280f879b58..db4925db39aa 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -697,9 +697,13 @@ mwifiex_close(struct net_device *dev)
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
 	if (priv->scan_request) {
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
 		mwifiex_dbg(priv->adapter, INFO,
 			    "aborting scan on ndo_stop\n");
-		cfg80211_scan_done(priv->scan_request, 1);
+		cfg80211_scan_done(priv->scan_request, &info);
 		priv->scan_request = NULL;
 		priv->scan_aborting = true;
 	}
diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
index 87e700009fd0..21ec84794d0c 100644
--- a/drivers/net/wireless/marvell/mwifiex/scan.c
+++ b/drivers/net/wireless/marvell/mwifiex/scan.c
@@ -1957,9 +1957,13 @@ static void mwifiex_check_next_scan_command(struct mwifiex_private *priv)
 			mwifiex_complete_scan(priv);
 
 		if (priv->scan_request) {
+			struct cfg80211_scan_info info = {
+				.aborted = false,
+			};
+
 			mwifiex_dbg(adapter, INFO,
 				    "info: notifying scan done\n");
-			cfg80211_scan_done(priv->scan_request, 0);
+			cfg80211_scan_done(priv->scan_request, &info);
 			priv->scan_request = NULL;
 			priv->scan_aborting = false;
 		} else {
@@ -1979,9 +1983,13 @@ static void mwifiex_check_next_scan_command(struct mwifiex_private *priv)
 
 		if (!adapter->active_scan_triggered) {
 			if (priv->scan_request) {
+				struct cfg80211_scan_info info = {
+					.aborted = true,
+				};
+
 				mwifiex_dbg(adapter, INFO,
 					    "info: aborting scan\n");
-				cfg80211_scan_done(priv->scan_request, 1);
+				cfg80211_scan_done(priv->scan_request, &info);
 				priv->scan_request = NULL;
 				priv->scan_aborting = false;
 			} else {
@@ -2021,9 +2029,13 @@ void mwifiex_cancel_scan(struct mwifiex_adapter *adapter)
 			if (!priv)
 				continue;
 			if (priv->scan_request) {
+				struct cfg80211_scan_info info = {
+					.aborted = true,
+				};
+
 				mwifiex_dbg(adapter, INFO,
 					    "info: aborting scan\n");
-				cfg80211_scan_done(priv->scan_request, 1);
+				cfg80211_scan_done(priv->scan_request, &info);
 				priv->scan_request = NULL;
 				priv->scan_aborting = false;
 			}
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 569918c485b4..603c90470225 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -2134,6 +2134,7 @@ static void rndis_get_scan_results(struct work_struct *work)
 	struct rndis_wlan_private *priv =
 		container_of(work, struct rndis_wlan_private, scan_work.work);
 	struct usbnet *usbdev = priv->usbdev;
+	struct cfg80211_scan_info info = {};
 	int ret;
 
 	netdev_dbg(usbdev->net, "get_scan_results\n");
@@ -2143,7 +2144,8 @@ static void rndis_get_scan_results(struct work_struct *work)
 
 	ret = rndis_check_bssid_list(usbdev, NULL, NULL);
 
-	cfg80211_scan_done(priv->scan_request, ret < 0);
+	info.aborted = ret < 0;
+	cfg80211_scan_done(priv->scan_request, &info);
 
 	priv->scan_request = NULL;
 }
@@ -3574,7 +3576,11 @@ static int rndis_wlan_stop(struct usbnet *usbdev)
 	flush_workqueue(priv->workqueue);
 
 	if (priv->scan_request) {
-		cfg80211_scan_done(priv->scan_request, true);
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		cfg80211_scan_done(priv->scan_request, &info);
 		priv->scan_request = NULL;
 	}
 
diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c
index 983788156bb0..0a0ff7e31f5b 100644
--- a/drivers/net/wireless/st/cw1200/scan.c
+++ b/drivers/net/wireless/st/cw1200/scan.c
@@ -167,6 +167,10 @@ void cw1200_scan_work(struct work_struct *work)
 	}
 
 	if (!priv->scan.req || (priv->scan.curr == priv->scan.end)) {
+		struct cfg80211_scan_info info = {
+			.aborted = priv->scan.status ? 1 : 0,
+		};
+
 		if (priv->scan.output_power != priv->output_power)
 			wsm_set_output_power(priv, priv->output_power * 10);
 		if (priv->join_status == CW1200_JOIN_STATUS_STA &&
@@ -188,7 +192,7 @@ void cw1200_scan_work(struct work_struct *work)
 		cw1200_scan_restart_delayed(priv);
 		wsm_unlock_tx(priv);
 		mutex_unlock(&priv->conf_mutex);
-		ieee80211_scan_completed(priv->hw, priv->scan.status ? 1 : 0);
+		ieee80211_scan_completed(priv->hw, &info);
 		up(&priv->scan.lock);
 		return;
 	} else {
diff --git a/drivers/net/wireless/ti/wl1251/event.c b/drivers/net/wireless/ti/wl1251/event.c
index c98630394a1a..d0593bc1f1a9 100644
--- a/drivers/net/wireless/ti/wl1251/event.c
+++ b/drivers/net/wireless/ti/wl1251/event.c
@@ -36,7 +36,11 @@ static int wl1251_event_scan_complete(struct wl1251 *wl,
 		     mbox->scheduled_scan_channels);
 
 	if (wl->scanning) {
-		ieee80211_scan_completed(wl->hw, false);
+		struct cfg80211_scan_info info = {
+			.aborted = false,
+		};
+
+		ieee80211_scan_completed(wl->hw, &info);
 		wl1251_debug(DEBUG_MAC80211, "mac80211 hw scan completed");
 		wl->scanning = false;
 		if (wl->hw->conf.flags & IEEE80211_CONF_IDLE)
diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 56384a4e2a35..bbf7604889b7 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -448,7 +448,11 @@ static void wl1251_op_stop(struct ieee80211_hw *hw)
 	WARN_ON(wl->state != WL1251_STATE_ON);
 
 	if (wl->scanning) {
-		ieee80211_scan_completed(wl->hw, true);
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		ieee80211_scan_completed(wl->hw, &info);
 		wl->scanning = false;
 	}
 
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index a53033deb3d0..9abc15293307 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -2615,6 +2615,10 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl,
 
 	if (wl->scan.state != WL1271_SCAN_STATE_IDLE &&
 	    wl->scan_wlvif == wlvif) {
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
 		/*
 		 * Rearm the tx watchdog just before idling scan. This
 		 * prevents just-finished scans from triggering the watchdog
@@ -2625,7 +2629,7 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl,
 		memset(wl->scan.scanned_ch, 0, sizeof(wl->scan.scanned_ch));
 		wl->scan_wlvif = NULL;
 		wl->scan.req = NULL;
-		ieee80211_scan_completed(wl->hw, true);
+		ieee80211_scan_completed(wl->hw, &info);
 	}
 
 	if (wl->sched_vif == wlvif)
@@ -3649,6 +3653,9 @@ static void wl1271_op_cancel_hw_scan(struct ieee80211_hw *hw,
 {
 	struct wl1271 *wl = hw->priv;
 	struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif);
+	struct cfg80211_scan_info info = {
+		.aborted = true,
+	};
 	int ret;
 
 	wl1271_debug(DEBUG_MAC80211, "mac80211 cancel hw scan");
@@ -3681,7 +3688,7 @@ static void wl1271_op_cancel_hw_scan(struct ieee80211_hw *hw,
 	memset(wl->scan.scanned_ch, 0, sizeof(wl->scan.scanned_ch));
 	wl->scan_wlvif = NULL;
 	wl->scan.req = NULL;
-	ieee80211_scan_completed(wl->hw, true);
+	ieee80211_scan_completed(wl->hw, &info);
 
 out_sleep:
 	wl1271_ps_elp_sleep(wl);
diff --git a/drivers/net/wireless/ti/wlcore/scan.c b/drivers/net/wireless/ti/wlcore/scan.c
index 23343643207a..5612f5916b4e 100644
--- a/drivers/net/wireless/ti/wlcore/scan.c
+++ b/drivers/net/wireless/ti/wlcore/scan.c
@@ -36,6 +36,9 @@ void wl1271_scan_complete_work(struct work_struct *work)
 	struct delayed_work *dwork;
 	struct wl1271 *wl;
 	struct wl12xx_vif *wlvif;
+	struct cfg80211_scan_info info = {
+		.aborted = false,
+	};
 	int ret;
 
 	dwork = to_delayed_work(work);
@@ -82,7 +85,7 @@ void wl1271_scan_complete_work(struct work_struct *work)
 
 	wlcore_cmd_regdomain_config_locked(wl);
 
-	ieee80211_scan_completed(wl->hw, false);
+	ieee80211_scan_completed(wl->hw, &info);
 
 out:
 	mutex_unlock(&wl->mutex);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index f7718ec685fa..cea8350fbc7e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -344,6 +344,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
+	unsigned long align;
+	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
@@ -386,22 +388,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -ENXIO;
 	}
 
+	align = le32_to_cpu(pfn_sb->align);
+	offset = le64_to_cpu(pfn_sb->dataoff);
+	if (align == 0)
+		align = 1UL << ilog2(offset);
+	mode = le32_to_cpu(pfn_sb->mode);
+
 	if (!nd_pfn->uuid) {
-		/* from probe we allocate */
+		/*
+		 * When probing a namepace via nd_pfn_probe() the uuid
+		 * is NULL (see: nd_pfn_devinit()) we init settings from
+		 * pfn_sb
+		 */
 		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
 		if (!nd_pfn->uuid)
 			return -ENOMEM;
+		nd_pfn->align = align;
+		nd_pfn->mode = mode;
 	} else {
-		/* from init we validate */
+		/*
+		 * When probing a pfn / dax instance we validate the
+		 * live settings against the pfn_sb
+		 */
 		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
 			return -ENODEV;
+
+		/*
+		 * If the uuid validates, but other settings mismatch
+		 * return EINVAL because userspace has managed to change
+		 * the configuration without specifying new
+		 * identification.
+		 */
+		if (nd_pfn->align != align || nd_pfn->mode != mode) {
+			dev_err(&nd_pfn->dev,
+					"init failed, settings mismatch\n");
+			dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
+					nd_pfn->align, align, nd_pfn->mode,
+					mode);
+			return -EINVAL;
+		}
 	}
 
-	if (nd_pfn->align == 0)
-		nd_pfn->align = le32_to_cpu(pfn_sb->align);
-	if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
+	if (align > nvdimm_namespace_capacity(ndns)) {
 		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
-				nd_pfn->align, nvdimm_namespace_capacity(ndns));
+				align, nvdimm_namespace_capacity(ndns));
 		return -EINVAL;
 	}
 
@@ -411,7 +441,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	 * namespace has changed since the pfn superblock was
 	 * established.
 	 */
-	offset = le64_to_cpu(pfn_sb->dataoff);
 	nsio = to_nd_namespace_io(&ndns->dev);
 	if (offset >= resource_size(&nsio->res)) {
 		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
@@ -419,10 +448,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -EBUSY;
 	}
 
-	if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
+	if ((align && !IS_ALIGNED(offset, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
-		dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
-				offset);
+		dev_err(&nd_pfn->dev,
+				"bad offset: %#llx dax disabled align: %#lx\n",
+				offset, align);
 		return -ENXIO;
 	}
 
@@ -502,7 +532,6 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 	res->start += start_pad;
 	res->end -= end_trunc;
 
-	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < SZ_8K)
 			return ERR_PTR(-EINVAL);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 78dca3193ca4..befac5b19490 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1679,9 +1679,14 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	int bars;
+
 	if (dev->bar)
 		iounmap(dev->bar);
-	pci_release_regions(to_pci_dev(dev->dev));
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	pci_release_selected_regions(pdev, bars);
 }
 
 static void nvme_pci_disable(struct nvme_dev *dev)
@@ -1924,7 +1929,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 
        return 0;
   release:
-       pci_release_regions(pdev);
+       pci_release_selected_regions(pdev, bars);
        return -ENODEV;
 }
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 14f2f8c7c260..33daffc4392c 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -395,7 +395,7 @@ static int unflatten_dt_nodes(const void *blob,
 			      struct device_node **nodepp)
 {
 	struct device_node *root;
-	int offset = 0, depth = 0;
+	int offset = 0, depth = 0, initial_depth = 0;
 #define FDT_MAX_DEPTH	64
 	unsigned int fpsizes[FDT_MAX_DEPTH];
 	struct device_node *nps[FDT_MAX_DEPTH];
@@ -405,11 +405,22 @@ static int unflatten_dt_nodes(const void *blob,
 	if (nodepp)
 		*nodepp = NULL;
 
+	/*
+	 * We're unflattening device sub-tree if @dad is valid. There are
+	 * possibly multiple nodes in the first level of depth. We need
+	 * set @depth to 1 to make fdt_next_node() happy as it bails
+	 * immediately when negative @depth is found. Otherwise, the device
+	 * nodes except the first one won't be unflattened successfully.
+	 */
+	if (dad)
+		depth = initial_depth = 1;
+
 	root = dad;
 	fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
 	nps[depth] = dad;
+
 	for (offset = 0;
-	     offset >= 0 && depth >= 0;
+	     offset >= 0 && depth >= initial_depth;
 	     offset = fdt_next_node(blob, offset, &depth)) {
 		if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH))
 			continue;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index e7bfc175b8e1..6ec743faabe8 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -386,13 +386,13 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
 EXPORT_SYMBOL_GPL(of_irq_to_resource);
 
 /**
- * of_irq_get - Decode a node's IRQ and return it as a Linux irq number
+ * of_irq_get - Decode a node's IRQ and return it as a Linux IRQ number
  * @dev: pointer to device tree node
- * @index: zero-based index of the irq
- *
- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
- * is not yet created.
+ * @index: zero-based index of the IRQ
  *
+ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
+ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
+ * of any other failure.
  */
 int of_irq_get(struct device_node *dev, int index)
 {
@@ -413,12 +413,13 @@ int of_irq_get(struct device_node *dev, int index)
 EXPORT_SYMBOL_GPL(of_irq_get);
 
 /**
- * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number
+ * of_irq_get_byname - Decode a node's IRQ and return it as a Linux IRQ number
  * @dev: pointer to device tree node
- * @name: irq name
+ * @name: IRQ name
  *
- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
- * is not yet created, or error code in case of any other failure.
+ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
+ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
+ * of any other failure.
  */
 int of_irq_get_byname(struct device_node *dev, const char *name)
 {
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index ed01c0172e4a..216648233874 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -127,8 +127,15 @@ static int __init __reserved_mem_alloc_size(unsigned long node,
 	}
 
 	/* Need adjust the alignment to satisfy the CMA requirement */
-	if (IS_ENABLED(CONFIG_CMA) && of_flat_dt_is_compatible(node, "shared-dma-pool"))
-		align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+	if (IS_ENABLED(CONFIG_CMA)
+	    && of_flat_dt_is_compatible(node, "shared-dma-pool")
+	    && of_get_flat_dt_prop(node, "reusable", NULL)
+	    && !of_get_flat_dt_prop(node, "no-map", NULL)) {
+		unsigned long order =
+			max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
+
+		align = max(align, (phys_addr_t)PAGE_SIZE << order);
+	}
 
 	prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
 	if (prop) {
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index dfbab61a1b47..1fa3a3219c45 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -221,9 +221,9 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos,
 		else
 			pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
 					      *(u16 *)buf);
-		buf += 2;
+		buf += 4;
 	}
-	len += 2;
+	len += 4;
 
 	/*
 	 * If we have any Low Priority VCs and a VC Arbitration Table Offset
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 1b8304e1efaa..140436a046c0 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -1010,8 +1010,8 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 		if (!ret)
 			ret = init_fn(pmu);
 	} else {
-		ret = probe_current_pmu(pmu, probe_table);
 		cpumask_setall(&pmu->supported_cpus);
+		ret = probe_current_pmu(pmu, probe_table);
 	}
 
 	if (ret) {
diff --git a/drivers/phy/phy-bcm-ns-usb2.c b/drivers/phy/phy-bcm-ns-usb2.c
index 95ab6b2a0de5..58dff80e9386 100644
--- a/drivers/phy/phy-bcm-ns-usb2.c
+++ b/drivers/phy/phy-bcm-ns-usb2.c
@@ -109,8 +109,8 @@ static int bcm_ns_usb2_probe(struct platform_device *pdev)
 	}
 
 	usb2->phy = devm_phy_create(dev, NULL, &ops);
-	if (IS_ERR(dev))
-		return PTR_ERR(dev);
+	if (IS_ERR(usb2->phy))
+		return PTR_ERR(usb2->phy);
 
 	phy_set_drvdata(usb2->phy, usb2);
 	platform_set_drvdata(pdev, usb2);
diff --git a/drivers/phy/phy-exynos-mipi-video.c b/drivers/phy/phy-exynos-mipi-video.c
index cc093ebfda94..8b851f718123 100644
--- a/drivers/phy/phy-exynos-mipi-video.c
+++ b/drivers/phy/phy-exynos-mipi-video.c
@@ -233,8 +233,12 @@ static inline int __is_running(const struct exynos_mipi_phy_desc *data,
 			struct exynos_mipi_video_phy *state)
 {
 	u32 val;
+	int ret;
+
+	ret = regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
+	if (ret)
+		return 0;
 
-	regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
 	return val & data->resetn_val;
 }
 
diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c
index 3acd2a1808df..213e2e15339c 100644
--- a/drivers/phy/phy-miphy28lp.c
+++ b/drivers/phy/phy-miphy28lp.c
@@ -1143,7 +1143,8 @@ static int miphy28lp_probe_resets(struct device_node *node,
 	struct miphy28lp_dev *miphy_dev = miphy_phy->phydev;
 	int err;
 
-	miphy_phy->miphy_rst = of_reset_control_get(node, "miphy-sw-rst");
+	miphy_phy->miphy_rst =
+		of_reset_control_get_shared(node, "miphy-sw-rst");
 
 	if (IS_ERR(miphy_phy->miphy_rst)) {
 		dev_err(miphy_dev->dev,
diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c
index 76bb88f0700a..4be3f5dbbc9f 100644
--- a/drivers/phy/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/phy-rcar-gen3-usb2.c
@@ -144,12 +144,6 @@ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch)
 	extcon_set_cable_state_(ch->extcon, EXTCON_USB, true);
 }
 
-static bool rcar_gen3_check_vbus(struct rcar_gen3_chan *ch)
-{
-	return !!(readl(ch->base + USB2_ADPCTRL) &
-		  USB2_ADPCTRL_OTGSESSVLD);
-}
-
 static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 {
 	return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG);
@@ -157,13 +151,7 @@ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 
 static void rcar_gen3_device_recognition(struct rcar_gen3_chan *ch)
 {
-	bool is_host = true;
-
-	/* B-device? */
-	if (rcar_gen3_check_id(ch) && rcar_gen3_check_vbus(ch))
-		is_host = false;
-
-	if (is_host)
+	if (!rcar_gen3_check_id(ch))
 		rcar_gen3_init_for_host(ch);
 	else
 		rcar_gen3_init_for_peri(ch);
diff --git a/drivers/phy/phy-rockchip-dp.c b/drivers/phy/phy-rockchip-dp.c
index 793ecb6d87bc..8b267a746576 100644
--- a/drivers/phy/phy-rockchip-dp.c
+++ b/drivers/phy/phy-rockchip-dp.c
@@ -90,7 +90,7 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
-	if (IS_ERR(dp))
+	if (!dp)
 		return -ENOMEM;
 
 	dp->dev = dev;
diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c
index 1d5ae5f8ef69..b1f44ab669fb 100644
--- a/drivers/phy/phy-stih407-usb.c
+++ b/drivers/phy/phy-stih407-usb.c
@@ -105,13 +105,13 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev)
 	phy_dev->dev = dev;
 	dev_set_drvdata(dev, phy_dev);
 
-	phy_dev->rstc = devm_reset_control_get(dev, "global");
+	phy_dev->rstc = devm_reset_control_get_shared(dev, "global");
 	if (IS_ERR(phy_dev->rstc)) {
 		dev_err(dev, "failed to ctrl picoPHY reset\n");
 		return PTR_ERR(phy_dev->rstc);
 	}
 
-	phy_dev->rstport = devm_reset_control_get(dev, "port");
+	phy_dev->rstport = devm_reset_control_get_exclusive(dev, "port");
 	if (IS_ERR(phy_dev->rstport)) {
 		dev_err(dev, "failed to ctrl picoPHY reset\n");
 		return PTR_ERR(phy_dev->rstport);
diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c
index bae54f7a1f48..de3101fbbf40 100644
--- a/drivers/phy/phy-sun4i-usb.c
+++ b/drivers/phy/phy-sun4i-usb.c
@@ -175,7 +175,7 @@ static void sun4i_usb_phy_write(struct sun4i_usb_phy *phy, u32 addr, u32 data,
 {
 	struct sun4i_usb_phy_data *phy_data = to_sun4i_usb_phy_data(phy);
 	u32 temp, usbc_bit = BIT(phy->index * 2);
-	void *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
+	void __iomem *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
 	int i;
 
 	mutex_lock(&phy_data->mutex);
@@ -514,9 +514,9 @@ static int sun4i_usb_phy_remove(struct platform_device *pdev)
 
 	if (data->vbus_power_nb_registered)
 		power_supply_unreg_notifier(&data->vbus_power_nb);
-	if (data->id_det_irq >= 0)
+	if (data->id_det_irq > 0)
 		devm_free_irq(dev, data->id_det_irq, data);
-	if (data->vbus_det_irq >= 0)
+	if (data->vbus_det_irq > 0)
 		devm_free_irq(dev, data->vbus_det_irq, data);
 
 	cancel_delayed_work_sync(&data->detect);
@@ -645,11 +645,11 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 
 	data->id_det_irq = gpiod_to_irq(data->id_det_gpio);
 	data->vbus_det_irq = gpiod_to_irq(data->vbus_det_gpio);
-	if ((data->id_det_gpio && data->id_det_irq < 0) ||
-	    (data->vbus_det_gpio && data->vbus_det_irq < 0))
+	if ((data->id_det_gpio && data->id_det_irq <= 0) ||
+	    (data->vbus_det_gpio && data->vbus_det_irq <= 0))
 		data->phy0_poll = true;
 
-	if (data->id_det_irq >= 0) {
+	if (data->id_det_irq > 0) {
 		ret = devm_request_irq(dev, data->id_det_irq,
 				sun4i_usb_phy0_id_vbus_det_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
@@ -660,7 +660,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (data->vbus_det_irq >= 0) {
+	if (data->vbus_det_irq > 0) {
 		ret = devm_request_irq(dev, data->vbus_det_irq,
 				sun4i_usb_phy0_id_vbus_det_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c
index 0a477d24cf76..bf46844dc387 100644
--- a/drivers/phy/phy-ti-pipe3.c
+++ b/drivers/phy/phy-ti-pipe3.c
@@ -293,11 +293,18 @@ static int ti_pipe3_init(struct phy *x)
 		ret = ti_pipe3_dpll_wait_lock(phy);
 	}
 
-	/* Program the DPLL only if not locked */
+	/* SATA has issues if re-programmed when locked */
 	val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_STATUS);
-	if (!(val & PLL_LOCK))
-		if (ti_pipe3_dpll_program(phy))
-			return -EINVAL;
+	if ((val & PLL_LOCK) && of_device_is_compatible(phy->dev->of_node,
+							"ti,phy-pipe3-sata"))
+		return ret;
+
+	/* Program the DPLL */
+	ret = ti_pipe3_dpll_program(phy);
+	if (ret) {
+		ti_pipe3_disable_clocks(phy);
+		return -EINVAL;
+	}
 
 	return ret;
 }
diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c
index 6b6af6cba454..d9b10a39a2cf 100644
--- a/drivers/phy/phy-twl4030-usb.c
+++ b/drivers/phy/phy-twl4030-usb.c
@@ -463,7 +463,8 @@ static int twl4030_phy_power_on(struct phy *phy)
 	twl4030_usb_set_mode(twl, twl->usb_mode);
 	if (twl->usb_mode == T2_USB_MODE_ULPI)
 		twl4030_i2c_access(twl, 0);
-	schedule_delayed_work(&twl->id_workaround_work, 0);
+	twl->linkstat = MUSB_UNKNOWN;
+	schedule_delayed_work(&twl->id_workaround_work, HZ);
 
 	return 0;
 }
@@ -537,6 +538,7 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
 	struct twl4030_usb *twl = _twl;
 	enum musb_vbus_id_status status;
 	bool status_changed = false;
+	int err;
 
 	status = twl4030_usb_linkstat(twl);
 
@@ -567,7 +569,9 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
 			pm_runtime_mark_last_busy(twl->dev);
 			pm_runtime_put_autosuspend(twl->dev);
 		}
-		musb_mailbox(status);
+		err = musb_mailbox(status);
+		if (err)
+			twl->linkstat = MUSB_UNKNOWN;
 	}
 
 	/* don't schedule during sleep - irq works right then */
@@ -595,7 +599,8 @@ static int twl4030_phy_init(struct phy *phy)
 	struct twl4030_usb *twl = phy_get_drvdata(phy);
 
 	pm_runtime_get_sync(twl->dev);
-	schedule_delayed_work(&twl->id_workaround_work, 0);
+	twl->linkstat = MUSB_UNKNOWN;
+	schedule_delayed_work(&twl->id_workaround_work, HZ);
 	pm_runtime_mark_last_busy(twl->dev);
 	pm_runtime_put_autosuspend(twl->dev);
 
@@ -763,7 +768,8 @@ static int twl4030_usb_remove(struct platform_device *pdev)
 	if (cable_present(twl->linkstat))
 		pm_runtime_put_noidle(twl->dev);
 	pm_runtime_mark_last_busy(twl->dev);
-	pm_runtime_put_sync_suspend(twl->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_put_sync(twl->dev);
 	pm_runtime_disable(twl->dev);
 
 	/* autogate 60MHz ULPI clock,
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index e4bc1151e04f..42a5c1dddfef 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_PINCTRL_PISTACHIO)	+= pinctrl-pistachio.o
 obj-$(CONFIG_PINCTRL_ROCKCHIP)	+= pinctrl-rockchip.o
 obj-$(CONFIG_PINCTRL_SINGLE)	+= pinctrl-single.o
 obj-$(CONFIG_PINCTRL_SIRF)	+= sirf/
-obj-$(CONFIG_PINCTRL_TEGRA)	+= tegra/
+obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
 obj-$(CONFIG_PINCTRL_TZ1090)	+= pinctrl-tz1090.o
 obj-$(CONFIG_PINCTRL_TZ1090_PDC)	+= pinctrl-tz1090-pdc.o
 obj-$(CONFIG_PINCTRL_U300)	+= pinctrl-u300.o
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index 47ccfcc8a647..eccb47480e1d 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -209,9 +209,9 @@ static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
 		pin_reg = &info->pin_regs[pin_id];
 
 		if (pin_reg->mux_reg == -1) {
-			dev_err(ipctl->dev, "Pin(%s) does not support mux function\n",
+			dev_dbg(ipctl->dev, "Pin(%s) does not support mux function\n",
 				info->pins[pin_id].name);
-			return -EINVAL;
+			continue;
 		}
 
 		if (info->flags & SHARE_MUX_CONF_REG) {
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 677a811b3a6f..7abfd42e8ffd 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -401,9 +401,9 @@ static const struct byt_simple_func_mux byt_score_sata_mux[] = {
 static const unsigned int byt_score_plt_clk0_pins[] = { 96 };
 static const unsigned int byt_score_plt_clk1_pins[] = { 97 };
 static const unsigned int byt_score_plt_clk2_pins[] = { 98 };
-static const unsigned int byt_score_plt_clk4_pins[] = { 99 };
-static const unsigned int byt_score_plt_clk5_pins[] = { 100 };
-static const unsigned int byt_score_plt_clk3_pins[] = { 101 };
+static const unsigned int byt_score_plt_clk3_pins[] = { 99 };
+static const unsigned int byt_score_plt_clk4_pins[] = { 100 };
+static const unsigned int byt_score_plt_clk5_pins[] = { 101 };
 static const struct byt_simple_func_mux byt_score_plt_clk_mux[] = {
 	SIMPLE_FUNC("plt_clk", 1),
 };
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index cf9bafa10acf..bfdf720db270 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1580,6 +1580,9 @@ static inline void pcs_irq_set(struct pcs_soc_data *pcs_soc,
 		else
 			mask &= ~soc_mask;
 		pcs->write(mask, pcswi->reg);
+
+		/* flush posted write */
+		mask = pcs->read(pcswi->reg);
 		raw_spin_unlock(&pcs->lock);
 	}
 
diff --git a/drivers/pinctrl/tegra/Makefile b/drivers/pinctrl/tegra/Makefile
index a927379b6794..d9ea2be69cc4 100644
--- a/drivers/pinctrl/tegra/Makefile
+++ b/drivers/pinctrl/tegra/Makefile
@@ -1,4 +1,4 @@
-obj-y					+= pinctrl-tegra.o
+obj-$(CONFIG_PINCTRL_TEGRA)		+= pinctrl-tegra.o
 obj-$(CONFIG_PINCTRL_TEGRA20)		+= pinctrl-tegra20.o
 obj-$(CONFIG_PINCTRL_TEGRA30)		+= pinctrl-tegra30.o
 obj-$(CONFIG_PINCTRL_TEGRA114)		+= pinctrl-tegra114.o
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 6d8ee3b15872..8abd80dbcbed 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -151,13 +151,19 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
 		goto exit;
 	}
 
+	if (u_cmd.outsize != s_cmd->outsize ||
+	    u_cmd.insize != s_cmd->insize) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
 	s_cmd->command += ec->cmd_offset;
 	ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
 	/* Only copy data to userland if data was received. */
 	if (ret < 0)
 		goto exit;
 
-	if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + u_cmd.insize))
+	if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize))
 		ret = -EFAULT;
 exit:
 	kfree(s_cmd);
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index c06bb85c2839..3ec0025d19e7 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -103,7 +103,6 @@ config DELL_SMBIOS
 
 config DELL_LAPTOP
 	tristate "Dell Laptop Extras"
-	depends on X86
 	depends on DELL_SMBIOS
 	depends on DMI
 	depends on BACKLIGHT_CLASS_DEVICE
@@ -505,7 +504,7 @@ config THINKPAD_ACPI_HOTKEY_POLL
 
 config SENSORS_HDAPS
 	tristate "Thinkpad Hard Drive Active Protection System (hdaps)"
-	depends on INPUT && X86
+	depends on INPUT
 	select INPUT_POLLDEV
 	default n
 	help
@@ -749,7 +748,7 @@ config TOSHIBA_WMI
 
 config ACPI_CMPC
 	tristate "CMPC Laptop Extras"
-	depends on X86 && ACPI
+	depends on ACPI
 	depends on RFKILL || RFKILL=n
 	select INPUT
 	select BACKLIGHT_CLASS_DEVICE
@@ -848,7 +847,7 @@ config INTEL_IMR
 
 config INTEL_PMC_CORE
 	bool "Intel PMC Core driver"
-	depends on X86 && PCI
+	depends on PCI
 	---help---
 	  The Intel Platform Controller Hub for Intel Core SoCs provides access
 	  to Power Management Controller registers via a PCI interface. This
@@ -860,7 +859,7 @@ config INTEL_PMC_CORE
 
 config IBM_RTL
 	tristate "Device driver to enable PRTL support"
-	depends on X86 && PCI
+	depends on PCI
 	---help---
 	 Enable support for IBM Premium Real Time Mode (PRTM).
 	 This module will allow you the enter and exit PRTM in the BIOS via
@@ -894,7 +893,6 @@ config XO15_EBOOK
 
 config SAMSUNG_LAPTOP
 	tristate "Samsung Laptop driver"
-	depends on X86
 	depends on RFKILL || RFKILL = n
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 4a23fbc66b71..d1a091b93192 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -567,6 +567,7 @@ static void ideapad_sysfs_exit(struct ideapad_private *priv)
 static const struct key_entry ideapad_keymap[] = {
 	{ KE_KEY, 6,  { KEY_SWITCHVIDEOMODE } },
 	{ KE_KEY, 7,  { KEY_CAMERA } },
+	{ KE_KEY, 8,  { KEY_MICMUTE } },
 	{ KE_KEY, 11, { KEY_F16 } },
 	{ KE_KEY, 13, { KEY_WLAN } },
 	{ KE_KEY, 16, { KEY_PROG1 } },
@@ -809,6 +810,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data)
 				break;
 			case 13:
 			case 11:
+			case 8:
 			case 7:
 			case 6:
 				ideapad_input_report(priv, vpc_bit);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index c3bfa1fe95bf..b65ce7519411 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2043,6 +2043,7 @@ static int hotkey_autosleep_ack;
 
 static u32 hotkey_orig_mask;		/* events the BIOS had enabled */
 static u32 hotkey_all_mask;		/* all events supported in fw */
+static u32 hotkey_adaptive_all_mask;	/* all adaptive events supported in fw */
 static u32 hotkey_reserved_mask;	/* events better left disabled */
 static u32 hotkey_driver_mask;		/* events needed by the driver */
 static u32 hotkey_user_mask;		/* events visible to userspace */
@@ -2742,6 +2743,17 @@ static ssize_t hotkey_all_mask_show(struct device *dev,
 
 static DEVICE_ATTR_RO(hotkey_all_mask);
 
+/* sysfs hotkey all_mask ----------------------------------------------- */
+static ssize_t hotkey_adaptive_all_mask_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "0x%08x\n",
+			hotkey_adaptive_all_mask | hotkey_source_mask);
+}
+
+static DEVICE_ATTR_RO(hotkey_adaptive_all_mask);
+
 /* sysfs hotkey recommended_mask --------------------------------------- */
 static ssize_t hotkey_recommended_mask_show(struct device *dev,
 					    struct device_attribute *attr,
@@ -2985,6 +2997,7 @@ static struct attribute *hotkey_attributes[] __initdata = {
 	&dev_attr_wakeup_hotunplug_complete.attr,
 	&dev_attr_hotkey_mask.attr,
 	&dev_attr_hotkey_all_mask.attr,
+	&dev_attr_hotkey_adaptive_all_mask.attr,
 	&dev_attr_hotkey_recommended_mask.attr,
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
 	&dev_attr_hotkey_source_mask.attr,
@@ -3321,20 +3334,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	if (!tp_features.hotkey)
 		return 1;
 
-	/*
-	 * Check if we have an adaptive keyboard, like on the
-	 * Lenovo Carbon X1 2014 (2nd Gen).
-	 */
-	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-		if ((hkeyv >> 8) == 2) {
-			tp_features.has_adaptive_kbd = true;
-			res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-					&adaptive_kbd_attr_group);
-			if (res)
-				goto err_exit;
-		}
-	}
-
 	quirks = tpacpi_check_quirks(tpacpi_hotkey_qtable,
 				     ARRAY_SIZE(tpacpi_hotkey_qtable));
 
@@ -3357,30 +3356,70 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	   A30, R30, R31, T20-22, X20-21, X22-24.  Detected by checking
 	   for HKEY interface version 0x100 */
 	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-		if ((hkeyv >> 8) != 1) {
-			pr_err("unknown version of the HKEY interface: 0x%x\n",
-			       hkeyv);
-			pr_err("please report this to %s\n", TPACPI_MAIL);
-		} else {
+		vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
+			    "firmware HKEY interface version: 0x%x\n",
+			    hkeyv);
+
+		switch (hkeyv >> 8) {
+		case 1:
 			/*
 			 * MHKV 0x100 in A31, R40, R40e,
 			 * T4x, X31, and later
 			 */
-			vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
-				"firmware HKEY interface version: 0x%x\n",
-				hkeyv);
 
 			/* Paranoia check AND init hotkey_all_mask */
 			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
 					"MHKA", "qd")) {
-				pr_err("missing MHKA handler, "
-				       "please report this to %s\n",
+				pr_err("missing MHKA handler, please report this to %s\n",
 				       TPACPI_MAIL);
 				/* Fallback: pre-init for FN+F3,F4,F12 */
 				hotkey_all_mask = 0x080cU;
 			} else {
 				tp_features.hotkey_mask = 1;
 			}
+			break;
+
+		case 2:
+			/*
+			 * MHKV 0x200 in X1, T460s, X260, T560, X1 Tablet (2016)
+			 */
+
+			/* Paranoia check AND init hotkey_all_mask */
+			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
+					"MHKA", "dd", 1)) {
+				pr_err("missing MHKA handler, please report this to %s\n",
+				       TPACPI_MAIL);
+				/* Fallback: pre-init for FN+F3,F4,F12 */
+				hotkey_all_mask = 0x080cU;
+			} else {
+				tp_features.hotkey_mask = 1;
+			}
+
+			/*
+			 * Check if we have an adaptive keyboard, like on the
+			 * Lenovo Carbon X1 2014 (2nd Gen).
+			 */
+			if (acpi_evalf(hkey_handle, &hotkey_adaptive_all_mask,
+				       "MHKA", "dd", 2)) {
+				if (hotkey_adaptive_all_mask != 0) {
+					tp_features.has_adaptive_kbd = true;
+					res = sysfs_create_group(
+						&tpacpi_pdev->dev.kobj,
+						&adaptive_kbd_attr_group);
+					if (res)
+						goto err_exit;
+				}
+			} else {
+				tp_features.has_adaptive_kbd = false;
+				hotkey_adaptive_all_mask = 0x0U;
+			}
+			break;
+
+		default:
+			pr_err("unknown version of the HKEY interface: 0x%x\n",
+			       hkeyv);
+			pr_err("please report this to %s\n", TPACPI_MAIL);
+			break;
 		}
 	}
 
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 456987c88baa..b13cd074c52a 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -565,11 +565,12 @@ static int power_supply_read_temp(struct thermal_zone_device *tzd,
 
 	WARN_ON(tzd == NULL);
 	psy = tzd->devdata;
-	ret = psy->desc->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	if (ret)
+		return ret;
 
 	/* Convert tenths of degree Celsius to milli degree Celsius. */
-	if (!ret)
-		*temp = val.intval * 100;
+	*temp = val.intval * 100;
 
 	return ret;
 }
@@ -612,10 +613,12 @@ static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->desc->get_property(psy,
-		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
-	if (!ret)
-		*state = val.intval;
+	ret = power_supply_get_property(psy,
+			POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+	if (ret)
+		return ret;
+
+	*state = val.intval;
 
 	return ret;
 }
@@ -628,10 +631,12 @@ static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd,
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->desc->get_property(psy,
-		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
-	if (!ret)
-		*state = val.intval;
+	ret = power_supply_get_property(psy,
+			POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+	if (ret)
+		return ret;
+
+	*state = val.intval;
 
 	return ret;
 }
diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
index d9f56730c735..73dfae41def8 100644
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -197,6 +197,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 {
 	struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent);
 	struct tps65217_charger *charger;
+	struct power_supply_config cfg = {};
 	int ret;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
@@ -208,9 +209,12 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 	charger->tps = tps;
 	charger->dev = &pdev->dev;
 
+	cfg.of_node = pdev->dev.of_node;
+	cfg.drv_data = charger;
+
 	charger->ac = devm_power_supply_register(&pdev->dev,
 						 &tps65217_charger_desc,
-						 NULL);
+						 &cfg);
 	if (IS_ERR(charger->ac)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
 		return PTR_ERR(charger->ac);
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index dba3843c53b8..ed337a8c34ab 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -457,7 +457,8 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 {
 	int err;
 
-	if (!pwm)
+	if (!pwm || !state || !state->period ||
+	    state->duty_cycle > state->period)
 		return -EINVAL;
 
 	if (!memcmp(state, &pwm->state, sizeof(*state)))
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index f994c7eaf41c..14fc011faa32 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -272,7 +272,7 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 	chip->chip.of_pwm_n_cells = 3;
 	chip->chip.can_sleep = 1;
 
-	ret = pwmchip_add(&chip->chip);
+	ret = pwmchip_add_with_polarity(&chip->chip, PWM_POLARITY_INVERSED);
 	if (ret) {
 		clk_disable_unprepare(hlcdc->periph_clk);
 		return ret;
diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index d98599249a05..01695d48dd54 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -152,7 +152,7 @@ static ssize_t enable_store(struct device *child,
 		goto unlock;
 	}
 
-	pwm_apply_state(pwm, &state);
+	ret = pwm_apply_state(pwm, &state);
 
 unlock:
 	mutex_unlock(&export->lock);
diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c
index 63cd5e68c864..3a6d0290c54c 100644
--- a/drivers/regulator/anatop-regulator.c
+++ b/drivers/regulator/anatop-regulator.c
@@ -296,7 +296,7 @@ static int anatop_regulator_probe(struct platform_device *pdev)
 		if (!sreg->sel && !strcmp(sreg->name, "vddpu"))
 			sreg->sel = 22;
 
-		if (!sreg->sel) {
+		if (!sreg->bypass && !sreg->sel) {
 			dev_err(&pdev->dev, "Failed to read a valid default voltage selector.\n");
 			return -EINVAL;
 		}
diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 321e804aeab0..a1b49a6d538f 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -123,6 +123,9 @@ static int max77620_regulator_set_fps_src(struct max77620_regulator *pmic,
 	unsigned int val;
 	int ret;
 
+	if (!rinfo)
+		return 0;
+
 	switch (fps_src) {
 	case MAX77620_FPS_SRC_0:
 	case MAX77620_FPS_SRC_1:
@@ -171,6 +174,9 @@ static int max77620_regulator_set_fps_slots(struct max77620_regulator *pmic,
 	int pd = rpdata->active_fps_pd_slot;
 	int ret = 0;
 
+	if (!rinfo)
+		return 0;
+
 	if (is_suspend) {
 		pu = rpdata->suspend_fps_pu_slot;
 		pd = rpdata->suspend_fps_pd_slot;
@@ -680,7 +686,6 @@ static struct max77620_regulator_info max77620_regs_info[MAX77620_NUM_REGS] = {
 	RAIL_SD(SD1, sd1, "in-sd1", SD1, 600000, 1550000, 12500, 0x22, SD1),
 	RAIL_SD(SD2, sd2, "in-sd2", SDX, 600000, 3787500, 12500, 0xFF, NONE),
 	RAIL_SD(SD3, sd3, "in-sd3", SDX, 600000, 3787500, 12500, 0xFF, NONE),
-	RAIL_SD(SD4, sd4, "in-sd4", SDX, 600000, 3787500, 12500, 0xFF, NONE),
 
 	RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 800000, 2375000, 25000),
 	RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 800000, 2375000, 25000),
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 56a17ec5b5ef..526bf23dcb49 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -140,6 +140,19 @@ static const struct regulator_ops rpm_smps_ldo_ops = {
 	.enable = rpm_reg_enable,
 	.disable = rpm_reg_disable,
 	.is_enabled = rpm_reg_is_enabled,
+	.list_voltage = regulator_list_voltage_linear_range,
+
+	.get_voltage = rpm_reg_get_voltage,
+	.set_voltage = rpm_reg_set_voltage,
+
+	.set_load = rpm_reg_set_load,
+};
+
+static const struct regulator_ops rpm_smps_ldo_ops_fixed = {
+	.enable = rpm_reg_enable,
+	.disable = rpm_reg_disable,
+	.is_enabled = rpm_reg_is_enabled,
+	.list_voltage = regulator_list_voltage_linear_range,
 
 	.get_voltage = rpm_reg_get_voltage,
 	.set_voltage = rpm_reg_set_voltage,
@@ -247,7 +260,7 @@ static const struct regulator_desc pm8941_nldo = {
 static const struct regulator_desc pm8941_lnldo = {
 	.fixed_uV = 1740000,
 	.n_voltages = 1,
-	.ops = &rpm_smps_ldo_ops,
+	.ops = &rpm_smps_ldo_ops_fixed,
 };
 
 static const struct regulator_desc pm8941_switch = {
diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c
index 572816e30095..c139890c1514 100644
--- a/drivers/regulator/tps51632-regulator.c
+++ b/drivers/regulator/tps51632-regulator.c
@@ -94,11 +94,14 @@ static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev,
 		int ramp_delay)
 {
 	struct tps51632_chip *tps = rdev_get_drvdata(rdev);
-	int bit = ramp_delay/6000;
+	int bit;
 	int ret;
 
-	if (bit)
-		bit--;
+	if (ramp_delay == 0)
+		bit = 0;
+	else
+		bit = DIV_ROUND_UP(ramp_delay, 6000) - 1;
+
 	ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit));
 	if (ret < 0)
 		dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9fd48de38a4c..7bc20c5188bc 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1045,6 +1045,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 		qeth_l2_set_offline(cgdev);
 
 	if (card->dev) {
+		netif_napi_del(&card->napi);
 		unregister_netdev(card->dev);
 		card->dev = NULL;
 	}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index bcd324e054a9..72934666fedf 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3167,6 +3167,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 		qeth_l3_set_offline(cgdev);
 
 	if (card->dev) {
+		netif_napi_del(&card->napi);
 		unregister_netdev(card->dev);
 		card->dev = NULL;
 	}
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index d4c285688ce9..3ddc85e6efd6 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1122,7 +1122,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
 		} else {
 			struct scsi_cmnd *SCp;
 
-			SCp = scsi_host_find_tag(SDp->host, SCSI_NO_TAG);
+			SCp = SDp->current_cmnd;
 			if(unlikely(SCp == NULL)) {
 				sdev_printk(KERN_ERR, SDp,
 					"no saved request for untagged cmd\n");
@@ -1826,7 +1826,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
 		       slot->tag, slot);
 	} else {
 		slot->tag = SCSI_NO_TAG;
-		/* must populate current_cmnd for scsi_host_find_tag to work */
+		/* save current command for reselection */
 		SCp->device->current_cmnd = SCp;
 	}
 	/* sanity check: some of the commands generated by the mid-layer
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 3408578b08d6..ff41c310c900 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -230,6 +230,7 @@ static struct {
 	{"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
 	{"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
 	{"Promise", "", NULL, BLIST_SPARSELUN},
+	{"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES},
 	{"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
 	{"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
 	{"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a8b610eaa0ca..106a6adbd6f1 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1128,7 +1128,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
  */
 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
 {
-	scmd->device->host->host_failed--;
 	scmd->eh_eflags = 0;
 	list_move_tail(&scmd->eh_entry, done_q);
 }
@@ -2227,6 +2226,9 @@ int scsi_error_handler(void *data)
 		else
 			scsi_unjam_host(shost);
 
+		/* All scmds have been handled */
+		shost->host_failed = 0;
+
 		/*
 		 * Note - if the above fails completely, the action is to take
 		 * individual devices offline and flush the queue of any
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f459dff30512..60bff78e9ead 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2867,10 +2867,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	if (sdkp->opt_xfer_blocks &&
 	    sdkp->opt_xfer_blocks <= dev_max &&
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
-	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
-		rw_max = q->limits.io_opt =
-			sdkp->opt_xfer_blocks * sdp->sector_size;
-	else
+	    logical_to_bytes(sdp, sdkp->opt_xfer_blocks) >= PAGE_SIZE) {
+		q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
+		rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+	} else
 		rw_max = BLK_DEF_MAX_SECTORS;
 
 	/* Combine with controller limits */
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 654630bb7d0e..765a6f1ac1b7 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -151,6 +151,11 @@ static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blo
 	return blocks << (ilog2(sdev->sector_size) - 9);
 }
 
+static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks)
+{
+	return blocks * sdev->sector_size;
+}
+
 /*
  * A DIF-capable target device can be formatted with different
  * protection schemes.  Currently 0 through 3 are defined:
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index cd89682065b9..1026e180eed7 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -578,7 +578,7 @@ static int rockchip_spi_transfer_one(
 		struct spi_device *spi,
 		struct spi_transfer *xfer)
 {
-	int ret = 1;
+	int ret = 0;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
 	WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
@@ -627,6 +627,8 @@ static int rockchip_spi_transfer_one(
 			spi_enable_chip(rs, 1);
 			ret = rockchip_spi_prepare_dma(rs);
 		}
+		/* successful DMA prepare means the transfer is in progress */
+		ret = ret ? ret : 1;
 	} else {
 		spi_enable_chip(rs, 1);
 		ret = rockchip_spi_pio_transfer(rs);
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 1ddd9e2309b6..cf007f3b83ec 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -173,13 +173,17 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 {
 	struct sun4i_spi *sspi = spi_master_get_devdata(master);
 	unsigned int mclk_rate, div, timeout;
+	unsigned int start, end, tx_time;
 	unsigned int tx_len = 0;
 	int ret = 0;
 	u32 reg;
 
 	/* We don't support transfer larger than the FIFO */
 	if (tfr->len > SUN4I_FIFO_DEPTH)
-		return -EINVAL;
+		return -EMSGSIZE;
+
+	if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH)
+		return -EMSGSIZE;
 
 	reinit_completion(&sspi->done);
 	sspi->tx_buf = tfr->tx_buf;
@@ -269,8 +273,12 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	sun4i_spi_write(sspi, SUN4I_BURST_CNT_REG, SUN4I_BURST_CNT(tfr->len));
 	sun4i_spi_write(sspi, SUN4I_XMIT_CNT_REG, SUN4I_XMIT_CNT(tx_len));
 
-	/* Fill the TX FIFO */
-	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH);
+	/*
+	 * Fill the TX FIFO
+	 * Filling the FIFO fully causes timeout for some reason
+	 * at least on spi2 on A10s
+	 */
+	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1);
 
 	/* Enable the interrupts */
 	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC);
@@ -279,9 +287,16 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
 	sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH);
 
+	tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
+	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
-					      msecs_to_jiffies(1000));
+					      msecs_to_jiffies(tx_time));
+	end = jiffies;
 	if (!timeout) {
+		dev_warn(&master->dev,
+			 "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
+			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
+			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
 		goto out;
 	}
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 42e2c4bd690a..7fce79a60608 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -160,6 +160,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 {
 	struct sun6i_spi *sspi = spi_master_get_devdata(master);
 	unsigned int mclk_rate, div, timeout;
+	unsigned int start, end, tx_time;
 	unsigned int tx_len = 0;
 	int ret = 0;
 	u32 reg;
@@ -269,9 +270,16 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
 	sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH);
 
+	tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
+	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
-					      msecs_to_jiffies(1000));
+					      msecs_to_jiffies(tx_time));
+	end = jiffies;
 	if (!timeout) {
+		dev_warn(&master->dev,
+			 "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
+			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
+			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
 		goto out;
 	}
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 443f664534e1..29ea8d2f9824 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -646,6 +646,13 @@ free_master:
 
 static int ti_qspi_remove(struct platform_device *pdev)
 {
+	struct ti_qspi *qspi = platform_get_drvdata(pdev);
+	int rc;
+
+	rc = spi_master_suspend(qspi->master);
+	if (rc)
+		return rc;
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c
index a8f533af9eca..ec12181822e6 100644
--- a/drivers/staging/iio/accel/sca3000_core.c
+++ b/drivers/staging/iio/accel/sca3000_core.c
@@ -594,7 +594,7 @@ static ssize_t sca3000_read_frequency(struct device *dev,
 		goto error_ret_mut;
 	ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL);
 	mutex_unlock(&st->lock);
-	if (ret)
+	if (ret < 0)
 		goto error_ret;
 	val = ret;
 	if (base_freq > 0)
diff --git a/drivers/staging/iio/adc/ad7606_spi.c b/drivers/staging/iio/adc/ad7606_spi.c
index 825da0769936..9587fa86dc69 100644
--- a/drivers/staging/iio/adc/ad7606_spi.c
+++ b/drivers/staging/iio/adc/ad7606_spi.c
@@ -21,7 +21,7 @@ static int ad7606_spi_read_block(struct device *dev,
 {
 	struct spi_device *spi = to_spi_device(dev);
 	int i, ret;
-	unsigned short *data;
+	unsigned short *data = buf;
 	__be16 *bdata = buf;
 
 	ret = spi_read(spi, buf, count * 2);
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 9f43976f4ef2..170ac980abcb 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -444,10 +444,10 @@ static ssize_t ad5933_store(struct device *dev,
 		st->settling_cycles = val;
 
 		/* 2x, 4x handling, see datasheet */
-		if (val > 511)
-			val = (val >> 1) | (1 << 9);
-		else if (val > 1022)
+		if (val > 1022)
 			val = (val >> 2) | (3 << 9);
+		else if (val > 511)
+			val = (val >> 1) | (1 << 9);
 
 		dat = cpu_to_be16(val);
 		ret = ad5933_i2c_write(st->client,
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index bbfee53cfcf5..845e49a52430 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -2521,12 +2521,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	return 0;
 
  failed:
-	if (ni)
+	if (ni) {
 		lnet_ni_decref(ni);
+		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
+		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+	}
 
 	rej.ibr_version             = version;
-	rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
-	rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
 	kiblnd_reject(cmid, &rej);
 
 	return -ECONNREFUSED;
diff --git a/drivers/staging/rtl8188eu/core/rtw_efuse.c b/drivers/staging/rtl8188eu/core/rtw_efuse.c
index c17870cddb5b..fbce1f7e68ca 100644
--- a/drivers/staging/rtl8188eu/core/rtw_efuse.c
+++ b/drivers/staging/rtl8188eu/core/rtw_efuse.c
@@ -102,7 +102,7 @@ efuse_phymap_to_logical(u8 *phymap, u16 _offset, u16 _size_byte, u8  *pbuf)
 	if (!efuseTbl)
 		return;
 
-	eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(*eFuseWord));
+	eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(u16));
 	if (!eFuseWord) {
 		DBG_88E("%s: alloc eFuseWord fail!\n", __func__);
 		goto eFuseWord_failed;
diff --git a/drivers/staging/rtl8188eu/hal/usb_halinit.c b/drivers/staging/rtl8188eu/hal/usb_halinit.c
index 87ea3b844951..363f3a34ddce 100644
--- a/drivers/staging/rtl8188eu/hal/usb_halinit.c
+++ b/drivers/staging/rtl8188eu/hal/usb_halinit.c
@@ -2072,7 +2072,8 @@ void rtl8188eu_set_hal_ops(struct adapter *adapt)
 {
 	struct hal_ops	*halfunc = &adapt->HalFunc;
 
-	adapt->HalData = kzalloc(sizeof(*adapt->HalData), GFP_KERNEL);
+
+	adapt->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL);
 	if (!adapt->HalData)
 		DBG_88E("cant not alloc memory for HAL DATA\n");
 
diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
index 0da559d929bc..d0ba3778990e 100644
--- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
@@ -1256,10 +1256,15 @@ void rtw_cfg80211_indicate_scan_done(struct rtw_wdev_priv *pwdev_priv,
 		DBG_8723A("%s with scan req\n", __func__);
 
 		if (pwdev_priv->scan_request->wiphy !=
-		    pwdev_priv->rtw_wdev->wiphy)
+		    pwdev_priv->rtw_wdev->wiphy) {
 			DBG_8723A("error wiphy compare\n");
-		else
-			cfg80211_scan_done(pwdev_priv->scan_request, aborted);
+		} else {
+			struct cfg80211_scan_info info = {
+				.aborted = aborted,
+			};
+
+			cfg80211_scan_done(pwdev_priv->scan_request, &info);
+		}
 
 		pwdev_priv->scan_request = NULL;
 	} else {
diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
index 51aff4ff7d7c..a0d8e22e575b 100644
--- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
+++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
@@ -454,7 +454,11 @@ static void CfgScanResult(enum scan_event scan_event,
 			mutex_lock(&priv->scan_req_lock);
 
 			if (priv->pstrScanReq) {
-				cfg80211_scan_done(priv->pstrScanReq, false);
+				struct cfg80211_scan_info info = {
+					.aborted = false,
+				};
+
+				cfg80211_scan_done(priv->pstrScanReq, &info);
 				priv->u32RcvdChCount = 0;
 				priv->bCfgScanning = false;
 				priv->pstrScanReq = NULL;
@@ -464,10 +468,14 @@ static void CfgScanResult(enum scan_event scan_event,
 			mutex_lock(&priv->scan_req_lock);
 
 			if (priv->pstrScanReq) {
+				struct cfg80211_scan_info info = {
+					.aborted = false,
+				};
+
 				update_scan_time();
 				refresh_scan(priv, 1, false);
 
-				cfg80211_scan_done(priv->pstrScanReq, false);
+				cfg80211_scan_done(priv->pstrScanReq, &info);
 				priv->bCfgScanning = false;
 				priv->pstrScanReq = NULL;
 			}
diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index a6e6fb9f42e1..f46dfe6b24e8 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -338,6 +338,8 @@ static int prism2_scan(struct wiphy *wiphy,
 	struct p80211msg_dot11req_scan msg1;
 	struct p80211msg_dot11req_scan_results msg2;
 	struct cfg80211_bss *bss;
+	struct cfg80211_scan_info info = {};
+
 	int result;
 	int err = 0;
 	int numbss = 0;
@@ -440,7 +442,8 @@ static int prism2_scan(struct wiphy *wiphy,
 		err = prism2_result2err(msg2.resultcode.data);
 
 exit:
-	cfg80211_scan_done(request, err ? 1 : 0);
+	info.aborted = !!(err);
+	cfg80211_scan_done(request, &info);
 	priv->scan_request = NULL;
 	return err;
 }
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 6ceac4f2d4b2..5b4b47ed948b 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -857,14 +857,6 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_power_table;
 	}
 
-	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
-		 cpufreq_dev->id);
-
-	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-						      &cpufreq_cooling_ops);
-	if (IS_ERR(cool_dev))
-		goto remove_idr;
-
 	/* Fill freq-table in descending order of frequencies */
 	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
 		freq = find_next_max(table, freq);
@@ -877,6 +869,14 @@ __cpufreq_cooling_register(struct device_node *np,
 			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
 
+	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
+		 cpufreq_dev->id);
+
+	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+						      &cpufreq_cooling_ops);
+	if (IS_ERR(cool_dev))
+		goto remove_idr;
+
 	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
 	cpufreq_dev->cool_dev = cool_dev;
 
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index f856c4544eea..51e0d32883ba 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -667,8 +667,11 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 		fsi = tty->driver_data;
 	else
 		fsi = tty->link->driver_data;
-	devpts_kill_index(fsi, tty->index);
-	devpts_release(fsi);
+
+	if (fsi) {
+		devpts_kill_index(fsi, tty->index);
+		devpts_release(fsi);
+	}
 }
 
 static const struct tty_operations ptm_unix98_ops = {
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index dc125322f48f..5b0fe97c46ca 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -750,6 +750,7 @@ static void visual_init(struct vc_data *vc, int num, int init)
 	vc->vc_complement_mask = 0;
 	vc->vc_can_do_color = 0;
 	vc->vc_panic_force_write = false;
+	vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS;
 	vc->vc_sw->con_init(vc, init);
 	if (!vc->vc_complement_mask)
 		vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c
index 9059b7dc185e..2f537bbdda09 100644
--- a/drivers/usb/common/usb-otg-fsm.c
+++ b/drivers/usb/common/usb-otg-fsm.c
@@ -21,6 +21,7 @@
  * 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/mutex.h>
@@ -450,3 +451,4 @@ int otg_statemachine(struct otg_fsm *fsm)
 	return fsm->state_changed;
 }
 EXPORT_SYMBOL_GPL(otg_statemachine);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 34b837ae1ed7..d2e3f655c26f 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2598,26 +2598,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd);
  * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is
  * deallocated.
  *
- * Make sure to only deallocate the bandwidth_mutex when the primary HCD is
- * freed.  When hcd_release() is called for either hcd in a peer set
- * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to
- * block new peering attempts
+ * Make sure to deallocate the bandwidth_mutex only when the last HCD is
+ * freed.  When hcd_release() is called for either hcd in a peer set,
+ * invalidate the peer's ->shared_hcd and ->primary_hcd pointers.
  */
 static void hcd_release(struct kref *kref)
 {
 	struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);
 
 	mutex_lock(&usb_port_peer_mutex);
-	if (usb_hcd_is_primary_hcd(hcd)) {
-		kfree(hcd->address0_mutex);
-		kfree(hcd->bandwidth_mutex);
-	}
 	if (hcd->shared_hcd) {
 		struct usb_hcd *peer = hcd->shared_hcd;
 
 		peer->shared_hcd = NULL;
-		if (peer->primary_hcd == hcd)
-			peer->primary_hcd = NULL;
+		peer->primary_hcd = NULL;
+	} else {
+		kfree(hcd->address0_mutex);
+		kfree(hcd->bandwidth_mutex);
 	}
 	mutex_unlock(&usb_port_peer_mutex);
 	kfree(hcd);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6dc810bce295..944a6dca0fcb 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Creative SB Audigy 2 NX */
 	{ USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* USB3503 */
+	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Microsoft Wireless Laser Mouse 6000 Receiver */
 	{ USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME },
 
@@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* MAYA44USB sound device */
 	{ USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* ASUS Base Station(T100) */
+	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
 	/* Action Semiconductor flash disk */
 	{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
 			USB_QUIRK_STRING_FETCH_255 },
@@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1908, 0x1315), .driver_info =
 			USB_QUIRK_HONOR_BNUMINTERFACES },
 
-	/* INTEL VALUE SSD */
-	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
-
-	/* USB3503 */
-	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
-
-	/* ASUS Base Station(T100) */
-	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
 	/* Protocol and OTG Electrical Test Device */
 	{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
 			USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+	/* Acer C120 LED Projector */
+	{ USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM },
+
 	/* Blackmagic Design Intensity Shuttle */
 	{ USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
 
 	/* Blackmagic Design UltraStudio SDI */
 	{ USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },
 
+	/* INTEL VALUE SSD */
+	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	{ }  /* terminating entry must be last */
 };
 
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 3c58d633ce80..dec0b21fc626 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -64,6 +64,17 @@
 	DWC2_TRACE_SCHEDULER_VB(pr_fmt("%s: SCH: " fmt),		\
 				dev_name(hsotg->dev), ##__VA_ARGS__)
 
+#ifdef CONFIG_MIPS
+/*
+ * There are some MIPS machines that can run in either big-endian
+ * or little-endian mode and that use the dwc2 register without
+ * a byteswap in both ways.
+ * Unlike other architectures, MIPS apparently does not require a
+ * barrier before the __raw_writel() to synchronize with DMA but does
+ * require the barrier after the __raw_writel() to serialize a set of
+ * writes. This set of operations was added specifically for MIPS and
+ * should only be used there.
+ */
 static inline u32 dwc2_readl(const void __iomem *addr)
 {
 	u32 value = __raw_readl(addr);
@@ -90,6 +101,22 @@ static inline void dwc2_writel(u32 value, void __iomem *addr)
 	pr_info("INFO:: wrote %08x to %p\n", value, addr);
 #endif
 }
+#else
+/* Normal architectures just use readl/write */
+static inline u32 dwc2_readl(const void __iomem *addr)
+{
+	return readl(addr);
+}
+
+static inline void dwc2_writel(u32 value, void __iomem *addr)
+{
+	writel(value, addr);
+
+#ifdef DWC2_LOG_WRITES
+	pr_info("info:: wrote %08x to %p\n", value, addr);
+#endif
+}
+#endif
 
 /* Maximum number of Endpoints/HostChannels */
 #define MAX_EPS_CHANNELS	16
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 4c5e3005e1dc..26cf09d0fe3c 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1018,7 +1018,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg,
 	return 1;
 }
 
-static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value);
+static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now);
 
 /**
  * get_ep_head - return the first request on the endpoint
@@ -1094,7 +1094,7 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg,
 		case USB_ENDPOINT_HALT:
 			halted = ep->halted;
 
-			dwc2_hsotg_ep_sethalt(&ep->ep, set);
+			dwc2_hsotg_ep_sethalt(&ep->ep, set, true);
 
 			ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0);
 			if (ret) {
@@ -2948,8 +2948,13 @@ static int dwc2_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
  * dwc2_hsotg_ep_sethalt - set halt on a given endpoint
  * @ep: The endpoint to set halt.
  * @value: Set or unset the halt.
+ * @now: If true, stall the endpoint now. Otherwise return -EAGAIN if
+ *       the endpoint is busy processing requests.
+ *
+ * We need to stall the endpoint immediately if request comes from set_feature
+ * protocol command handler.
  */
-static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value)
+static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now)
 {
 	struct dwc2_hsotg_ep *hs_ep = our_ep(ep);
 	struct dwc2_hsotg *hs = hs_ep->parent;
@@ -2969,6 +2974,17 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value)
 		return 0;
 	}
 
+	if (hs_ep->isochronous) {
+		dev_err(hs->dev, "%s is Isochronous Endpoint\n", ep->name);
+		return -EINVAL;
+	}
+
+	if (!now && value && !list_empty(&hs_ep->queue)) {
+		dev_dbg(hs->dev, "%s request is pending, cannot halt\n",
+			ep->name);
+		return -EAGAIN;
+	}
+
 	if (hs_ep->dir_in) {
 		epreg = DIEPCTL(index);
 		epctl = dwc2_readl(hs->regs + epreg);
@@ -3020,7 +3036,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)
 	int ret = 0;
 
 	spin_lock_irqsave(&hs->lock, flags);
-	ret = dwc2_hsotg_ep_sethalt(ep, value);
+	ret = dwc2_hsotg_ep_sethalt(ep, value, false);
 	spin_unlock_irqrestore(&hs->lock, flags);
 
 	return ret;
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 7ddf9449a063..654050684f4f 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -402,6 +402,7 @@
 #define DWC3_DEPCMD_GET_RSC_IDX(x)	(((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f)
 #define DWC3_DEPCMD_STATUS(x)		(((x) >> 12) & 0x0F)
 #define DWC3_DEPCMD_HIPRI_FORCERM	(1 << 11)
+#define DWC3_DEPCMD_CLEARPENDIN		(1 << 11)
 #define DWC3_DEPCMD_CMDACT		(1 << 10)
 #define DWC3_DEPCMD_CMDIOC		(1 << 8)
 
diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
index dd5cb5577dca..2f1fb7e7aa54 100644
--- a/drivers/usb/dwc3/dwc3-exynos.c
+++ b/drivers/usb/dwc3/dwc3-exynos.c
@@ -128,12 +128,6 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, exynos);
 
-	ret = dwc3_exynos_register_phys(exynos);
-	if (ret) {
-		dev_err(dev, "couldn't register PHYs\n");
-		return ret;
-	}
-
 	exynos->dev	= dev;
 
 	exynos->clk = devm_clk_get(dev, "usbdrd30");
@@ -183,20 +177,29 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 		goto err3;
 	}
 
+	ret = dwc3_exynos_register_phys(exynos);
+	if (ret) {
+		dev_err(dev, "couldn't register PHYs\n");
+		goto err4;
+	}
+
 	if (node) {
 		ret = of_platform_populate(node, NULL, NULL, dev);
 		if (ret) {
 			dev_err(dev, "failed to add dwc3 core\n");
-			goto err4;
+			goto err5;
 		}
 	} else {
 		dev_err(dev, "no device node, failed to add dwc3 core\n");
 		ret = -ENODEV;
-		goto err4;
+		goto err5;
 	}
 
 	return 0;
 
+err5:
+	platform_device_unregister(exynos->usb2_phy);
+	platform_device_unregister(exynos->usb3_phy);
 err4:
 	regulator_disable(exynos->vdd10);
 err3:
diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c
index 5c0adb9c6fb2..89a2f712fdfe 100644
--- a/drivers/usb/dwc3/dwc3-st.c
+++ b/drivers/usb/dwc3/dwc3-st.c
@@ -129,12 +129,18 @@ static int st_dwc3_drd_init(struct st_dwc3 *dwc3_data)
 	switch (dwc3_data->dr_mode) {
 	case USB_DR_MODE_PERIPHERAL:
 
-		val &= ~(USB3_FORCE_VBUSVALID | USB3_DELAY_VBUSVALID
+		val &= ~(USB3_DELAY_VBUSVALID
 			| USB3_SEL_FORCE_OPMODE | USB3_FORCE_OPMODE(0x3)
 			| USB3_SEL_FORCE_DPPULLDOWN2 | USB3_FORCE_DPPULLDOWN2
 			| USB3_SEL_FORCE_DMPULLDOWN2 | USB3_FORCE_DMPULLDOWN2);
 
-		val |= USB3_DEVICE_NOT_HOST;
+		/*
+		 * USB3_PORT2_FORCE_VBUSVALID When '1' and when
+		 * USB3_PORT2_DEVICE_NOT_HOST = 1, forces VBUSVLDEXT2 input
+		 * of the pico PHY to 1.
+		 */
+
+		val |= USB3_DEVICE_NOT_HOST | USB3_FORCE_VBUSVALID;
 		break;
 
 	case USB_DR_MODE_HOST:
@@ -227,7 +233,8 @@ static int st_dwc3_probe(struct platform_device *pdev)
 	dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n",
 		 dwc3_data->glue_base, dwc3_data->syscfg_reg_off);
 
-	dwc3_data->rstc_pwrdn = devm_reset_control_get(dev, "powerdown");
+	dwc3_data->rstc_pwrdn =
+		devm_reset_control_get_exclusive(dev, "powerdown");
 	if (IS_ERR(dwc3_data->rstc_pwrdn)) {
 		dev_err(&pdev->dev, "could not get power controller\n");
 		ret = PTR_ERR(dwc3_data->rstc_pwrdn);
@@ -237,7 +244,8 @@ static int st_dwc3_probe(struct platform_device *pdev)
 	/* Manage PowerDown */
 	reset_control_deassert(dwc3_data->rstc_pwrdn);
 
-	dwc3_data->rstc_rst = devm_reset_control_get(dev, "softreset");
+	dwc3_data->rstc_rst =
+		devm_reset_control_get_shared(dev, "softreset");
 	if (IS_ERR(dwc3_data->rstc_rst)) {
 		dev_err(&pdev->dev, "could not get reset controller\n");
 		ret = PTR_ERR(dwc3_data->rstc_rst);
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 9a7d0bd15dc3..07248ff1be5c 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -347,6 +347,28 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep,
 	return ret;
 }
 
+static int dwc3_send_clear_stall_ep_cmd(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+	struct dwc3_gadget_ep_cmd_params params;
+	u32 cmd = DWC3_DEPCMD_CLEARSTALL;
+
+	/*
+	 * As of core revision 2.60a the recommended programming model
+	 * is to set the ClearPendIN bit when issuing a Clear Stall EP
+	 * command for IN endpoints. This is to prevent an issue where
+	 * some (non-compliant) hosts may not send ACK TPs for pending
+	 * IN transfers due to a mishandled error condition. Synopsys
+	 * STAR 9000614252.
+	 */
+	if (dep->direction && (dwc->revision >= DWC3_REVISION_260A))
+		cmd |= DWC3_DEPCMD_CLEARPENDIN;
+
+	memset(&params, 0, sizeof(params));
+
+	return dwc3_send_gadget_ep_cmd(dwc, dep->number, cmd, &params);
+}
+
 static dma_addr_t dwc3_trb_dma_offset(struct dwc3_ep *dep,
 		struct dwc3_trb *trb)
 {
@@ -1314,8 +1336,7 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
 		else
 			dep->flags |= DWC3_EP_STALL;
 	} else {
-		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
-			DWC3_DEPCMD_CLEARSTALL, &params);
+		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		if (ret)
 			dev_err(dwc->dev, "failed to clear STALL on %s\n",
 					dep->name);
@@ -2247,7 +2268,6 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
 
 	for (epnum = 1; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
 		struct dwc3_ep *dep;
-		struct dwc3_gadget_ep_cmd_params params;
 		int ret;
 
 		dep = dwc->eps[epnum];
@@ -2259,9 +2279,7 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
 
 		dep->flags &= ~DWC3_EP_STALL;
 
-		memset(&params, 0, sizeof(params));
-		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
-				DWC3_DEPCMD_CLEARSTALL, &params);
+		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		WARN_ON_ONCE(ret);
 	}
 }
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index d67de0d22a2b..eb648485a58c 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1868,14 +1868,19 @@ unknown:
 				}
 				break;
 			}
-			req->length = value;
-			req->context = cdev;
-			req->zero = value < w_length;
-			value = composite_ep0_queue(cdev, req, GFP_ATOMIC);
-			if (value < 0) {
-				DBG(cdev, "ep_queue --> %d\n", value);
-				req->status = 0;
-				composite_setup_complete(gadget->ep0, req);
+
+			if (value >= 0) {
+				req->length = value;
+				req->context = cdev;
+				req->zero = value < w_length;
+				value = composite_ep0_queue(cdev, req,
+							    GFP_ATOMIC);
+				if (value < 0) {
+					DBG(cdev, "ep_queue --> %d\n", value);
+					req->status = 0;
+					composite_setup_complete(gadget->ep0,
+								 req);
+				}
 			}
 			return value;
 		}
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index b6f60ca8a035..70cf3477f951 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1401,6 +1401,7 @@ static const struct usb_gadget_driver configfs_driver_template = {
 		.owner          = THIS_MODULE,
 		.name		= "configfs-gadget",
 	},
+	.match_existing_only = 1,
 };
 
 static struct config_group *gadgets_make(
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 73515d54e1cc..cc33d2667408 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2051,7 +2051,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 
 		if (len < sizeof(*d) ||
 		    d->bFirstInterfaceNumber >= ffs->interfaces_count ||
-		    d->Reserved1)
+		    !d->Reserved1)
 			return -EINVAL;
 		for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
 			if (d->Reserved2[i])
@@ -2729,6 +2729,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		func->ffs->ss_descs_count;
 
 	int fs_len, hs_len, ss_len, ret, i;
+	struct ffs_ep *eps_ptr;
 
 	/* Make it a single chunk, less management later on */
 	vla_group(d);
@@ -2777,12 +2778,9 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	       ffs->raw_descs_length);
 
 	memset(vla_ptr(vlabuf, d, inums), 0xff, d_inums__sz);
-	for (ret = ffs->eps_count; ret; --ret) {
-		struct ffs_ep *ptr;
-
-		ptr = vla_ptr(vlabuf, d, eps);
-		ptr[ret].num = -1;
-	}
+	eps_ptr = vla_ptr(vlabuf, d, eps);
+	for (i = 0; i < ffs->eps_count; i++)
+		eps_ptr[i].num = -1;
 
 	/* Save pointers
 	 * d_eps == vlabuf, func->eps used to kfree vlabuf later
@@ -2851,7 +2849,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		goto error;
 
 	func->function.os_desc_table = vla_ptr(vlabuf, d, os_desc_table);
-	if (c->cdev->use_os_string)
+	if (c->cdev->use_os_string) {
 		for (i = 0; i < ffs->interfaces_count; ++i) {
 			struct usb_os_desc *desc;
 
@@ -2862,13 +2860,15 @@ static int _ffs_func_bind(struct usb_configuration *c,
 				vla_ptr(vlabuf, d, ext_compat) + i * 16;
 			INIT_LIST_HEAD(&desc->ext_prop);
 		}
-	ret = ffs_do_os_descs(ffs->ms_os_descs_count,
-			      vla_ptr(vlabuf, d, raw_descs) +
-			      fs_len + hs_len + ss_len,
-			      d_raw_descs__sz - fs_len - hs_len - ss_len,
-			      __ffs_func_bind_do_os_desc, func);
-	if (unlikely(ret < 0))
-		goto error;
+		ret = ffs_do_os_descs(ffs->ms_os_descs_count,
+				      vla_ptr(vlabuf, d, raw_descs) +
+				      fs_len + hs_len + ss_len,
+				      d_raw_descs__sz - fs_len - hs_len -
+				      ss_len,
+				      __ffs_func_bind_do_os_desc, func);
+		if (unlikely(ret < 0))
+			goto error;
+	}
 	func->function.os_desc_n =
 		c->cdev->use_os_string ? ffs->interfaces_count : 0;
 
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index c45104e3a64b..64706a789580 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -161,14 +161,6 @@ static struct usb_endpoint_descriptor hs_ep_out_desc = {
 	.wMaxPacketSize =	cpu_to_le16(512)
 };
 
-static struct usb_qualifier_descriptor dev_qualifier = {
-	.bLength =		sizeof(dev_qualifier),
-	.bDescriptorType =	USB_DT_DEVICE_QUALIFIER,
-	.bcdUSB =		cpu_to_le16(0x0200),
-	.bDeviceClass =		USB_CLASS_PRINTER,
-	.bNumConfigurations =	1
-};
-
 static struct usb_descriptor_header *hs_printer_function[] = {
 	(struct usb_descriptor_header *) &intf_desc,
 	(struct usb_descriptor_header *) &hs_ep_in_desc,
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 35fe3c80cfc0..197f73386fac 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1445,16 +1445,18 @@ static void usbg_drop_tpg(struct se_portal_group *se_tpg)
 	for (i = 0; i < TPG_INSTANCES; ++i)
 		if (tpg_instances[i].tpg == tpg)
 			break;
-	if (i < TPG_INSTANCES)
+	if (i < TPG_INSTANCES) {
 		tpg_instances[i].tpg = NULL;
-	opts = container_of(tpg_instances[i].func_inst,
-		struct f_tcm_opts, func_inst);
-	mutex_lock(&opts->dep_lock);
-	if (opts->has_dep)
-		module_put(opts->dependent);
-	else
-		configfs_undepend_item_unlocked(&opts->func_inst.group.cg_item);
-	mutex_unlock(&opts->dep_lock);
+		opts = container_of(tpg_instances[i].func_inst,
+			struct f_tcm_opts, func_inst);
+		mutex_lock(&opts->dep_lock);
+		if (opts->has_dep)
+			module_put(opts->dependent);
+		else
+			configfs_undepend_item_unlocked(
+				&opts->func_inst.group.cg_item);
+		mutex_unlock(&opts->dep_lock);
+	}
 	mutex_unlock(&tpg_instances_lock);
 
 	kfree(tpg);
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 186d4b162524..cd214ec8a601 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -598,18 +598,6 @@ static struct usb_gadget_strings *fn_strings[] = {
 	NULL,
 };
 
-static struct usb_qualifier_descriptor devqual_desc = {
-	.bLength = sizeof devqual_desc,
-	.bDescriptorType = USB_DT_DEVICE_QUALIFIER,
-
-	.bcdUSB = cpu_to_le16(0x200),
-	.bDeviceClass = USB_CLASS_MISC,
-	.bDeviceSubClass = 0x02,
-	.bDeviceProtocol = 0x01,
-	.bNumConfigurations = 1,
-	.bRESERVED = 0,
-};
-
 static struct usb_interface_assoc_descriptor iad_desc = {
 	.bLength = sizeof iad_desc,
 	.bDescriptorType = USB_DT_INTERFACE_ASSOCIATION,
@@ -1292,6 +1280,7 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr)
 
 	if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) {
 		struct cntrl_cur_lay3 c;
+		memset(&c, 0, sizeof(struct cntrl_cur_lay3));
 
 		if (entity_id == USB_IN_CLK_ID)
 			c.dCUR = p_srate;
diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
index d62683017cf3..990df221c629 100644
--- a/drivers/usb/gadget/function/storage_common.c
+++ b/drivers/usb/gadget/function/storage_common.c
@@ -83,9 +83,7 @@ EXPORT_SYMBOL_GPL(fsg_fs_function);
  * USB 2.0 devices need to expose both high speed and full speed
  * descriptors, unless they only run at full speed.
  *
- * That means alternate endpoint descriptors (bigger packets)
- * and a "device qualifier" ... plus more construction options
- * for the configuration descriptor.
+ * That means alternate endpoint descriptors (bigger packets).
  */
 struct usb_endpoint_descriptor fsg_hs_bulk_in_desc = {
 	.bLength =		USB_DT_ENDPOINT_SIZE,
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index e64479f882a5..aa3707bdebb4 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -938,8 +938,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 			struct usb_ep		*ep = dev->gadget->ep0;
 			struct usb_request	*req = dev->req;
 
-			if ((retval = setup_req (ep, req, 0)) == 0)
-				retval = usb_ep_queue (ep, req, GFP_ATOMIC);
+			if ((retval = setup_req (ep, req, 0)) == 0) {
+				spin_unlock_irq (&dev->lock);
+				retval = usb_ep_queue (ep, req, GFP_KERNEL);
+				spin_lock_irq (&dev->lock);
+			}
 			dev->state = STATE_DEV_CONNECTED;
 
 			/* assume that was SET_CONFIGURATION */
@@ -1457,8 +1460,11 @@ delegate:
 							w_length);
 				if (value < 0)
 					break;
+
+				spin_unlock (&dev->lock);
 				value = usb_ep_queue (gadget->ep0, dev->req,
-							GFP_ATOMIC);
+							GFP_KERNEL);
+				spin_lock (&dev->lock);
 				if (value < 0) {
 					clean_req (gadget->ep0, dev->req);
 					break;
@@ -1481,11 +1487,14 @@ delegate:
 	if (value >= 0 && dev->state != STATE_DEV_SETUP) {
 		req->length = value;
 		req->zero = value < w_length;
-		value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC);
+
+		spin_unlock (&dev->lock);
+		value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL);
 		if (value < 0) {
 			DBG (dev, "ep_queue --> %d\n", value);
 			req->status = 0;
 		}
+		return value;
 	}
 
 	/* device stalls when value < 0 */
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index 6e8300d6a737..e1b2dcebdc2e 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -603,11 +603,15 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver)
 		}
 	}
 
-	list_add_tail(&driver->pending, &gadget_driver_pending_list);
-	pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n",
-		driver->function);
+	if (!driver->match_existing_only) {
+		list_add_tail(&driver->pending, &gadget_driver_pending_list);
+		pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n",
+			driver->function);
+		ret = 0;
+	}
+
 	mutex_unlock(&udc_lock);
-	return 0;
+	return ret;
 found:
 	ret = udc_bind_to_driver(udc, driver);
 	mutex_unlock(&udc_lock);
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index ae1b6e69eb96..a962b89b65a6 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -368,6 +368,15 @@ static void ehci_shutdown(struct usb_hcd *hcd)
 {
 	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);
 
+	/**
+	 * Protect the system from crashing at system shutdown in cases where
+	 * usb host is not added yet from OTG controller driver.
+	 * As ehci_setup() not done yet, so stop accessing registers or
+	 * variables initialized in ehci_setup()
+	 */
+	if (!ehci->sbrn)
+		return;
+
 	spin_lock_irq(&ehci->lock);
 	ehci->shutdown = true;
 	ehci->rh_state = EHCI_RH_STOPPING;
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index ffc90295a95f..74f62d68f013 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -872,15 +872,23 @@ int ehci_hub_control(
 ) {
 	struct ehci_hcd	*ehci = hcd_to_ehci (hcd);
 	int		ports = HCS_N_PORTS (ehci->hcs_params);
-	u32 __iomem	*status_reg = &ehci->regs->port_status[
-				(wIndex & 0xff) - 1];
-	u32 __iomem	*hostpc_reg = &ehci->regs->hostpc[(wIndex & 0xff) - 1];
+	u32 __iomem	*status_reg, *hostpc_reg;
 	u32		temp, temp1, status;
 	unsigned long	flags;
 	int		retval = 0;
 	unsigned	selector;
 
 	/*
+	 * Avoid underflow while calculating (wIndex & 0xff) - 1.
+	 * The compiler might deduce that wIndex can never be 0 and then
+	 * optimize away the tests for !wIndex below.
+	 */
+	temp = wIndex & 0xff;
+	temp -= (temp > 0);
+	status_reg = &ehci->regs->port_status[temp];
+	hostpc_reg = &ehci->regs->hostpc[temp];
+
+	/*
 	 * FIXME:  support SetPortFeatures USB_PORT_FEAT_INDICATOR.
 	 * HCS_INDICATOR may say we can change LEDs to off/amber/green.
 	 * (track current state ourselves) ... blink for diagnostics,
diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c
index d3afc89d00f5..2f8d3af811ce 100644
--- a/drivers/usb/host/ehci-msm.c
+++ b/drivers/usb/host/ehci-msm.c
@@ -179,22 +179,32 @@ static int ehci_msm_remove(struct platform_device *pdev)
 static int ehci_msm_pm_suspend(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 	bool do_wakeup = device_may_wakeup(dev);
 
 	dev_dbg(dev, "ehci-msm PM suspend\n");
 
-	return ehci_suspend(hcd, do_wakeup);
+	/* Only call ehci_suspend if ehci_setup has been done */
+	if (ehci->sbrn)
+		return ehci_suspend(hcd, do_wakeup);
+
+	return 0;
 }
 
 static int ehci_msm_pm_resume(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 
 	dev_dbg(dev, "ehci-msm PM resume\n");
-	ehci_resume(hcd, false);
+
+	/* Only call ehci_resume if ehci_setup has been done */
+	if (ehci->sbrn)
+		ehci_resume(hcd, false);
 
 	return 0;
 }
+
 #else
 #define ehci_msm_pm_suspend	NULL
 #define ehci_msm_pm_resume	NULL
diff --git a/drivers/usb/host/ehci-st.c b/drivers/usb/host/ehci-st.c
index a94ed677d937..be4a2788fc58 100644
--- a/drivers/usb/host/ehci-st.c
+++ b/drivers/usb/host/ehci-st.c
@@ -206,7 +206,8 @@ static int st_ehci_platform_probe(struct platform_device *dev)
 		priv->clk48 = NULL;
 	}
 
-	priv->pwr = devm_reset_control_get_optional(&dev->dev, "power");
+	priv->pwr =
+		devm_reset_control_get_optional_shared(&dev->dev, "power");
 	if (IS_ERR(priv->pwr)) {
 		err = PTR_ERR(priv->pwr);
 		if (err == -EPROBE_DEFER)
@@ -214,7 +215,8 @@ static int st_ehci_platform_probe(struct platform_device *dev)
 		priv->pwr = NULL;
 	}
 
-	priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset");
+	priv->rst =
+		devm_reset_control_get_optional_shared(&dev->dev, "softreset");
 	if (IS_ERR(priv->rst)) {
 		err = PTR_ERR(priv->rst);
 		if (err == -EPROBE_DEFER)
diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 4031b372008e..9a3d7db5be57 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -81,15 +81,23 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 	struct tegra_ehci_hcd *tegra =
 		(struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv;
+	bool has_utmi_pad_registers = false;
 
 	phy_np = of_parse_phandle(pdev->dev.of_node, "nvidia,phy", 0);
 	if (!phy_np)
 		return -ENOENT;
 
+	if (of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers"))
+		has_utmi_pad_registers = true;
+
 	if (!usb1_reset_attempted) {
 		struct reset_control *usb1_reset;
 
-		usb1_reset = of_reset_control_get(phy_np, "usb");
+		if (!has_utmi_pad_registers)
+			usb1_reset = of_reset_control_get(phy_np, "utmi-pads");
+		else
+			usb1_reset = tegra->rst;
+
 		if (IS_ERR(usb1_reset)) {
 			dev_warn(&pdev->dev,
 				 "can't get utmi-pads reset from the PHY\n");
@@ -99,13 +107,15 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
 			reset_control_assert(usb1_reset);
 			udelay(1);
 			reset_control_deassert(usb1_reset);
+
+			if (!has_utmi_pad_registers)
+				reset_control_put(usb1_reset);
 		}
 
-		reset_control_put(usb1_reset);
 		usb1_reset_attempted = true;
 	}
 
-	if (!of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers")) {
+	if (!has_utmi_pad_registers) {
 		reset_control_assert(tegra->rst);
 		udelay(1);
 		reset_control_deassert(tegra->rst);
diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index d029bbe9eb36..641fed609911 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -183,7 +183,6 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
 {
 	int	branch;
 
-	ed->state = ED_OPER;
 	ed->ed_prev = NULL;
 	ed->ed_next = NULL;
 	ed->hwNextED = 0;
@@ -259,6 +258,8 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
 	/* the HC may not see the schedule updates yet, but if it does
 	 * then they'll be properly ordered.
 	 */
+
+	ed->state = ED_OPER;
 	return 0;
 }
 
diff --git a/drivers/usb/host/ohci-st.c b/drivers/usb/host/ohci-st.c
index acf2eb2a5676..02816a1515a1 100644
--- a/drivers/usb/host/ohci-st.c
+++ b/drivers/usb/host/ohci-st.c
@@ -188,13 +188,15 @@ static int st_ohci_platform_probe(struct platform_device *dev)
 		priv->clk48 = NULL;
 	}
 
-	priv->pwr = devm_reset_control_get_optional(&dev->dev, "power");
+	priv->pwr =
+		devm_reset_control_get_optional_shared(&dev->dev, "power");
 	if (IS_ERR(priv->pwr)) {
 		err = PTR_ERR(priv->pwr);
 		goto err_put_clks;
 	}
 
-	priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset");
+	priv->rst =
+		devm_reset_control_get_optional_shared(&dev->dev, "softreset");
 	if (IS_ERR(priv->rst)) {
 		err = PTR_ERR(priv->rst);
 		goto err_put_clks;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 48672fac7ff3..c10972fcc8e4 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -37,6 +37,7 @@
 /* Device for a quirk */
 #define PCI_VENDOR_ID_FRESCO_LOGIC	0x1b73
 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK	0x1000
+#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009	0x1009
 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400	0x1400
 
 #define PCI_VENDOR_ID_ETRON		0x1b6f
@@ -114,6 +115,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 	}
 
+	if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+			pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
+		xhci->quirks |= XHCI_BROKEN_STREAMS;
+
 	if (pdev->vendor == PCI_VENDOR_ID_NEC)
 		xhci->quirks |= XHCI_NEC_HOST;
 
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 676ea458148b..1f3f981fe7f8 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -196,6 +196,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
 		ret = clk_prepare_enable(clk);
 		if (ret)
 			goto put_hcd;
+	} else if (PTR_ERR(clk) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto put_hcd;
 	}
 
 	xhci = hcd_to_xhci(hcd);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 52deae4b7eac..d7d502578d79 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -290,6 +290,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 
 	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
+
+	/*
+	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
+	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
+	 * but the completion event in never sent. Use the cmd timeout timer to
+	 * handle those cases. Use twice the time to cover the bit polling retry
+	 */
+	mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT));
 	xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
 			&xhci->op_regs->cmd_ring);
 
@@ -314,6 +322,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 
 		xhci_err(xhci, "Stopped the command ring failed, "
 				"maybe the host is dead\n");
+		del_timer(&xhci->cmd_timer);
 		xhci->xhc_state |= XHCI_STATE_DYING;
 		xhci_quiesce(xhci);
 		xhci_halt(xhci);
@@ -1246,22 +1255,21 @@ void xhci_handle_command_timeout(unsigned long data)
 	int ret;
 	unsigned long flags;
 	u64 hw_ring_state;
-	struct xhci_command *cur_cmd = NULL;
+	bool second_timeout = false;
 	xhci = (struct xhci_hcd *) data;
 
 	/* mark this command to be cancelled */
 	spin_lock_irqsave(&xhci->lock, flags);
 	if (xhci->current_cmd) {
-		cur_cmd = xhci->current_cmd;
-		cur_cmd->status = COMP_CMD_ABORT;
+		if (xhci->current_cmd->status == COMP_CMD_ABORT)
+			second_timeout = true;
+		xhci->current_cmd->status = COMP_CMD_ABORT;
 	}
 
-
 	/* Make sure command ring is running before aborting it */
 	hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
 	    (hw_ring_state & CMD_RING_RUNNING))  {
-
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		xhci_dbg(xhci, "Command timeout\n");
 		ret = xhci_abort_cmd_ring(xhci);
@@ -1273,6 +1281,15 @@ void xhci_handle_command_timeout(unsigned long data)
 		}
 		return;
 	}
+
+	/* command ring failed to restart, or host removed. Bail out */
+	if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
+		xhci_cleanup_command_queue(xhci);
+		return;
+	}
+
 	/* command timeout on stopped ring, ring can't be aborted */
 	xhci_dbg(xhci, "Command timeout on stopped ring\n");
 	xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd);
@@ -2721,7 +2738,8 @@ hw_died:
 		writel(irq_pending, &xhci->ir_set->irq_pending);
 	}
 
-	if (xhci->xhc_state & XHCI_STATE_DYING) {
+	if (xhci->xhc_state & XHCI_STATE_DYING ||
+	    xhci->xhc_state & XHCI_STATE_HALTED) {
 		xhci_dbg(xhci, "xHCI dying, ignoring interrupt. "
 				"Shouldn't IRQs be disabled?\n");
 		/* Clear the event handler busy flag (RW1C);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index fa7e1ef36cd9..f2f9518c53ab 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -685,20 +685,23 @@ void xhci_stop(struct usb_hcd *hcd)
 	u32 temp;
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
-	if (xhci->xhc_state & XHCI_STATE_HALTED)
-		return;
-
 	mutex_lock(&xhci->mutex);
-	spin_lock_irq(&xhci->lock);
-	xhci->xhc_state |= XHCI_STATE_HALTED;
-	xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
 
-	/* Make sure the xHC is halted for a USB3 roothub
-	 * (xhci_stop() could be called as part of failed init).
-	 */
-	xhci_halt(xhci);
-	xhci_reset(xhci);
-	spin_unlock_irq(&xhci->lock);
+	if (!(xhci->xhc_state & XHCI_STATE_HALTED)) {
+		spin_lock_irq(&xhci->lock);
+
+		xhci->xhc_state |= XHCI_STATE_HALTED;
+		xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
+		xhci_halt(xhci);
+		xhci_reset(xhci);
+
+		spin_unlock_irq(&xhci->lock);
+	}
+
+	if (!usb_hcd_is_primary_hcd(hcd)) {
+		mutex_unlock(&xhci->mutex);
+		return;
+	}
 
 	xhci_cleanup_msix(xhci);
 
@@ -4886,7 +4889,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
 		xhci->hcc_params2 = readl(&xhci->cap_regs->hcc_params2);
 	xhci_print_registers(xhci);
 
-	xhci->quirks = quirks;
+	xhci->quirks |= quirks;
 
 	get_quirks(dev, xhci);
 
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 39fd95833eb8..f824336def5c 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1090,29 +1090,6 @@ void musb_stop(struct musb *musb)
 	musb_platform_try_idle(musb, 0);
 }
 
-static void musb_shutdown(struct platform_device *pdev)
-{
-	struct musb	*musb = dev_to_musb(&pdev->dev);
-	unsigned long	flags;
-
-	pm_runtime_get_sync(musb->controller);
-
-	musb_host_cleanup(musb);
-	musb_gadget_cleanup(musb);
-
-	spin_lock_irqsave(&musb->lock, flags);
-	musb_platform_disable(musb);
-	musb_generic_disable(musb);
-	spin_unlock_irqrestore(&musb->lock, flags);
-
-	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
-	musb_platform_exit(musb);
-
-	pm_runtime_put(musb->controller);
-	/* FIXME power down */
-}
-
-
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -1702,7 +1679,7 @@ EXPORT_SYMBOL_GPL(musb_dma_completion);
 #define use_dma			0
 #endif
 
-static void (*musb_phy_callback)(enum musb_vbus_id_status status);
+static int (*musb_phy_callback)(enum musb_vbus_id_status status);
 
 /*
  * musb_mailbox - optional phy notifier function
@@ -1711,11 +1688,12 @@ static void (*musb_phy_callback)(enum musb_vbus_id_status status);
  * Optionally gets called from the USB PHY. Note that the USB PHY must be
  * disabled at the point the phy_callback is registered or unregistered.
  */
-void musb_mailbox(enum musb_vbus_id_status status)
+int musb_mailbox(enum musb_vbus_id_status status)
 {
 	if (musb_phy_callback)
-		musb_phy_callback(status);
+		return musb_phy_callback(status);
 
+	return -ENODEV;
 };
 EXPORT_SYMBOL_GPL(musb_mailbox);
 
@@ -2028,11 +2006,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	musb_readl = musb_default_readl;
 	musb_writel = musb_default_writel;
 
-	/* We need musb_read/write functions initialized for PM */
-	pm_runtime_use_autosuspend(musb->controller);
-	pm_runtime_set_autosuspend_delay(musb->controller, 200);
-	pm_runtime_enable(musb->controller);
-
 	/* The musb_platform_init() call:
 	 *   - adjusts musb->mregs
 	 *   - sets the musb->isr
@@ -2134,6 +2107,16 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (musb->ops->phy_callback)
 		musb_phy_callback = musb->ops->phy_callback;
 
+	/*
+	 * We need musb_read/write functions initialized for PM.
+	 * Note that at least 2430 glue needs autosuspend delay
+	 * somewhere above 300 ms for the hardware to idle properly
+	 * after disconnecting the cable in host mode. Let's use
+	 * 500 ms for some margin.
+	 */
+	pm_runtime_use_autosuspend(musb->controller);
+	pm_runtime_set_autosuspend_delay(musb->controller, 500);
+	pm_runtime_enable(musb->controller);
 	pm_runtime_get_sync(musb->controller);
 
 	status = usb_phy_init(musb->xceiv);
@@ -2237,13 +2220,8 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (status)
 		goto fail5;
 
-	pm_runtime_put(musb->controller);
-
-	/*
-	 * For why this is currently needed, see commit 3e43a0725637
-	 * ("usb: musb: core: add pm_runtime_irq_safe()")
-	 */
-	pm_runtime_irq_safe(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 
@@ -2265,7 +2243,9 @@ fail2_5:
 	usb_phy_shutdown(musb->xceiv);
 
 err_usb_phy_init:
+	pm_runtime_dont_use_autosuspend(musb->controller);
 	pm_runtime_put_sync(musb->controller);
+	pm_runtime_disable(musb->controller);
 
 fail2:
 	if (musb->irq_wake)
@@ -2273,7 +2253,6 @@ fail2:
 	musb_platform_exit(musb);
 
 fail1:
-	pm_runtime_disable(musb->controller);
 	dev_err(musb->controller,
 		"musb_init_controller failed with status %d\n", status);
 
@@ -2312,6 +2291,7 @@ static int musb_remove(struct platform_device *pdev)
 {
 	struct device	*dev = &pdev->dev;
 	struct musb	*musb = dev_to_musb(dev);
+	unsigned long	flags;
 
 	/* this gets called on rmmod.
 	 *  - Host mode: host may still be active
@@ -2319,17 +2299,26 @@ static int musb_remove(struct platform_device *pdev)
 	 *  - OTG mode: both roles are deactivated (or never-activated)
 	 */
 	musb_exit_debugfs(musb);
-	musb_shutdown(pdev);
-	musb_phy_callback = NULL;
-
-	if (musb->dma_controller)
-		musb_dma_controller_destroy(musb->dma_controller);
-
-	usb_phy_shutdown(musb->xceiv);
 
 	cancel_work_sync(&musb->irq_work);
 	cancel_delayed_work_sync(&musb->finish_resume_work);
 	cancel_delayed_work_sync(&musb->deassert_reset_work);
+	pm_runtime_get_sync(musb->controller);
+	musb_host_cleanup(musb);
+	musb_gadget_cleanup(musb);
+	spin_lock_irqsave(&musb->lock, flags);
+	musb_platform_disable(musb);
+	musb_generic_disable(musb);
+	spin_unlock_irqrestore(&musb->lock, flags);
+	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
+	pm_runtime_dont_use_autosuspend(musb->controller);
+	pm_runtime_put_sync(musb->controller);
+	pm_runtime_disable(musb->controller);
+	musb_platform_exit(musb);
+	musb_phy_callback = NULL;
+	if (musb->dma_controller)
+		musb_dma_controller_destroy(musb->dma_controller);
+	usb_phy_shutdown(musb->xceiv);
 	musb_free(musb);
 	device_init_wakeup(dev, 0);
 	return 0;
@@ -2429,7 +2418,8 @@ static void musb_restore_context(struct musb *musb)
 	musb_writew(musb_base, MUSB_INTRTXE, musb->intrtxe);
 	musb_writew(musb_base, MUSB_INTRRXE, musb->intrrxe);
 	musb_writeb(musb_base, MUSB_INTRUSBE, musb->context.intrusbe);
-	musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
+	if (musb->context.devctl & MUSB_DEVCTL_SESSION)
+		musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
 
 	for (i = 0; i < musb->config->num_eps; ++i) {
 		struct musb_hw_ep	*hw_ep;
@@ -2612,7 +2602,6 @@ static struct platform_driver musb_driver = {
 	},
 	.probe		= musb_probe,
 	.remove		= musb_remove,
-	.shutdown	= musb_shutdown,
 };
 
 module_platform_driver(musb_driver);
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index b6afe9e43305..b55a776b03eb 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -215,7 +215,7 @@ struct musb_platform_ops {
 				dma_addr_t *dma_addr, u32 *len);
 	void	(*pre_root_reset_end)(struct musb *musb);
 	void	(*post_root_reset_end)(struct musb *musb);
-	void	(*phy_callback)(enum musb_vbus_id_status status);
+	int	(*phy_callback)(enum musb_vbus_id_status status);
 };
 
 /*
@@ -312,6 +312,7 @@ struct musb {
 	struct work_struct	irq_work;
 	struct delayed_work	deassert_reset_work;
 	struct delayed_work	finish_resume_work;
+	struct delayed_work	gadget_work;
 	u16			hwvers;
 
 	u16			intrrxe;
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 152865b36522..af2a3a7addf9 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1656,6 +1656,20 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
 	return usb_phy_set_power(musb->xceiv, mA);
 }
 
+static void musb_gadget_work(struct work_struct *work)
+{
+	struct musb *musb;
+	unsigned long flags;
+
+	musb = container_of(work, struct musb, gadget_work.work);
+	pm_runtime_get_sync(musb->controller);
+	spin_lock_irqsave(&musb->lock, flags);
+	musb_pullup(musb, musb->softconnect);
+	spin_unlock_irqrestore(&musb->lock, flags);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
+}
+
 static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 {
 	struct musb	*musb = gadget_to_musb(gadget);
@@ -1663,20 +1677,16 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 
 	is_on = !!is_on;
 
-	pm_runtime_get_sync(musb->controller);
-
 	/* NOTE: this assumes we are sensing vbus; we'd rather
 	 * not pullup unless the B-session is active.
 	 */
 	spin_lock_irqsave(&musb->lock, flags);
 	if (is_on != musb->softconnect) {
 		musb->softconnect = is_on;
-		musb_pullup(musb, is_on);
+		schedule_delayed_work(&musb->gadget_work, 0);
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
 
-	pm_runtime_put(musb->controller);
-
 	return 0;
 }
 
@@ -1845,7 +1855,7 @@ int musb_gadget_setup(struct musb *musb)
 #elif IS_ENABLED(CONFIG_USB_MUSB_GADGET)
 	musb->g.is_otg = 0;
 #endif
-
+	INIT_DELAYED_WORK(&musb->gadget_work, musb_gadget_work);
 	musb_g_init_endpoints(musb);
 
 	musb->is_active = 0;
@@ -1866,6 +1876,8 @@ void musb_gadget_cleanup(struct musb *musb)
 {
 	if (musb->port_mode == MUSB_PORT_MODE_HOST)
 		return;
+
+	cancel_delayed_work_sync(&musb->gadget_work);
 	usb_del_gadget_udc(&musb->g);
 }
 
@@ -1914,8 +1926,8 @@ static int musb_gadget_start(struct usb_gadget *g,
 	if (musb->xceiv->last_event == USB_EVENT_ID)
 		musb_platform_set_vbus(musb, 1);
 
-	if (musb->xceiv->last_event == USB_EVENT_NONE)
-		pm_runtime_put(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 
@@ -1934,8 +1946,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
 	struct musb	*musb = gadget_to_musb(g);
 	unsigned long	flags;
 
-	if (musb->xceiv->last_event == USB_EVENT_NONE)
-		pm_runtime_get_sync(musb->controller);
+	pm_runtime_get_sync(musb->controller);
 
 	/*
 	 * REVISIT always use otg_set_peripheral() here too;
@@ -1963,7 +1974,8 @@ static int musb_gadget_stop(struct usb_gadget *g)
 	 * that currently misbehaves.
 	 */
 
-	pm_runtime_put(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 }
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 2f8ad7f1f482..d227a71d85e1 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -434,7 +434,13 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb,
 		}
 	}
 
-	if (qh != NULL && qh->is_ready) {
+	/*
+	 * The pipe must be broken if current urb->status is set, so don't
+	 * start next urb.
+	 * TODO: to minimize the risk of regression, only check urb->status
+	 * for RX, until we have a test case to understand the behavior of TX.
+	 */
+	if ((!status || !is_in) && qh && qh->is_ready) {
 		dev_dbg(musb->controller, "... next ep%d %cX urb %p\n",
 		    hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh));
 		musb_start_urb(musb, is_in, qh);
@@ -594,14 +600,13 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
 		musb_writew(ep->regs, MUSB_TXCSR, 0);
 
 	/* scrub all previous state, clearing toggle */
-	} else {
-		csr = musb_readw(ep->regs, MUSB_RXCSR);
-		if (csr & MUSB_RXCSR_RXPKTRDY)
-			WARNING("rx%d, packet/%d ready?\n", ep->epnum,
-				musb_readw(ep->regs, MUSB_RXCOUNT));
-
-		musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
 	}
+	csr = musb_readw(ep->regs, MUSB_RXCSR);
+	if (csr & MUSB_RXCSR_RXPKTRDY)
+		WARNING("rx%d, packet/%d ready?\n", ep->epnum,
+			musb_readw(ep->regs, MUSB_RXCOUNT));
+
+	musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
 
 	/* target addr and (for multipoint) hub addr/port */
 	if (musb->is_multipoint) {
@@ -627,7 +632,7 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
 	ep->rx_reinit = 0;
 }
 
-static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
+static void musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
 		struct musb_hw_ep *hw_ep, struct musb_qh *qh,
 		struct urb *urb, u32 offset,
 		u32 *length, u8 *mode)
@@ -664,23 +669,18 @@ static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
 	}
 	channel->desired_mode = *mode;
 	musb_writew(epio, MUSB_TXCSR, csr);
-
-	return 0;
 }
 
-static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
-					  struct musb_hw_ep *hw_ep,
-					  struct musb_qh *qh,
-					  struct urb *urb,
-					  u32 offset,
-					  u32 *length,
-					  u8 *mode)
+static void musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
+					   struct musb_hw_ep *hw_ep,
+					   struct musb_qh *qh,
+					   struct urb *urb,
+					   u32 offset,
+					   u32 *length,
+					   u8 *mode)
 {
 	struct dma_channel *channel = hw_ep->tx_channel;
 
-	if (!is_cppi_enabled(hw_ep->musb) && !tusb_dma_omap(hw_ep->musb))
-		return -ENODEV;
-
 	channel->actual_len = 0;
 
 	/*
@@ -688,8 +688,6 @@ static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
 	 * to identify the zero-length-final-packet case.
 	 */
 	*mode = (urb->transfer_flags & URB_ZERO_PACKET) ? 1 : 0;
-
-	return 0;
 }
 
 static bool musb_tx_dma_program(struct dma_controller *dma,
@@ -699,15 +697,14 @@ static bool musb_tx_dma_program(struct dma_controller *dma,
 	struct dma_channel	*channel = hw_ep->tx_channel;
 	u16			pkt_size = qh->maxpacket;
 	u8			mode;
-	int			res;
 
 	if (musb_dma_inventra(hw_ep->musb) || musb_dma_ux500(hw_ep->musb))
-		res = musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb,
-						 offset, &length, &mode);
+		musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb, offset,
+					    &length, &mode);
+	else if (is_cppi_enabled(hw_ep->musb) || tusb_dma_omap(hw_ep->musb))
+		musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb, offset,
+					       &length, &mode);
 	else
-		res = musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb,
-						     offset, &length, &mode);
-	if (res)
 		return false;
 
 	qh->segsize = length;
@@ -995,9 +992,15 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep,
 	if (is_in) {
 		dma = is_dma_capable() ? ep->rx_channel : NULL;
 
-		/* clear nak timeout bit */
+		/*
+		 * Need to stop the transaction by clearing REQPKT first
+		 * then the NAK Timeout bit ref MUSBMHDRC USB 2.0 HIGH-SPEED
+		 * DUAL-ROLE CONTROLLER Programmer's Guide, section 9.2.2
+		 */
 		rx_csr = musb_readw(epio, MUSB_RXCSR);
 		rx_csr |= MUSB_RXCSR_H_WZC_BITS;
+		rx_csr &= ~MUSB_RXCSR_H_REQPKT;
+		musb_writew(epio, MUSB_RXCSR, rx_csr);
 		rx_csr &= ~MUSB_RXCSR_DATAERROR;
 		musb_writew(epio, MUSB_RXCSR, rx_csr);
 
@@ -1551,7 +1554,7 @@ static int musb_rx_dma_iso_cppi41(struct dma_controller *dma,
 				  struct urb *urb,
 				  size_t len)
 {
-	struct dma_channel *channel = hw_ep->tx_channel;
+	struct dma_channel *channel = hw_ep->rx_channel;
 	void __iomem *epio = hw_ep->regs;
 	dma_addr_t *buf;
 	u32 length, res;
@@ -1870,6 +1873,9 @@ void musb_host_rx(struct musb *musb, u8 epnum)
 		status = -EPROTO;
 		musb_writeb(epio, MUSB_RXINTERVAL, 0);
 
+		rx_csr &= ~MUSB_RXCSR_H_ERROR;
+		musb_writew(epio, MUSB_RXCSR, rx_csr);
+
 	} else if (rx_csr & MUSB_RXCSR_DATAERROR) {
 
 		if (USB_ENDPOINT_XFER_ISOC != qh->type) {
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index c84e0322c108..0b4cec940386 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -49,97 +49,14 @@ struct omap2430_glue {
 	enum musb_vbus_id_status status;
 	struct work_struct	omap_musb_mailbox_work;
 	struct device		*control_otghs;
+	bool			cable_connected;
+	bool			enabled;
+	bool			powered;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
 static struct omap2430_glue	*_glue;
 
-static struct timer_list musb_idle_timer;
-
-static void musb_do_idle(unsigned long _musb)
-{
-	struct musb	*musb = (void *)_musb;
-	unsigned long	flags;
-	u8	power;
-	u8	devctl;
-
-	spin_lock_irqsave(&musb->lock, flags);
-
-	switch (musb->xceiv->otg->state) {
-	case OTG_STATE_A_WAIT_BCON:
-
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl & MUSB_DEVCTL_BDEVICE) {
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-			MUSB_DEV_MODE(musb);
-		} else {
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-			MUSB_HST_MODE(musb);
-		}
-		break;
-	case OTG_STATE_A_SUSPEND:
-		/* finish RESUME signaling? */
-		if (musb->port1_status & MUSB_PORT_STAT_RESUME) {
-			power = musb_readb(musb->mregs, MUSB_POWER);
-			power &= ~MUSB_POWER_RESUME;
-			dev_dbg(musb->controller, "root port resume stopped, power %02x\n", power);
-			musb_writeb(musb->mregs, MUSB_POWER, power);
-			musb->is_active = 1;
-			musb->port1_status &= ~(USB_PORT_STAT_SUSPEND
-						| MUSB_PORT_STAT_RESUME);
-			musb->port1_status |= USB_PORT_STAT_C_SUSPEND << 16;
-			usb_hcd_poll_rh_status(musb->hcd);
-			/* NOTE: it might really be A_WAIT_BCON ... */
-			musb->xceiv->otg->state = OTG_STATE_A_HOST;
-		}
-		break;
-	case OTG_STATE_A_HOST:
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl &  MUSB_DEVCTL_BDEVICE)
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-		else
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON;
-	default:
-		break;
-	}
-	spin_unlock_irqrestore(&musb->lock, flags);
-}
-
-
-static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout)
-{
-	unsigned long		default_timeout = jiffies + msecs_to_jiffies(3);
-	static unsigned long	last_timer;
-
-	if (timeout == 0)
-		timeout = default_timeout;
-
-	/* Never idle if active, or when VBUS timeout is not set as host */
-	if (musb->is_active || ((musb->a_wait_bcon == 0)
-			&& (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON))) {
-		dev_dbg(musb->controller, "%s active, deleting timer\n",
-			usb_otg_state_string(musb->xceiv->otg->state));
-		del_timer(&musb_idle_timer);
-		last_timer = jiffies;
-		return;
-	}
-
-	if (time_after(last_timer, timeout)) {
-		if (!timer_pending(&musb_idle_timer))
-			last_timer = timeout;
-		else {
-			dev_dbg(musb->controller, "Longer idle timer already pending, ignoring\n");
-			return;
-		}
-	}
-	last_timer = timeout;
-
-	dev_dbg(musb->controller, "%s inactive, for idle timer for %lu ms\n",
-		usb_otg_state_string(musb->xceiv->otg->state),
-		(unsigned long)jiffies_to_msecs(timeout - jiffies));
-	mod_timer(&musb_idle_timer, timeout);
-}
-
 static void omap2430_musb_set_vbus(struct musb *musb, int is_on)
 {
 	struct usb_otg	*otg = musb->xceiv->otg;
@@ -205,16 +122,6 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on)
 		musb_readb(musb->mregs, MUSB_DEVCTL));
 }
 
-static int omap2430_musb_set_mode(struct musb *musb, u8 musb_mode)
-{
-	u8	devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-
-	devctl |= MUSB_DEVCTL_SESSION;
-	musb_writeb(musb->mregs, MUSB_DEVCTL, devctl);
-
-	return 0;
-}
-
 static inline void omap2430_low_level_exit(struct musb *musb)
 {
 	u32 l;
@@ -234,22 +141,63 @@ static inline void omap2430_low_level_init(struct musb *musb)
 	musb_writel(musb->mregs, OTG_FORCESTDBY, l);
 }
 
-static void omap2430_musb_mailbox(enum musb_vbus_id_status status)
+/*
+ * We can get multiple cable events so we need to keep track
+ * of the power state. Only keep power enabled if USB cable is
+ * connected and a gadget is started.
+ */
+static void omap2430_set_power(struct musb *musb, bool enabled, bool cable)
+{
+	struct device *dev = musb->controller;
+	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
+	bool power_up;
+	int res;
+
+	if (glue->enabled != enabled)
+		glue->enabled = enabled;
+
+	if (glue->cable_connected != cable)
+		glue->cable_connected = cable;
+
+	power_up = glue->enabled && glue->cable_connected;
+	if (power_up == glue->powered) {
+		dev_warn(musb->controller, "power state already %i\n",
+			 power_up);
+		return;
+	}
+
+	glue->powered = power_up;
+
+	if (power_up) {
+		res = pm_runtime_get_sync(musb->controller);
+		if (res < 0) {
+			dev_err(musb->controller, "could not enable: %i", res);
+			glue->powered = false;
+		}
+	} else {
+		pm_runtime_mark_last_busy(musb->controller);
+		pm_runtime_put_autosuspend(musb->controller);
+	}
+}
+
+static int omap2430_musb_mailbox(enum musb_vbus_id_status status)
 {
 	struct omap2430_glue	*glue = _glue;
 
 	if (!glue) {
 		pr_err("%s: musb core is not yet initialized\n", __func__);
-		return;
+		return -EPROBE_DEFER;
 	}
 	glue->status = status;
 
 	if (!glue_to_musb(glue)) {
 		pr_err("%s: musb core is not yet ready\n", __func__);
-		return;
+		return -EPROBE_DEFER;
 	}
 
 	schedule_work(&glue->omap_musb_mailbox_work);
+
+	return 0;
 }
 
 static void omap_musb_set_mailbox(struct omap2430_glue *glue)
@@ -259,6 +207,13 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = pdata->board_data;
 	struct usb_otg *otg = musb->xceiv->otg;
+	bool cable_connected;
+
+	cable_connected = ((glue->status == MUSB_ID_GROUND) ||
+			   (glue->status == MUSB_VBUS_VALID));
+
+	if (cable_connected)
+		omap2430_set_power(musb, glue->enabled, cable_connected);
 
 	switch (glue->status) {
 	case MUSB_ID_GROUND:
@@ -268,7 +223,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
 		musb->xceiv->last_event = USB_EVENT_ID;
 		if (musb->gadget_driver) {
-			pm_runtime_get_sync(dev);
 			omap_control_usb_set_mode(glue->control_otghs,
 				USB_MODE_HOST);
 			omap2430_musb_set_vbus(musb, 1);
@@ -281,8 +235,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		otg->default_a = false;
 		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 		musb->xceiv->last_event = USB_EVENT_VBUS;
-		if (musb->gadget_driver)
-			pm_runtime_get_sync(dev);
 		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE);
 		break;
 
@@ -291,11 +243,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		dev_dbg(dev, "VBUS Disconnect\n");
 
 		musb->xceiv->last_event = USB_EVENT_NONE;
-		if (musb->gadget_driver) {
+		if (musb->gadget_driver)
 			omap2430_musb_set_vbus(musb, 0);
-			pm_runtime_mark_last_busy(dev);
-			pm_runtime_put_autosuspend(dev);
-		}
 
 		if (data->interface_type == MUSB_INTERFACE_UTMI)
 			otg_set_vbus(musb->xceiv->otg, 0);
@@ -307,6 +256,9 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		dev_dbg(dev, "ID float\n");
 	}
 
+	if (!cable_connected)
+		omap2430_set_power(musb, glue->enabled, cable_connected);
+
 	atomic_notifier_call_chain(&musb->xceiv->notifier,
 			musb->xceiv->last_event, NULL);
 }
@@ -316,13 +268,8 @@ static void omap_musb_mailbox_work(struct work_struct *mailbox_work)
 {
 	struct omap2430_glue *glue = container_of(mailbox_work,
 				struct omap2430_glue, omap_musb_mailbox_work);
-	struct musb *musb = glue_to_musb(glue);
-	struct device *dev = musb->controller;
 
-	pm_runtime_get_sync(dev);
 	omap_musb_set_mailbox(glue);
-	pm_runtime_mark_last_busy(dev);
-	pm_runtime_put_autosuspend(dev);
 }
 
 static irqreturn_t omap2430_musb_interrupt(int irq, void *__hci)
@@ -389,23 +336,7 @@ static int omap2430_musb_init(struct musb *musb)
 		return PTR_ERR(musb->phy);
 	}
 	musb->isr = omap2430_musb_interrupt;
-
-	/*
-	 * Enable runtime PM for musb parent (this driver). We can't
-	 * do it earlier as struct musb is not yet allocated and we
-	 * need to touch the musb registers for runtime PM.
-	 */
-	pm_runtime_enable(glue->dev);
-	status = pm_runtime_get_sync(glue->dev);
-	if (status < 0)
-		goto err1;
-
-	status = pm_runtime_get_sync(dev);
-	if (status < 0) {
-		dev_err(dev, "pm_runtime_get_sync FAILED %d\n", status);
-		pm_runtime_put_sync(glue->dev);
-		goto err1;
-	}
+	phy_init(musb->phy);
 
 	l = musb_readl(musb->mregs, OTG_INTERFSEL);
 
@@ -427,20 +358,10 @@ static int omap2430_musb_init(struct musb *musb)
 			musb_readl(musb->mregs, OTG_INTERFSEL),
 			musb_readl(musb->mregs, OTG_SIMENABLE));
 
-	setup_timer(&musb_idle_timer, musb_do_idle, (unsigned long) musb);
-
 	if (glue->status != MUSB_UNKNOWN)
 		omap_musb_set_mailbox(glue);
 
-	phy_init(musb->phy);
-	phy_power_on(musb->phy);
-
-	pm_runtime_put_noidle(musb->controller);
-	pm_runtime_put_noidle(glue->dev);
 	return 0;
-
-err1:
-	return status;
 }
 
 static void omap2430_musb_enable(struct musb *musb)
@@ -452,6 +373,11 @@ static void omap2430_musb_enable(struct musb *musb)
 	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = pdata->board_data;
 
+	if (!WARN_ON(!musb->phy))
+		phy_power_on(musb->phy);
+
+	omap2430_set_power(musb, true, glue->cable_connected);
+
 	switch (glue->status) {
 
 	case MUSB_ID_GROUND:
@@ -487,18 +413,25 @@ static void omap2430_musb_disable(struct musb *musb)
 	struct device *dev = musb->controller;
 	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
+	if (!WARN_ON(!musb->phy))
+		phy_power_off(musb->phy);
+
 	if (glue->status != MUSB_UNKNOWN)
 		omap_control_usb_set_mode(glue->control_otghs,
 			USB_MODE_DISCONNECT);
+
+	omap2430_set_power(musb, false, glue->cable_connected);
 }
 
 static int omap2430_musb_exit(struct musb *musb)
 {
-	del_timer_sync(&musb_idle_timer);
+	struct device *dev = musb->controller;
+	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
 	omap2430_low_level_exit(musb);
-	phy_power_off(musb->phy);
 	phy_exit(musb->phy);
+	musb->phy = NULL;
+	cancel_work_sync(&glue->omap_musb_mailbox_work);
 
 	return 0;
 }
@@ -512,9 +445,6 @@ static const struct musb_platform_ops omap2430_ops = {
 	.init		= omap2430_musb_init,
 	.exit		= omap2430_musb_exit,
 
-	.set_mode	= omap2430_musb_set_mode,
-	.try_idle	= omap2430_musb_try_idle,
-
 	.set_vbus	= omap2430_musb_set_vbus,
 
 	.enable		= omap2430_musb_enable,
@@ -639,11 +569,9 @@ static int omap2430_probe(struct platform_device *pdev)
 		goto err2;
 	}
 
-	/*
-	 * Note that we cannot enable PM runtime yet for this
-	 * driver as we need struct musb initialized first.
-	 * See omap2430_musb_init above.
-	 */
+	pm_runtime_enable(glue->dev);
+	pm_runtime_use_autosuspend(glue->dev);
+	pm_runtime_set_autosuspend_delay(glue->dev, 500);
 
 	ret = platform_device_add(musb);
 	if (ret) {
@@ -662,12 +590,14 @@ err0:
 
 static int omap2430_remove(struct platform_device *pdev)
 {
-	struct omap2430_glue		*glue = platform_get_drvdata(pdev);
+	struct omap2430_glue *glue = platform_get_drvdata(pdev);
+	struct musb *musb = glue_to_musb(glue);
 
 	pm_runtime_get_sync(glue->dev);
-	cancel_work_sync(&glue->omap_musb_mailbox_work);
 	platform_device_unregister(glue->musb);
+	omap2430_set_power(musb, false, false);
 	pm_runtime_put_sync(glue->dev);
+	pm_runtime_dont_use_autosuspend(glue->dev);
 	pm_runtime_disable(glue->dev);
 
 	return 0;
@@ -680,12 +610,13 @@ static int omap2430_runtime_suspend(struct device *dev)
 	struct omap2430_glue		*glue = dev_get_drvdata(dev);
 	struct musb			*musb = glue_to_musb(glue);
 
-	if (musb) {
-		musb->context.otg_interfsel = musb_readl(musb->mregs,
-				OTG_INTERFSEL);
+	if (!musb)
+		return 0;
 
-		omap2430_low_level_exit(musb);
-	}
+	musb->context.otg_interfsel = musb_readl(musb->mregs,
+						 OTG_INTERFSEL);
+
+	omap2430_low_level_exit(musb);
 
 	return 0;
 }
@@ -696,7 +627,7 @@ static int omap2430_runtime_resume(struct device *dev)
 	struct musb			*musb = glue_to_musb(glue);
 
 	if (!musb)
-		return -EPROBE_DEFER;
+		return 0;
 
 	omap2430_low_level_init(musb);
 	musb_writel(musb->mregs, OTG_INTERFSEL,
@@ -738,18 +669,8 @@ static struct platform_driver omap2430_driver = {
 	},
 };
 
+module_platform_driver(omap2430_driver);
+
 MODULE_DESCRIPTION("OMAP2PLUS MUSB Glue Layer");
 MODULE_AUTHOR("Felipe Balbi <balbi@ti.com>");
 MODULE_LICENSE("GPL v2");
-
-static int __init omap2430_init(void)
-{
-	return platform_driver_register(&omap2430_driver);
-}
-subsys_initcall(omap2430_init);
-
-static void __exit omap2430_exit(void)
-{
-	platform_driver_unregister(&omap2430_driver);
-}
-module_exit(omap2430_exit);
diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c
index fdab4232cfbf..76500515dd8b 100644
--- a/drivers/usb/musb/sunxi.c
+++ b/drivers/usb/musb/sunxi.c
@@ -80,7 +80,8 @@ static struct musb *sunxi_musb;
 
 struct sunxi_glue {
 	struct device		*dev;
-	struct platform_device	*musb;
+	struct musb		*musb;
+	struct platform_device	*musb_pdev;
 	struct clk		*clk;
 	struct reset_control	*rst;
 	struct phy		*phy;
@@ -102,7 +103,7 @@ static void sunxi_musb_work(struct work_struct *work)
 		return;
 
 	if (test_and_clear_bit(SUNXI_MUSB_FL_HOSTMODE_PEND, &glue->flags)) {
-		struct musb *musb = platform_get_drvdata(glue->musb);
+		struct musb *musb = glue->musb;
 		unsigned long flags;
 		u8 devctl;
 
@@ -112,7 +113,7 @@ static void sunxi_musb_work(struct work_struct *work)
 		if (test_bit(SUNXI_MUSB_FL_HOSTMODE, &glue->flags)) {
 			set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
 			musb->xceiv->otg->default_a = 1;
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			MUSB_HST_MODE(musb);
 			devctl |= MUSB_DEVCTL_SESSION;
 		} else {
@@ -145,10 +146,12 @@ static void sunxi_musb_set_vbus(struct musb *musb, int is_on)
 {
 	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
 
-	if (is_on)
+	if (is_on) {
 		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-	else
+		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
+	} else {
 		clear_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
+	}
 
 	schedule_work(&glue->work);
 }
@@ -264,15 +267,6 @@ static int sunxi_musb_init(struct musb *musb)
 	if (ret)
 		goto error_unregister_notifier;
 
-	if (musb->port_mode == MUSB_PORT_MODE_HOST) {
-		ret = phy_power_on(glue->phy);
-		if (ret)
-			goto error_phy_exit;
-		set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
-		/* Stop musb work from turning vbus off again */
-		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-	}
-
 	musb->isr = sunxi_musb_interrupt;
 
 	/* Stop the musb-core from doing runtime pm (not supported on sunxi) */
@@ -280,8 +274,6 @@ static int sunxi_musb_init(struct musb *musb)
 
 	return 0;
 
-error_phy_exit:
-	phy_exit(glue->phy);
 error_unregister_notifier:
 	if (musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
 		extcon_unregister_notifier(glue->extcon, EXTCON_USB_HOST,
@@ -323,10 +315,31 @@ static int sunxi_musb_exit(struct musb *musb)
 	return 0;
 }
 
+static int sunxi_set_mode(struct musb *musb, u8 mode)
+{
+	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
+	int ret;
+
+	if (mode == MUSB_HOST) {
+		ret = phy_power_on(glue->phy);
+		if (ret)
+			return ret;
+
+		set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
+		/* Stop musb work from turning vbus off again */
+		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
+		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
+	}
+
+	return 0;
+}
+
 static void sunxi_musb_enable(struct musb *musb)
 {
 	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
 
+	glue->musb = musb;
+
 	/* musb_core does not call us in a balanced manner */
 	if (test_and_set_bit(SUNXI_MUSB_FL_ENABLED, &glue->flags))
 		return;
@@ -569,6 +582,7 @@ static const struct musb_platform_ops sunxi_musb_ops = {
 	.exit		= sunxi_musb_exit,
 	.enable		= sunxi_musb_enable,
 	.disable	= sunxi_musb_disable,
+	.set_mode	= sunxi_set_mode,
 	.fifo_offset	= sunxi_musb_fifo_offset,
 	.ep_offset	= sunxi_musb_ep_offset,
 	.busctl_offset	= sunxi_musb_busctl_offset,
@@ -721,9 +735,9 @@ static int sunxi_musb_probe(struct platform_device *pdev)
 	pinfo.data	= &pdata;
 	pinfo.size_data = sizeof(pdata);
 
-	glue->musb = platform_device_register_full(&pinfo);
-	if (IS_ERR(glue->musb)) {
-		ret = PTR_ERR(glue->musb);
+	glue->musb_pdev = platform_device_register_full(&pinfo);
+	if (IS_ERR(glue->musb_pdev)) {
+		ret = PTR_ERR(glue->musb_pdev);
 		dev_err(&pdev->dev, "Error registering musb dev: %d\n", ret);
 		goto err_unregister_usb_phy;
 	}
@@ -740,7 +754,7 @@ static int sunxi_musb_remove(struct platform_device *pdev)
 	struct sunxi_glue *glue = platform_get_drvdata(pdev);
 	struct platform_device *usb_phy = glue->usb_phy;
 
-	platform_device_unregister(glue->musb); /* Frees glue ! */
+	platform_device_unregister(glue->musb_pdev);
 	usb_phy_generic_unregister(usb_phy);
 
 	return 0;
diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c
index 24e2b3cf1867..a72e8d670adc 100644
--- a/drivers/usb/phy/phy-twl6030-usb.c
+++ b/drivers/usb/phy/phy-twl6030-usb.c
@@ -97,6 +97,9 @@ struct twl6030_usb {
 
 	struct regulator		*usb3v3;
 
+	/* used to check initial cable status after probe */
+	struct delayed_work	get_status_work;
+
 	/* used to set vbus, in atomic path */
 	struct work_struct	set_vbus_work;
 
@@ -227,12 +230,16 @@ static irqreturn_t twl6030_usb_irq(int irq, void *_twl)
 			twl->asleep = 1;
 			status = MUSB_VBUS_VALID;
 			twl->linkstat = status;
-			musb_mailbox(status);
+			ret = musb_mailbox(status);
+			if (ret)
+				twl->linkstat = MUSB_UNKNOWN;
 		} else {
 			if (twl->linkstat != MUSB_UNKNOWN) {
 				status = MUSB_VBUS_OFF;
 				twl->linkstat = status;
-				musb_mailbox(status);
+				ret = musb_mailbox(status);
+				if (ret)
+					twl->linkstat = MUSB_UNKNOWN;
 				if (twl->asleep) {
 					regulator_disable(twl->usb3v3);
 					twl->asleep = 0;
@@ -264,7 +271,9 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl)
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_SET);
 		status = MUSB_ID_GROUND;
 		twl->linkstat = status;
-		musb_mailbox(status);
+		ret = musb_mailbox(status);
+		if (ret)
+			twl->linkstat = MUSB_UNKNOWN;
 	} else  {
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_CLR);
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET);
@@ -274,6 +283,15 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl)
 	return IRQ_HANDLED;
 }
 
+static void twl6030_status_work(struct work_struct *work)
+{
+	struct twl6030_usb *twl = container_of(work, struct twl6030_usb,
+					       get_status_work.work);
+
+	twl6030_usb_irq(twl->irq2, twl);
+	twl6030_usbotg_irq(twl->irq1, twl);
+}
+
 static int twl6030_enable_irq(struct twl6030_usb *twl)
 {
 	twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET);
@@ -284,8 +302,6 @@ static int twl6030_enable_irq(struct twl6030_usb *twl)
 				REG_INT_MSK_LINE_C);
 	twl6030_interrupt_unmask(TWL6030_CHARGER_CTRL_INT_MASK,
 				REG_INT_MSK_STS_C);
-	twl6030_usb_irq(twl->irq2, twl);
-	twl6030_usbotg_irq(twl->irq1, twl);
 
 	return 0;
 }
@@ -371,6 +387,7 @@ static int twl6030_usb_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev, "could not create sysfs file\n");
 
 	INIT_WORK(&twl->set_vbus_work, otg_set_vbus_work);
+	INIT_DELAYED_WORK(&twl->get_status_work, twl6030_status_work);
 
 	status = request_threaded_irq(twl->irq1, NULL, twl6030_usbotg_irq,
 			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | IRQF_ONESHOT,
@@ -395,6 +412,7 @@ static int twl6030_usb_probe(struct platform_device *pdev)
 
 	twl->asleep = 0;
 	twl6030_enable_irq(twl);
+	schedule_delayed_work(&twl->get_status_work, HZ);
 	dev_info(&pdev->dev, "Initialized TWL6030 USB module\n");
 
 	return 0;
@@ -404,6 +422,7 @@ static int twl6030_usb_remove(struct platform_device *pdev)
 {
 	struct twl6030_usb *twl = platform_get_drvdata(pdev);
 
+	cancel_delayed_work(&twl->get_status_work);
 	twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK,
 		REG_INT_MSK_LINE_C);
 	twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK,
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 2eddbe538cda..5608af4a369d 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -2007,6 +2007,7 @@ static void mos7720_release(struct usb_serial *serial)
 				    urblist_entry)
 			usb_unlink_urb(urbtrack->urb);
 		spin_unlock_irqrestore(&mos_parport->listlock, flags);
+		parport_del_port(mos_parport->pp);
 
 		kref_put(&mos_parport->ref_count, destroy_mos_parport);
 	}
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 4d49fce406e1..5ef014ba6ae8 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -836,6 +836,7 @@ static int uas_slave_configure(struct scsi_device *sdev)
 	if (devinfo->flags & US_FL_BROKEN_FUA)
 		sdev->broken_fua = 1;
 
+	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }
 
@@ -848,7 +849,6 @@ static struct scsi_host_template uas_host_template = {
 	.slave_configure = uas_slave_configure,
 	.eh_abort_handler = uas_eh_abort_handler,
 	.eh_bus_reset_handler = uas_eh_bus_reset_handler,
-	.can_queue = MAX_CMNDS,
 	.this_id = -1,
 	.sg_tablesize = SG_NONE,
 	.skip_settle_delay = 1,
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index fca51105974e..2e0450bec1b1 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -941,7 +941,7 @@ static void vhci_stop(struct usb_hcd *hcd)
 
 static int vhci_get_frame_number(struct usb_hcd *hcd)
 {
-	pr_err("Not yet implemented\n");
+	dev_err_ratelimited(&hcd->self.root_hub->dev, "Not yet implemented\n");
 	return 0;
 }
 
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1d3e45f84549..e032ca397371 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -481,10 +481,14 @@ out:
 
 static int peek_head_len(struct sock *sk)
 {
+	struct socket *sock = sk->sk_socket;
 	struct sk_buff *head;
 	int len = 0;
 	unsigned long flags;
 
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
 	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
 	if (likely(head)) {
@@ -497,6 +501,16 @@ static int peek_head_len(struct sock *sk)
 	return len;
 }
 
+static int sk_has_rx_data(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
+	return skb_queue_empty(&sk->sk_receive_queue);
+}
+
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -513,7 +527,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 		endtime = busy_clock() + vq->busyloop_timeout;
 
 		while (vhost_can_busy_poll(&net->dev, endtime) &&
-		       skb_queue_empty(&sk->sk_receive_queue) &&
+		       !sk_has_rx_data(sk) &&
 		       vhost_vq_avail_empty(&net->dev, vq))
 			cpu_relax_lowlatency();
 
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index b54f26c55dfd..b4b3e256491b 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -746,7 +746,7 @@ config ALIM7101_WDT
 
 config EBC_C384_WDT
 	tristate "WinSystems EBC-C384 Watchdog Timer"
-	depends on X86 && ISA
+	depends on X86 && ISA_BUS_API
 	select WATCHDOG_CORE
 	help
 	  Enables watchdog timer support for the watchdog timer on the
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d46839f51e73..e4db19e88ab1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -151,8 +151,6 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
-static void release_memory_resource(struct resource *resource);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -248,6 +246,19 @@ static enum bp_state update_schedule(enum bp_state state)
 }
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static void release_memory_resource(struct resource *resource)
+{
+	if (!resource)
+		return;
+
+	/*
+	 * No need to reset region to identity mapped since we now
+	 * know that no I/O can be in this region
+	 */
+	release_resource(resource);
+	kfree(resource);
+}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
 	struct resource *res;
@@ -286,19 +297,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	return res;
 }
 
-static void release_memory_resource(struct resource *resource)
-{
-	if (!resource)
-		return;
-
-	/*
-	 * No need to reset region to identity mapped since we now
-	 * know that no I/O can be in this region
-	 */
-	release_resource(resource);
-	kfree(resource);
-}
-
 static enum bp_state reserve_additional_memory(void)
 {
 	long credit;
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 8e67336f8ddd..6a25533da237 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		 if (req_end > field_start && field_end > req_start) {
 			err = conf_space_read(dev, cfg_entry, field_start,
 					      &tmp_val);
 			if (err)
@@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		 if (req_end > field_start && field_end > req_start) {
 			tmp_val = 0;
 
 			err = xen_pcibk_config_read(dev, field_start,
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index ad3d17d29c81..9ead1c2ff1dd 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -145,7 +145,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
 	/* A write to obtain the length must happen as a 32-bit write.
 	 * This does not (yet) support writing individual bytes
 	 */
-	if (value == ~PCI_ROM_ADDRESS_ENABLE)
+	if ((value | ~PCI_ROM_ADDRESS_MASK) == ~0U)
 		bar->which = 1;
 	else {
 		u32 tmpval;
@@ -225,38 +225,42 @@ static inline void read_dev_bar(struct pci_dev *dev,
 			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
 				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
 			bar_info->val = res[pos - 1].start >> 32;
-			bar_info->len_val = res[pos - 1].end >> 32;
+			bar_info->len_val = -resource_size(&res[pos - 1]) >> 32;
 			return;
 		}
 	}
 
+	if (!res[pos].flags ||
+	    (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET |
+			       IORESOURCE_BUSY)))
+		return;
+
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = resource_size(&res[pos]);
+	bar_info->len_val = -resource_size(&res[pos]) |
+			    (res[pos].flags & PCI_REGION_FLAG_MASK);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~0);
-	bar->which = 0;
 
 	return bar;
 }
 
 static void *rom_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
-	bar->which = 0;
 
 	return bar;
 }
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b84c291ba1eb..d7b78d531e63 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 					v9fs_proto_dotu(v9ses));
 	fid = file->private_data;
 	if (!fid) {
-		fid = v9fs_fid_clone(file->f_path.dentry);
+		fid = v9fs_fid_clone(file_dentry(file));
 		if (IS_ERR(fid))
 			return PTR_ERR(fid);
 
@@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		 * because we want write after unlink usecase
 		 * to work.
 		 */
-		fid = v9fs_writeback_fid(file->f_path.dentry);
+		fid = v9fs_writeback_fid(file_dentry(file));
 		if (IS_ERR(fid)) {
 			err = PTR_ERR(fid);
 			mutex_unlock(&v9inode->v_mutex);
@@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
 		 * because we want write after unlink usecase
 		 * to work.
 		 */
-		fid = v9fs_writeback_fid(filp->f_path.dentry);
+		fid = v9fs_writeback_fid(file_dentry(filp));
 		if (IS_ERR(fid)) {
 			retval = PTR_ERR(fid);
 			mutex_unlock(&v9inode->v_mutex);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f0d268b97d19..a439548de785 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -70,9 +70,13 @@ struct autofs_info {
 };
 
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry in the process of expiring */
-#define AUTOFS_INF_NO_RCU	(1<<1) /* the dentry is being considered
+#define AUTOFS_INF_WANT_EXPIRE	(1<<1) /* the dentry is being considered
 					* for expiry, so RCU_walk is
-					* not permitted
+					* not permitted.  If it progresses to
+					* actual expiry attempt, the flag is
+					* not cleared when EXPIRING is set -
+					* in that case it gets cleared only
+					* when it comes to clearing EXPIRING.
 					*/
 #define AUTOFS_INF_PENDING	(1<<2) /* dentry pending mount */
 
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 9510d8d2e9cd..b493909e7492 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	if (ino->flags & AUTOFS_INF_PENDING)
 		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
-		ino->flags |= AUTOFS_INF_NO_RCU;
+		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
 		spin_lock(&sbi->fs_lock);
 		if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 			ino->flags |= AUTOFS_INF_EXPIRING;
-			smp_mb();
-			ino->flags &= ~AUTOFS_INF_NO_RCU;
 			init_completion(&ino->expire_complete);
 			spin_unlock(&sbi->fs_lock);
 			return root;
 		}
-		ino->flags &= ~AUTOFS_INF_NO_RCU;
+		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
 	}
 out:
 	spin_unlock(&sbi->fs_lock);
@@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
-		if (ino->flags & AUTOFS_INF_NO_RCU)
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			expired = NULL;
 		else
 			expired = should_expire(dentry, mnt, timeout, how);
@@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			continue;
 		}
 		ino = autofs4_dentry_ino(expired);
-		ino->flags |= AUTOFS_INF_NO_RCU;
+		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
 		spin_lock(&sbi->fs_lock);
@@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			goto found;
 		}
 
-		ino->flags &= ~AUTOFS_INF_NO_RCU;
+		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
 		if (expired != dentry)
 			dput(expired);
 		spin_unlock(&sbi->fs_lock);
@@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 found:
 	pr_debug("returning %p %pd\n", expired, expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
-	smp_mb();
-	ino->flags &= ~AUTOFS_INF_NO_RCU;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&sbi->lookup_lock);
-	spin_lock(&expired->d_parent->d_lock);
-	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
-	list_move(&expired->d_parent->d_subdirs, &expired->d_child);
-	spin_unlock(&expired->d_lock);
-	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	int status;
 
 	/* Block on any pending expire */
-	if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+	if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
 		return 0;
 	if (rcu_walk)
 		return -ECHILD;
@@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb,
 	ino = autofs4_dentry_ino(dentry);
 	/* avoid rapid-fire expire attempts if expiry fails */
 	ino->last_used = now;
-	ino->flags &= ~AUTOFS_INF_EXPIRING;
+	ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		spin_lock(&sbi->fs_lock);
 		/* avoid rapid-fire expire attempts if expiry fails */
 		ino->last_used = now;
-		ino->flags &= ~AUTOFS_INF_EXPIRING;
+		ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
 		complete_all(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 78bd80298528..3767f6641af1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 		 */
 		struct inode *inode;
 
-		if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			return 0;
 		if (d_mountpoint(dentry))
 			return 0;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0146d911f468..631f1554c87b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi,
 	set_fs(KERNEL_DS);
 
 	mutex_lock(&sbi->pipe_mutex);
-	wr = __vfs_write(file, data, bytes, &file->f_pos);
-	while (bytes && wr) {
+	while (bytes) {
+		wr = __vfs_write(file, data, bytes, &file->f_pos);
+		if (wr <= 0)
+			break;
 		data += wr;
 		bytes -= wr;
-		wr = __vfs_write(file, data, bytes, &file->f_pos);
 	}
 	mutex_unlock(&sbi->pipe_mutex);
 
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b677a6ea6001..7706c8dc5fa6 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2645,7 +2645,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
 	 * This algorithm is recursive because the amount of used stack space
 	 * is very small and the max recursion depth is limited.
 	 */
-	indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
+	indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
 			     btrfsic_get_block_type(state, block),
 			     block->logical_bytenr, block->dev_state->name,
 			     block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 427c36b430a6..a85cf7d23309 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1373,7 +1373,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 
 	if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
 		BUG_ON(tm->slot != 0);
-		eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
+		eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start,
+						eb->len);
 		if (!eb_rewin) {
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
@@ -1454,7 +1455,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	} else if (old_root) {
 		btrfs_tree_read_unlock(eb_root);
 		free_extent_buffer(eb_root);
-		eb = alloc_dummy_extent_buffer(root->fs_info, logical);
+		eb = alloc_dummy_extent_buffer(root->fs_info, logical,
+					root->nodesize);
 	} else {
 		btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
 		eb = btrfs_clone_extent_buffer(eb_root);
@@ -1552,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		       trans->transid, root->fs_info->generation);
 
 	if (!should_cow_block(trans, root, buf)) {
+		trans->dirty = true;
 		*cow_ret = buf;
 		return 0;
 	}
@@ -1783,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
 							map_start);
-			} else {
+			} else if (err == 1) {
 				read_extent_buffer(eb, &unaligned,
 						   offset, sizeof(unaligned));
 				tmp = &unaligned;
+			} else {
+				return err;
 			}
 
 		} else {
@@ -2510,6 +2515,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 		if (!btrfs_buffer_uptodate(tmp, 0, 0))
 			ret = -EIO;
 		free_extent_buffer(tmp);
+	} else {
+		ret = PTR_ERR(tmp);
 	}
 	return ret;
 }
@@ -2773,8 +2780,10 @@ again:
 			 * then we don't want to set the path blocking,
 			 * so we test it here
 			 */
-			if (!should_cow_block(trans, root, b))
+			if (!should_cow_block(trans, root, b)) {
+				trans->dirty = true;
 				goto cow_done;
+			}
 
 			/*
 			 * must have write locks on this node and the
@@ -2823,6 +2832,8 @@ cow_done:
 		}
 
 		ret = key_search(b, key, level, &prev_cmp, &slot);
+		if (ret < 0)
+			goto done;
 
 		if (level != 0) {
 			int dec = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 101c3cfd3f7c..4274a7bfdaed 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
 int btrfs_async_run_delayed_refs(struct btrfs_root *root,
-				 unsigned long count, int wait);
+				 unsigned long count, u64 transid, int wait);
 int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 61561c2a3f96..d3aaabbfada0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
 	return 0;
 }
 
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
-			     struct list_head *del_list)
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list)
 {
 	struct btrfs_delayed_node *delayed_node;
 	struct btrfs_delayed_item *item;
 
 	delayed_node = btrfs_get_delayed_node(inode);
 	if (!delayed_node)
-		return;
+		return false;
+
+	/*
+	 * We can only do one readdir with delayed items at a time because of
+	 * item->readdir_list.
+	 */
+	inode_unlock_shared(inode);
+	inode_lock(inode);
 
 	mutex_lock(&delayed_node->mutex);
 	item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
 	 * requeue or dequeue this delayed node.
 	 */
 	atomic_dec(&delayed_node->refs);
+
+	return true;
 }
 
-void btrfs_put_delayed_items(struct list_head *ins_list,
-			     struct list_head *del_list)
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list)
 {
 	struct btrfs_delayed_item *curr, *next;
 
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
 		if (atomic_dec_and_test(&curr->refs))
 			kfree(curr);
 	}
+
+	/*
+	 * The VFS is going to do up_read(), so we need to downgrade back to a
+	 * read lock.
+	 */
+	downgrade_write(&inode->i_rwsem);
 }
 
 int btrfs_should_delete_dir_index(struct list_head *del_list,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0167853c84ae..2495b3d4075f 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
 void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
 
 /* Used for readdir() */
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
-			     struct list_head *del_list);
-void btrfs_put_delayed_items(struct list_head *ins_list,
-			     struct list_head *del_list);
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list);
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list);
 int btrfs_should_delete_dir_index(struct list_head *del_list,
 				  u64 index);
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6628fca9f4ed..60ce1190307b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1098,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
 	struct inode *btree_inode = root->fs_info->btree_inode;
 
 	buf = btrfs_find_create_tree_block(root, bytenr);
-	if (!buf)
+	if (IS_ERR(buf))
 		return;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
 				 buf, 0, WAIT_NONE, btree_get_extent, 0);
@@ -1114,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 	int ret;
 
 	buf = btrfs_find_create_tree_block(root, bytenr);
-	if (!buf)
+	if (IS_ERR(buf))
 		return 0;
 
 	set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
@@ -1147,7 +1147,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
 						 u64 bytenr)
 {
 	if (btrfs_test_is_dummy_root(root))
-		return alloc_test_extent_buffer(root->fs_info, bytenr);
+		return alloc_test_extent_buffer(root->fs_info, bytenr,
+				root->nodesize);
 	return alloc_extent_buffer(root->fs_info, bytenr);
 }
 
@@ -1171,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 	int ret;
 
 	buf = btrfs_find_create_tree_block(root, bytenr);
-	if (!buf)
-		return ERR_PTR(-ENOMEM);
+	if (IS_ERR(buf))
+		return buf;
 
 	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
 	if (ret) {
@@ -1314,14 +1315,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 /* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(void)
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_root *root;
 
 	root = btrfs_alloc_root(NULL, GFP_KERNEL);
 	if (!root)
 		return ERR_PTR(-ENOMEM);
-	__setup_root(4096, 4096, 4096, root, NULL, 1);
+	/* We don't use the stripesize in selftest, set it as sectorsize */
+	__setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+			BTRFS_ROOT_TREE_OBJECTID);
 	set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
 	root->alloc_bytenr = 0;
 
@@ -1803,6 +1806,13 @@ static int cleaner_kthread(void *arg)
 		if (btrfs_need_cleaner_sleep(root))
 			goto sleep;
 
+		/*
+		 * Do not do anything if we might cause open_ctree() to block
+		 * before we have finished mounting the filesystem.
+		 */
+		if (!root->fs_info->open)
+			goto sleep;
+
 		if (!mutex_trylock(&root->fs_info->cleaner_mutex))
 			goto sleep;
 
@@ -2517,7 +2527,6 @@ int open_ctree(struct super_block *sb,
 	int num_backups_tried = 0;
 	int backup_index = 0;
 	int max_active;
-	bool cleaner_mutex_locked = false;
 
 	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2797,7 +2806,7 @@ int open_ctree(struct super_block *sb,
 
 	nodesize = btrfs_super_nodesize(disk_super);
 	sectorsize = btrfs_super_sectorsize(disk_super);
-	stripesize = btrfs_super_stripesize(disk_super);
+	stripesize = sectorsize;
 	fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
 	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
 
@@ -2996,13 +3005,6 @@ retry_root_backup:
 		goto fail_sysfs;
 	}
 
-	/*
-	 * Hold the cleaner_mutex thread here so that we don't block
-	 * for a long time on btrfs_recover_relocation.  cleaner_kthread
-	 * will wait for us to finish mounting the filesystem.
-	 */
-	mutex_lock(&fs_info->cleaner_mutex);
-	cleaner_mutex_locked = true;
 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
 					       "btrfs-cleaner");
 	if (IS_ERR(fs_info->cleaner_kthread))
@@ -3062,8 +3064,10 @@ retry_root_backup:
 		ret = btrfs_cleanup_fs_roots(fs_info);
 		if (ret)
 			goto fail_qgroup;
-		/* We locked cleaner_mutex before creating cleaner_kthread. */
+
+		mutex_lock(&fs_info->cleaner_mutex);
 		ret = btrfs_recover_relocation(tree_root);
+		mutex_unlock(&fs_info->cleaner_mutex);
 		if (ret < 0) {
 			btrfs_warn(fs_info, "failed to recover relocation: %d",
 					ret);
@@ -3071,8 +3075,6 @@ retry_root_backup:
 			goto fail_qgroup;
 		}
 	}
-	mutex_unlock(&fs_info->cleaner_mutex);
-	cleaner_mutex_locked = false;
 
 	location.objectid = BTRFS_FS_TREE_OBJECTID;
 	location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3186,10 +3188,6 @@ fail_cleaner:
 	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
 fail_sysfs:
-	if (cleaner_mutex_locked) {
-		mutex_unlock(&fs_info->cleaner_mutex);
-		cleaner_mutex_locked = false;
-	}
 	btrfs_sysfs_remove_mounted(fs_info);
 
 fail_fsdev_sysfs:
@@ -4130,6 +4128,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	 * Hint to catch really bogus numbers, bitflips or so, more exact checks are
 	 * done later
 	 */
+	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+		btrfs_err(fs_info, "bytes_used is too small %llu",
+		       btrfs_super_bytes_used(sb));
+		ret = -EINVAL;
+	}
+	if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+		btrfs_err(fs_info, "invalid stripesize %u",
+		       btrfs_super_stripesize(sb));
+		ret = -EINVAL;
+	}
 	if (btrfs_super_num_devices(sb) > (1UL << 31))
 		printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
 				btrfs_super_num_devices(sb));
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8e79d0070bcf..acba821499a9 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -90,7 +90,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
 void btrfs_free_fs_root(struct btrfs_root *root);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-struct btrfs_root *btrfs_alloc_dummy_root(void);
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize);
 #endif
 
 /*
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 689d25ac6a68..82b912a293ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2835,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
 
 struct async_delayed_refs {
 	struct btrfs_root *root;
+	u64 transid;
 	int count;
 	int error;
 	int sync;
@@ -2850,6 +2851,10 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 
 	async = container_of(work, struct async_delayed_refs, work);
 
+	/* if the commit is already started, we don't need to wait here */
+	if (btrfs_transaction_blocked(async->root->fs_info))
+		goto done;
+
 	trans = btrfs_join_transaction(async->root);
 	if (IS_ERR(trans)) {
 		async->error = PTR_ERR(trans);
@@ -2861,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 	 * wait on delayed refs
 	 */
 	trans->sync = true;
+
+	/* Don't bother flushing if we got into a different transaction */
+	if (trans->transid > async->transid)
+		goto end;
+
 	ret = btrfs_run_delayed_refs(trans, async->root, async->count);
 	if (ret)
 		async->error = ret;
-
+end:
 	ret = btrfs_end_transaction(trans, async->root);
 	if (ret && !async->error)
 		async->error = ret;
@@ -2876,7 +2886,7 @@ done:
 }
 
 int btrfs_async_run_delayed_refs(struct btrfs_root *root,
-				 unsigned long count, int wait)
+				 unsigned long count, u64 transid, int wait)
 {
 	struct async_delayed_refs *async;
 	int ret;
@@ -2888,6 +2898,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
 	async->root = root->fs_info->tree_root;
 	async->count = count;
 	async->error = 0;
+	async->transid = transid;
 	if (wait)
 		async->sync = 1;
 	else
@@ -8016,8 +8027,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	struct extent_buffer *buf;
 
 	buf = btrfs_find_create_tree_block(root, bytenr);
-	if (!buf)
-		return ERR_PTR(-ENOMEM);
+	if (IS_ERR(buf))
+		return buf;
+
 	btrfs_set_header_generation(buf, trans->transid);
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
@@ -8044,7 +8056,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
 	}
-	trans->blocks_used++;
+	trans->dirty = true;
 	/* this returns a buffer locked for blocking */
 	return buf;
 }
@@ -8659,8 +8671,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	next = btrfs_find_tree_block(root->fs_info, bytenr);
 	if (!next) {
 		next = btrfs_find_create_tree_block(root, bytenr);
-		if (!next)
-			return -ENOMEM;
+		if (IS_ERR(next))
+			return PTR_ERR(next);
+
 		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
 					       level - 1);
 		reada = 1;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6e953de83f08..75533adef998 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4728,16 +4728,16 @@ err:
 }
 
 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
-						u64 start)
+						u64 start, u32 nodesize)
 {
 	unsigned long len;
 
 	if (!fs_info) {
 		/*
 		 * Called only from tests that don't always have a fs_info
-		 * available, but we know that nodesize is 4096
+		 * available
 		 */
-		len = 4096;
+		len = nodesize;
 	} else {
 		len = fs_info->tree_root->nodesize;
 	}
@@ -4833,7 +4833,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
-					       u64 start)
+					u64 start, u32 nodesize)
 {
 	struct extent_buffer *eb, *exists = NULL;
 	int ret;
@@ -4841,7 +4841,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 	eb = find_extent_buffer(fs_info, start);
 	if (eb)
 		return eb;
-	eb = alloc_dummy_extent_buffer(fs_info, start);
+	eb = alloc_dummy_extent_buffer(fs_info, start, nodesize);
 	if (!eb)
 		return NULL;
 	eb->fs_info = fs_info;
@@ -4892,18 +4892,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	int uptodate = 1;
 	int ret;
 
+	if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) {
+		btrfs_err(fs_info, "bad tree block start %llu", start);
+		return ERR_PTR(-EINVAL);
+	}
+
 	eb = find_extent_buffer(fs_info, start);
 	if (eb)
 		return eb;
 
 	eb = __alloc_extent_buffer(fs_info, start, len);
 	if (!eb)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
-		if (!p)
+		if (!p) {
+			exists = ERR_PTR(-ENOMEM);
 			goto free_eb;
+		}
 
 		spin_lock(&mapping->private_lock);
 		if (PagePrivate(p)) {
@@ -4948,8 +4955,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 again:
 	ret = radix_tree_preload(GFP_NOFS);
-	if (ret)
+	if (ret) {
+		exists = ERR_PTR(ret);
 		goto free_eb;
+	}
 
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
@@ -5333,6 +5342,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
 	return ret;
 }
 
+/*
+ * return 0 if the item is found within a page.
+ * return 1 if the item spans two pages.
+ * return -EINVAL otherwise.
+ */
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			       unsigned long min_len, char **map,
 			       unsigned long *map_start,
@@ -5347,7 +5361,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 		PAGE_SHIFT;
 
 	if (i != end_i)
-		return -EINVAL;
+		return 1;
 
 	if (i == 0) {
 		offset = start_offset;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1baf19c9b79d..c0c1c4fef6ce 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -348,7 +348,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 						  u64 start, unsigned long len);
 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
-						u64 start);
+						u64 start, u32 nodesize);
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
 struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 					 u64 start);
@@ -468,5 +468,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
 				      u64 *end, u64 max_bytes);
 #endif
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
-					       u64 start);
+					       u64 start, u32 nodesize);
 #endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e0c9bd3fb02d..2234e88cf674 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		reserve_bytes = round_up(write_bytes + sector_offset,
 				root->sectorsize);
 
-		if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-					      BTRFS_INODE_PREALLOC)) &&
-		    check_can_nocow(inode, pos, &write_bytes) > 0) {
-			/*
-			 * For nodata cow case, no need to reserve
-			 * data space.
-			 */
-			only_release_metadata = true;
-			/*
-			 * our prealloc extent may be smaller than
-			 * write_bytes, so scale down.
-			 */
-			num_pages = DIV_ROUND_UP(write_bytes + offset,
-						 PAGE_SIZE);
-			reserve_bytes = round_up(write_bytes + sector_offset,
-					root->sectorsize);
-			goto reserve_metadata;
-		}
-
 		ret = btrfs_check_data_free_space(inode, pos, write_bytes);
-		if (ret < 0)
-			break;
+		if (ret < 0) {
+			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+						      BTRFS_INODE_PREALLOC)) &&
+			    check_can_nocow(inode, pos, &write_bytes) > 0) {
+				/*
+				 * For nodata cow case, no need to reserve
+				 * data space.
+				 */
+				only_release_metadata = true;
+				/*
+				 * our prealloc extent may be smaller than
+				 * write_bytes, so scale down.
+				 */
+				num_pages = DIV_ROUND_UP(write_bytes + offset,
+							 PAGE_SIZE);
+				reserve_bytes = round_up(write_bytes +
+							 sector_offset,
+							 root->sectorsize);
+			} else {
+				break;
+			}
+		}
 
-reserve_metadata:
 		ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
 		if (ret) {
 			if (!only_release_metadata)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c6dc1183f542..69d270f6602c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -29,7 +29,7 @@
 #include "inode-map.h"
 #include "volumes.h"
 
-#define BITS_PER_BITMAP		(PAGE_SIZE * 8)
+#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
 #define MAX_CACHE_BYTES_PER_GIG	SZ_32K
 
 struct btrfs_trim_range {
@@ -1415,11 +1415,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
 				   u64 offset)
 {
 	u64 bitmap_start;
-	u32 bytes_per_bitmap;
+	u64 bytes_per_bitmap;
 
 	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
 	bitmap_start = offset - ctl->start;
-	bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
+	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
 	bitmap_start *= bytes_per_bitmap;
 	bitmap_start += ctl->start;
 
@@ -1638,10 +1638,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 	u64 bitmap_bytes;
 	u64 extent_bytes;
 	u64 size = block_group->key.offset;
-	u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
-	u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
+	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
 
-	max_bitmaps = max_t(u32, max_bitmaps, 1);
+	max_bitmaps = max_t(u64, max_bitmaps, 1);
 
 	ASSERT(ctl->total_bitmaps <= max_bitmaps);
 
@@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
 	 * we add more bitmaps.
 	 */
-	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE;
+	bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
 
 	if (bitmap_bytes >= max_bytes) {
 		ctl->extents_thresh = 0;
@@ -3662,7 +3662,7 @@ have_info:
 			if (tmp->offset + tmp->bytes < offset)
 				break;
 			if (offset + bytes < tmp->offset) {
-				n = rb_prev(&info->offset_index);
+				n = rb_prev(&tmp->offset_index);
 				continue;
 			}
 			info = tmp;
@@ -3676,7 +3676,7 @@ have_info:
 			if (offset + bytes < tmp->offset)
 				break;
 			if (tmp->offset + tmp->bytes < offset) {
-				n = rb_next(&info->offset_index);
+				n = rb_next(&tmp->offset_index);
 				continue;
 			}
 			info = tmp;
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index aae520b2aee5..a97fdc156a03 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -24,6 +24,11 @@ int __init btrfs_hash_init(void)
 	return PTR_ERR_OR_ZERO(tfm);
 }
 
+const char* btrfs_crc32c_impl(void)
+{
+	return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm));
+}
+
 void btrfs_hash_exit(void)
 {
 	crypto_free_shash(tfm);
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 118a2316e5d3..c3a2ec554361 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -22,6 +22,7 @@
 int __init btrfs_hash_init(void);
 
 void btrfs_hash_exit(void);
+const char* btrfs_crc32c_impl(void);
 
 u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8b1212e8f7a8..4421954720b8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3271,7 +3271,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 	/* grab metadata reservation from transaction handle */
 	if (reserve) {
 		ret = btrfs_orphan_reserve_metadata(trans, inode);
-		BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
+		ASSERT(!ret);
+		if (ret) {
+			atomic_dec(&root->orphan_inodes);
+			clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+				  &BTRFS_I(inode)->runtime_flags);
+			if (insert)
+				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+					  &BTRFS_I(inode)->runtime_flags);
+			return ret;
+		}
 	}
 
 	/* insert an orphan item to track this unlinked/truncated file */
@@ -4549,6 +4558,7 @@ delete:
 			BUG_ON(ret);
 			if (btrfs_should_throttle_delayed_refs(trans, root))
 				btrfs_async_run_delayed_refs(root,
+							     trans->transid,
 					trans->delayed_ref_updates * 2, 0);
 			if (be_nice) {
 				if (truncate_space_check(trans, root,
@@ -5748,6 +5758,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	int name_len;
 	int is_curr = 0;	/* ctx->pos points to the current index? */
 	bool emitted;
+	bool put = false;
 
 	/* FIXME, use a real flag for deciding about the key type */
 	if (root->fs_info->tree_root == root)
@@ -5765,7 +5776,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	if (key_type == BTRFS_DIR_INDEX_KEY) {
 		INIT_LIST_HEAD(&ins_list);
 		INIT_LIST_HEAD(&del_list);
-		btrfs_get_delayed_items(inode, &ins_list, &del_list);
+		put = btrfs_readdir_get_delayed_items(inode, &ins_list,
+						      &del_list);
 	}
 
 	key.type = key_type;
@@ -5912,8 +5924,8 @@ next:
 nopos:
 	ret = 0;
 err:
-	if (key_type == BTRFS_DIR_INDEX_KEY)
-		btrfs_put_delayed_items(&ins_list, &del_list);
+	if (put)
+		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
 	btrfs_free_path(path);
 	return ret;
 }
@@ -10525,7 +10537,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
 static const struct file_operations btrfs_dir_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
-	.iterate	= btrfs_real_readdir,
+	.iterate_shared	= btrfs_real_readdir,
 	.unlocked_ioctl	= btrfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_compat_ioctl,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e96634a725c3..aca8264f4a49 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -968,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 	struct rb_node *prev = NULL;
 	struct btrfs_ordered_extent *test;
 	int ret = 1;
+	u64 orig_offset = offset;
 
 	spin_lock_irq(&tree->lock);
 	if (ordered) {
@@ -983,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 
 	/* truncate file */
 	if (disk_i_size > i_size) {
-		BTRFS_I(inode)->disk_i_size = i_size;
+		BTRFS_I(inode)->disk_i_size = orig_offset;
 		ret = 0;
 		goto out;
 	}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4e59a91a11e0..60e7179ed4b7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 	trans->aborted = errno;
 	/* Nothing used. The other threads that have joined this
 	 * transaction may be able to continue. */
-	if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
+	if (!trans->dirty && list_empty(&trans->new_bgs)) {
 		const char *errstr;
 
 		errstr = btrfs_decode_error(errno);
@@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 			}
 		}
 		sb->s_flags &= ~MS_RDONLY;
+
+		fs_info->open = 1;
 	}
 out:
 	wake_up_process(fs_info->transaction_kthread);
@@ -2303,7 +2305,7 @@ static void btrfs_interface_exit(void)
 
 static void btrfs_print_mod_info(void)
 {
-	printk(KERN_INFO "Btrfs loaded"
+	printk(KERN_INFO "Btrfs loaded, crc32c=%s"
 #ifdef CONFIG_BTRFS_DEBUG
 			", debug=on"
 #endif
@@ -2313,33 +2315,48 @@ static void btrfs_print_mod_info(void)
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 			", integrity-checker=on"
 #endif
-			"\n");
+			"\n",
+			btrfs_crc32c_impl());
 }
 
 static int btrfs_run_sanity_tests(void)
 {
-	int ret;
-
+	int ret, i;
+	u32 sectorsize, nodesize;
+	u32 test_sectorsize[] = {
+		PAGE_SIZE,
+	};
 	ret = btrfs_init_test_fs();
 	if (ret)
 		return ret;
-
-	ret = btrfs_test_free_space_cache();
-	if (ret)
-		goto out;
-	ret = btrfs_test_extent_buffer_operations();
-	if (ret)
-		goto out;
-	ret = btrfs_test_extent_io();
-	if (ret)
-		goto out;
-	ret = btrfs_test_inodes();
-	if (ret)
-		goto out;
-	ret = btrfs_test_qgroups();
-	if (ret)
-		goto out;
-	ret = btrfs_test_free_space_tree();
+	for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
+		sectorsize = test_sectorsize[i];
+		for (nodesize = sectorsize;
+		     nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
+		     nodesize <<= 1) {
+			pr_info("BTRFS: selftest: sectorsize: %u  nodesize: %u\n",
+				sectorsize, nodesize);
+			ret = btrfs_test_free_space_cache(sectorsize, nodesize);
+			if (ret)
+				goto out;
+			ret = btrfs_test_extent_buffer_operations(sectorsize,
+				nodesize);
+			if (ret)
+				goto out;
+			ret = btrfs_test_extent_io(sectorsize, nodesize);
+			if (ret)
+				goto out;
+			ret = btrfs_test_inodes(sectorsize, nodesize);
+			if (ret)
+				goto out;
+			ret = btrfs_test_qgroups(sectorsize, nodesize);
+			if (ret)
+				goto out;
+			ret = btrfs_test_free_space_tree(sectorsize, nodesize);
+			if (ret)
+				goto out;
+		}
+	}
 out:
 	btrfs_destroy_test_fs();
 	return ret;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index f54bf450bad3..02223f3f78f4 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -68,7 +68,7 @@ int btrfs_init_test_fs(void)
 	if (IS_ERR(test_mnt)) {
 		printk(KERN_ERR "btrfs: cannot mount test file system\n");
 		unregister_filesystem(&test_type);
-		return ret;
+		return PTR_ERR(test_mnt);
 	}
 	return 0;
 }
@@ -175,7 +175,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
 }
 
 struct btrfs_block_group_cache *
-btrfs_alloc_dummy_block_group(unsigned long length)
+btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize)
 {
 	struct btrfs_block_group_cache *cache;
 
@@ -192,8 +192,8 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 	cache->key.objectid = 0;
 	cache->key.offset = length;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-	cache->sectorsize = 4096;
-	cache->full_stripe_len = 4096;
+	cache->sectorsize = sectorsize;
+	cache->full_stripe_len = sectorsize;
 
 	INIT_LIST_HEAD(&cache->list);
 	INIT_LIST_HEAD(&cache->cluster_list);
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 054b8c73c951..66fb6b701eb7 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -26,27 +26,28 @@
 struct btrfs_root;
 struct btrfs_trans_handle;
 
-int btrfs_test_free_space_cache(void);
-int btrfs_test_extent_buffer_operations(void);
-int btrfs_test_extent_io(void);
-int btrfs_test_inodes(void);
-int btrfs_test_qgroups(void);
-int btrfs_test_free_space_tree(void);
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
+int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize);
+int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
+int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
+int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
+int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
 int btrfs_init_test_fs(void);
 void btrfs_destroy_test_fs(void);
 struct inode *btrfs_new_test_inode(void);
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
 void btrfs_free_dummy_root(struct btrfs_root *root);
 struct btrfs_block_group_cache *
-btrfs_alloc_dummy_block_group(unsigned long length);
+btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize);
 void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
 void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
 #else
-static inline int btrfs_test_free_space_cache(void)
+static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
 {
 	return 0;
 }
-static inline int btrfs_test_extent_buffer_operations(void)
+static inline int btrfs_test_extent_buffer_operations(u32 sectorsize,
+	u32 nodesize)
 {
 	return 0;
 }
@@ -57,19 +58,19 @@ static inline int btrfs_init_test_fs(void)
 static inline void btrfs_destroy_test_fs(void)
 {
 }
-static inline int btrfs_test_extent_io(void)
+static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
 {
 	return 0;
 }
-static inline int btrfs_test_inodes(void)
+static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
 {
 	return 0;
 }
-static inline int btrfs_test_qgroups(void)
+static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
 {
 	return 0;
 }
-static inline int btrfs_test_free_space_tree(void)
+static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
 {
 	return 0;
 }
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index f51963a8f929..4f8cbd1ec5ee 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -22,7 +22,7 @@
 #include "../extent_io.h"
 #include "../disk-io.h"
 
-static int test_btrfs_split_item(void)
+static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_path *path;
 	struct btrfs_root *root;
@@ -40,7 +40,7 @@ static int test_btrfs_split_item(void)
 
 	test_msg("Running btrfs_split_item tests\n");
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		test_msg("Could not allocate root\n");
 		return PTR_ERR(root);
@@ -53,7 +53,8 @@ static int test_btrfs_split_item(void)
 		return -ENOMEM;
 	}
 
-	path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
+	path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize,
+							nodesize);
 	if (!eb) {
 		test_msg("Could not allocate dummy buffer\n");
 		ret = -ENOMEM;
@@ -222,8 +223,8 @@ out:
 	return ret;
 }
 
-int btrfs_test_extent_buffer_operations(void)
+int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize)
 {
-	test_msg("Running extent buffer operation tests");
-	return test_btrfs_split_item();
+	test_msg("Running extent buffer operation tests\n");
+	return test_btrfs_split_item(sectorsize, nodesize);
 }
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 55724607f79b..d19ab0317283 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #include <linux/sizes.h>
 #include "btrfs-tests.h"
+#include "../ctree.h"
 #include "../extent_io.h"
 
 #define PROCESS_UNLOCK		(1 << 0)
@@ -65,7 +66,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
 	return count;
 }
 
-static int test_find_delalloc(void)
+static int test_find_delalloc(u32 sectorsize)
 {
 	struct inode *inode;
 	struct extent_io_tree tmp;
@@ -113,7 +114,7 @@ static int test_find_delalloc(void)
 	 * |--- delalloc ---|
 	 * |---  search  ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL);
+	set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -122,9 +123,9 @@ static int test_find_delalloc(void)
 		test_msg("Should have found at least one delalloc\n");
 		goto out_bits;
 	}
-	if (start != 0 || end != 4095) {
-		test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
-			 start, end);
+	if (start != 0 || end != (sectorsize - 1)) {
+		test_msg("Expected start 0 end %u, got start %llu end %llu\n",
+			sectorsize - 1, start, end);
 		goto out_bits;
 	}
 	unlock_extent(&tmp, start, end);
@@ -144,7 +145,7 @@ static int test_find_delalloc(void)
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL);
+	set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -172,7 +173,7 @@ static int test_find_delalloc(void)
 	 * |--- delalloc ---|
 	 *                    |--- search ---|
 	 */
-	test_start = max_bytes + 4096;
+	test_start = max_bytes + sectorsize;
 	locked_page = find_lock_page(inode->i_mapping, test_start >>
 				     PAGE_SHIFT);
 	if (!locked_page) {
@@ -272,6 +273,16 @@ out:
 	return ret;
 }
 
+/**
+ * test_bit_in_byte - Determine whether a bit is set in a byte
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit_in_byte(int nr, const u8 *addr)
+{
+	return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1)));
+}
+
 static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 			     unsigned long len)
 {
@@ -298,25 +309,29 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 		return -EINVAL;
 	}
 
-	bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
-		   sizeof(long) * BITS_PER_BYTE);
-	extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
-				 sizeof(long) * BITS_PER_BYTE);
-	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
-		test_msg("Setting straddling pages failed\n");
-		return -EINVAL;
-	}
+	/* Straddling pages test */
+	if (len > PAGE_SIZE) {
+		bitmap_set(bitmap,
+			(PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+			sizeof(long) * BITS_PER_BYTE);
+		extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+					sizeof(long) * BITS_PER_BYTE);
+		if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+			test_msg("Setting straddling pages failed\n");
+			return -EINVAL;
+		}
 
-	bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
-	bitmap_clear(bitmap,
-		     (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
-		     sizeof(long) * BITS_PER_BYTE);
-	extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-	extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
-				   sizeof(long) * BITS_PER_BYTE);
-	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
-		test_msg("Clearing straddling pages failed\n");
-		return -EINVAL;
+		bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
+		bitmap_clear(bitmap,
+			(PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+			sizeof(long) * BITS_PER_BYTE);
+		extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
+		extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+					sizeof(long) * BITS_PER_BYTE);
+		if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+			test_msg("Clearing straddling pages failed\n");
+			return -EINVAL;
+		}
 	}
 
 	/*
@@ -333,7 +348,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 	for (i = 0; i < len * BITS_PER_BYTE; i++) {
 		int bit, bit1;
 
-		bit = !!test_bit(i, bitmap);
+		bit = !!test_bit_in_byte(i, (u8 *)bitmap);
 		bit1 = !!extent_buffer_test_bit(eb, 0, i);
 		if (bit1 != bit) {
 			test_msg("Testing bit pattern failed\n");
@@ -351,15 +366,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 	return 0;
 }
 
-static int test_eb_bitmaps(void)
+static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 {
-	unsigned long len = PAGE_SIZE * 4;
+	unsigned long len;
 	unsigned long *bitmap;
 	struct extent_buffer *eb;
 	int ret;
 
 	test_msg("Running extent buffer bitmap tests\n");
 
+	/*
+	 * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
+	 * BTRFS_MAX_METADATA_BLOCKSIZE.
+	 */
+	len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
+		? sectorsize * 4 : sectorsize;
+
 	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
@@ -379,7 +401,7 @@ static int test_eb_bitmaps(void)
 
 	/* Do it over again with an extent buffer which isn't page-aligned. */
 	free_extent_buffer(eb);
-	eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len);
+	eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
 	if (!eb) {
 		test_msg("Couldn't allocate test extent buffer\n");
 		kfree(bitmap);
@@ -393,17 +415,17 @@ out:
 	return ret;
 }
 
-int btrfs_test_extent_io(void)
+int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
 {
 	int ret;
 
 	test_msg("Running extent I/O tests\n");
 
-	ret = test_find_delalloc();
+	ret = test_find_delalloc(sectorsize);
 	if (ret)
 		goto out;
 
-	ret = test_eb_bitmaps();
+	ret = test_eb_bitmaps(sectorsize, nodesize);
 out:
 	test_msg("Extent I/O tests finished\n");
 	return ret;
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 0eeb8f3d6b67..3956bb2ff84c 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -22,7 +22,7 @@
 #include "../disk-io.h"
 #include "../free-space-cache.h"
 
-#define BITS_PER_BITMAP		(PAGE_SIZE * 8)
+#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
 
 /*
  * This test just does basic sanity checking, making sure we can add an extent
@@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache)
 	return 0;
 }
 
-static int test_bitmaps(struct btrfs_block_group_cache *cache)
+static int test_bitmaps(struct btrfs_block_group_cache *cache,
+			u32 sectorsize)
 {
 	u64 next_bitmap_offset;
 	int ret;
@@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
 	 * The first bitmap we have starts at offset 0 so the next one is just
 	 * at the end of the first bitmap.
 	 */
-	next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+	next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
 
 	/* Test a bit straddling two bitmaps */
 	ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
@@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
 }
 
 /* This is the high grade jackassery */
-static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
+				    u32 sectorsize)
 {
-	u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+	u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
 	int ret;
 
 	test_msg("Running bitmap and extent tests\n");
@@ -401,7 +403,8 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
  * requests.
  */
 static int
-test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
+test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
+				       u32 sectorsize)
 {
 	int ret;
 	u64 offset;
@@ -539,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 	 * The goal is to test that the bitmap entry space stealing doesn't
 	 * steal this space region.
 	 */
-	ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096);
+	ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize);
 	if (ret) {
 		test_msg("Error adding free space: %d\n", ret);
 		return ret;
@@ -597,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 		return -ENOENT;
 	}
 
-	if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) {
-		test_msg("Cache free space is not 1Mb + 4Kb\n");
+	if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) {
+		test_msg("Cache free space is not 1Mb + %u\n", sectorsize);
 		return -EINVAL;
 	}
 
@@ -611,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 		return -EINVAL;
 	}
 
-	/* All that remains is a 4Kb free space region in a bitmap. Confirm. */
+	/*
+	 * All that remains is a sectorsize free space region in a bitmap.
+	 * Confirm.
+	 */
 	ret = check_num_extents_and_bitmaps(cache, 1, 1);
 	if (ret)
 		return ret;
 
-	if (cache->free_space_ctl->free_space != 4096) {
-		test_msg("Cache free space is not 4Kb\n");
+	if (cache->free_space_ctl->free_space != sectorsize) {
+		test_msg("Cache free space is not %u\n", sectorsize);
 		return -EINVAL;
 	}
 
 	offset = btrfs_find_space_for_alloc(cache,
-					    0, 4096, 0,
+					    0, sectorsize, 0,
 					    &max_extent_size);
 	if (offset != (SZ_128M + SZ_16M)) {
-		test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n",
-			 offset);
+		test_msg("Failed to allocate %u, returned offset : %llu\n",
+			 sectorsize, offset);
 		return -EINVAL;
 	}
 
@@ -733,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 	 * The goal is to test that the bitmap entry space stealing doesn't
 	 * steal this space region.
 	 */
-	ret = btrfs_add_free_space(cache, SZ_32M, 8192);
+	ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize);
 	if (ret) {
 		test_msg("Error adding free space: %d\n", ret);
 		return ret;
@@ -757,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 
 	/*
 	 * Confirm that our extent entry didn't stole all free space from the
-	 * bitmap, because of the small 8Kb free space region.
+	 * bitmap, because of the small 2 * sectorsize free space region.
 	 */
 	ret = check_num_extents_and_bitmaps(cache, 2, 1);
 	if (ret)
@@ -783,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 		return -ENOENT;
 	}
 
-	if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) {
-		test_msg("Cache free space is not 1Mb + 8Kb\n");
+	if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) {
+		test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize);
 		return -EINVAL;
 	}
 
@@ -796,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 		return -EINVAL;
 	}
 
-	/* All that remains is a 8Kb free space region in a bitmap. Confirm. */
+	/*
+	 * All that remains is 2 * sectorsize free space region
+	 * in a bitmap. Confirm.
+	 */
 	ret = check_num_extents_and_bitmaps(cache, 1, 1);
 	if (ret)
 		return ret;
 
-	if (cache->free_space_ctl->free_space != 8192) {
-		test_msg("Cache free space is not 8Kb\n");
+	if (cache->free_space_ctl->free_space != 2 * sectorsize) {
+		test_msg("Cache free space is not %u\n", 2 * sectorsize);
 		return -EINVAL;
 	}
 
 	offset = btrfs_find_space_for_alloc(cache,
-					    0, 8192, 0,
+					    0, 2 * sectorsize, 0,
 					    &max_extent_size);
 	if (offset != SZ_32M) {
-		test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n",
+		test_msg("Failed to allocate %u, offset: %llu\n",
+			 2 * sectorsize,
 			 offset);
 		return -EINVAL;
 	}
@@ -825,7 +835,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 	return 0;
 }
 
-int btrfs_test_free_space_cache(void)
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_root *root = NULL;
@@ -833,13 +843,19 @@ int btrfs_test_free_space_cache(void)
 
 	test_msg("Running btrfs free space cache tests\n");
 
-	cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024);
+	/*
+	 * For ppc64 (with 64k page size), bytes per bitmap might be
+	 * larger than 1G.  To make bitmap test available in ppc64,
+	 * alloc dummy block group whose size cross bitmaps.
+	 */
+	cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize
+					+ PAGE_SIZE, sectorsize);
 	if (!cache) {
 		test_msg("Couldn't run the tests\n");
 		return 0;
 	}
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		ret = PTR_ERR(root);
 		goto out;
@@ -855,14 +871,14 @@ int btrfs_test_free_space_cache(void)
 	ret = test_extents(cache);
 	if (ret)
 		goto out;
-	ret = test_bitmaps(cache);
+	ret = test_bitmaps(cache, sectorsize);
 	if (ret)
 		goto out;
-	ret = test_bitmaps_and_extents(cache);
+	ret = test_bitmaps_and_extents(cache, sectorsize);
 	if (ret)
 		goto out;
 
-	ret = test_steal_space_from_bitmap_to_extent(cache);
+	ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize);
 out:
 	btrfs_free_dummy_block_group(cache);
 	btrfs_free_dummy_root(root);
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 7cea4462acd5..aac507085ab0 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/types.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
 #include "../disk-io.h"
@@ -30,7 +31,7 @@ struct free_space_extent {
  * The test cases align their operations to this in order to hit some of the
  * edge cases in the bitmap code.
  */
-#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096)
+#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE)
 
 static int __check_free_space_extents(struct btrfs_trans_handle *trans,
 				      struct btrfs_fs_info *fs_info,
@@ -439,7 +440,8 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *,
 			   struct btrfs_block_group_cache *,
 			   struct btrfs_path *);
 
-static int run_test(test_func_t test_func, int bitmaps)
+static int run_test(test_func_t test_func, int bitmaps,
+		u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_root *root = NULL;
 	struct btrfs_block_group_cache *cache = NULL;
@@ -447,7 +449,7 @@ static int run_test(test_func_t test_func, int bitmaps)
 	struct btrfs_path *path = NULL;
 	int ret;
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		test_msg("Couldn't allocate dummy root\n");
 		ret = PTR_ERR(root);
@@ -466,7 +468,8 @@ static int run_test(test_func_t test_func, int bitmaps)
 	root->fs_info->free_space_root = root;
 	root->fs_info->tree_root = root;
 
-	root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+	root->node = alloc_test_extent_buffer(root->fs_info,
+		nodesize, nodesize);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		ret = -ENOMEM;
@@ -474,9 +477,9 @@ static int run_test(test_func_t test_func, int bitmaps)
 	}
 	btrfs_set_header_level(root->node, 0);
 	btrfs_set_header_nritems(root->node, 0);
-	root->alloc_bytenr += 8192;
+	root->alloc_bytenr += 2 * nodesize;
 
-	cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE);
+	cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize);
 	if (!cache) {
 		test_msg("Couldn't allocate dummy block group cache\n");
 		ret = -ENOMEM;
@@ -534,17 +537,18 @@ out:
 	return ret;
 }
 
-static int run_test_both_formats(test_func_t test_func)
+static int run_test_both_formats(test_func_t test_func,
+	u32 sectorsize, u32 nodesize)
 {
 	int ret;
 
-	ret = run_test(test_func, 0);
+	ret = run_test(test_func, 0, sectorsize, nodesize);
 	if (ret)
 		return ret;
-	return run_test(test_func, 1);
+	return run_test(test_func, 1, sectorsize, nodesize);
 }
 
-int btrfs_test_free_space_tree(void)
+int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
 {
 	test_func_t tests[] = {
 		test_empty_block_group,
@@ -561,9 +565,11 @@ int btrfs_test_free_space_tree(void)
 
 	test_msg("Running free space tree tests\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		int ret = run_test_both_formats(tests[i]);
+		int ret = run_test_both_formats(tests[i], sectorsize,
+			nodesize);
 		if (ret) {
-			test_msg("%pf failed\n", tests[i]);
+			test_msg("%pf : sectorsize %u failed\n",
+				tests[i], sectorsize);
 			return ret;
 		}
 	}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 8a25fe8b7c45..29648c0a39f1 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/types.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
 #include "../btrfs_inode.h"
@@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root)
  * diagram of how the extents will look though this may not be possible we still
  * want to make sure everything acts normally (the last number is not inclusive)
  *
- * [0 - 5][5 -  6][6 - 10][10 - 4096][  4096 - 8192 ][8192 - 12288]
- * [hole ][inline][ hole ][ regular ][regular1 split][    hole    ]
+ * [0 - 5][5 -  6][     6 - 4096     ][ 4096 - 4100][4100 - 8195][8195 - 12291]
+ * [hole ][inline][hole but no extent][  hole   ][   regular ][regular1 split]
  *
- * [ 12288 - 20480][20480 - 24576][  24576 - 28672  ][28672 - 36864][36864 - 45056]
- * [regular1 split][   prealloc1 ][prealloc1 written][   prealloc1 ][ compressed  ]
+ * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ]
+ * [    hole    ][regular1 split][   prealloc ][   prealloc1  ][prealloc1 written]
  *
- * [45056 - 49152][49152-53248][53248-61440][61440-65536][     65536+81920   ]
- * [ compressed1 ][  regular  ][compressed1][  regular  ][ hole but no extent]
+ * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635]
+ * [  prealloc1  ][ compressed  ][ compressed1 ][    regular  ][ compressed1]
  *
- * [81920-86016]
- * [  regular  ]
+ * [69635-73731][   73731 - 86019   ][86019-90115]
+ * [  regular  ][ hole but no extent][  regular  ]
  */
-static void setup_file_extents(struct btrfs_root *root)
+static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
 {
 	int slot = 0;
 	u64 disk_bytenr = SZ_1M;
@@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root)
 	insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
 		      slot);
 	slot++;
-	offset = 4096;
+	offset = sectorsize;
 
 	/* Now another hole */
 	insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
@@ -128,99 +129,106 @@ static void setup_file_extents(struct btrfs_root *root)
 	offset += 4;
 
 	/* Now for a regular extent */
-	insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
-		      BTRFS_FILE_EXTENT_REG, 0, slot);
+	insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0,
+		      disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
 	slot++;
-	disk_bytenr += 4096;
-	offset += 4095;
+	disk_bytenr += sectorsize;
+	offset += sectorsize - 1;
 
 	/*
 	 * Now for 3 extents that were split from a hole punch so we test
 	 * offsets properly.
 	 */
-	insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
-		      BTRFS_FILE_EXTENT_REG, 0, slot);
+	insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+		      4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
 	slot++;
-	offset += 4096;
-	insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
-		      0, slot);
+	offset += sectorsize;
+	insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0,
+		      BTRFS_FILE_EXTENT_REG, 0, slot);
 	slot++;
-	offset += 4096;
-	insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+	offset += sectorsize;
+	insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+		      2 * sectorsize, disk_bytenr, 4 * sectorsize,
 		      BTRFS_FILE_EXTENT_REG, 0, slot);
 	slot++;
-	offset += 8192;
-	disk_bytenr += 16384;
+	offset += 2 * sectorsize;
+	disk_bytenr += 4 * sectorsize;
 
 	/* Now for a unwritten prealloc extent */
-	insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
-		      BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+	insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+		sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
 	slot++;
-	offset += 4096;
+	offset += sectorsize;
 
 	/*
 	 * We want to jack up disk_bytenr a little more so the em stuff doesn't
 	 * merge our records.
 	 */
-	disk_bytenr += 8192;
+	disk_bytenr += 2 * sectorsize;
 
 	/*
 	 * Now for a partially written prealloc extent, basically the same as
 	 * the hole punch example above.  Ram_bytes never changes when you mark
 	 * extents written btw.
 	 */
-	insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
-		      BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+	insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+		      4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
 	slot++;
-	offset += 4096;
-	insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
-		      BTRFS_FILE_EXTENT_REG, 0, slot);
+	offset += sectorsize;
+	insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize,
+		      disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0,
+		      slot);
 	slot++;
-	offset += 4096;
-	insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+	offset += sectorsize;
+	insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+		      2 * sectorsize, disk_bytenr, 4 * sectorsize,
 		      BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
 	slot++;
-	offset += 8192;
-	disk_bytenr += 16384;
+	offset += 2 * sectorsize;
+	disk_bytenr += 4 * sectorsize;
 
 	/* Now a normal compressed extent */
-	insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
-		      BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+	insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0,
+		      disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG,
+		      BTRFS_COMPRESS_ZLIB, slot);
 	slot++;
-	offset += 8192;
+	offset += 2 * sectorsize;
 	/* No merges */
-	disk_bytenr += 8192;
+	disk_bytenr += 2 * sectorsize;
 
 	/* Now a split compressed extent */
-	insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
-		      BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+	insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+		      sectorsize, BTRFS_FILE_EXTENT_REG,
+		      BTRFS_COMPRESS_ZLIB, slot);
 	slot++;
-	offset += 4096;
-	insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
+	offset += sectorsize;
+	insert_extent(root, offset, sectorsize, sectorsize, 0,
+		      disk_bytenr + sectorsize, sectorsize,
 		      BTRFS_FILE_EXTENT_REG, 0, slot);
 	slot++;
-	offset += 4096;
-	insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
+	offset += sectorsize;
+	insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+		      2 * sectorsize, disk_bytenr, sectorsize,
 		      BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
 	slot++;
-	offset += 8192;
-	disk_bytenr += 8192;
+	offset += 2 * sectorsize;
+	disk_bytenr += 2 * sectorsize;
 
 	/* Now extents that have a hole but no hole extent */
-	insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
-		      BTRFS_FILE_EXTENT_REG, 0, slot);
+	insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+		      sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
 	slot++;
-	offset += 16384;
-	disk_bytenr += 4096;
-	insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
-		      BTRFS_FILE_EXTENT_REG, 0, slot);
+	offset += 4 * sectorsize;
+	disk_bytenr += sectorsize;
+	insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+		      sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
 }
 
 static unsigned long prealloc_only = 0;
 static unsigned long compressed_only = 0;
 static unsigned long vacancy_only = 0;
 
-static noinline int test_btrfs_get_extent(void)
+static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 {
 	struct inode *inode = NULL;
 	struct btrfs_root *root = NULL;
@@ -240,7 +248,7 @@ static noinline int test_btrfs_get_extent(void)
 	BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
 	BTRFS_I(inode)->location.offset = 0;
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		test_msg("Couldn't allocate root\n");
 		goto out;
@@ -256,7 +264,7 @@ static noinline int test_btrfs_get_extent(void)
 		goto out;
 	}
 
-	root->node = alloc_dummy_extent_buffer(NULL, 4096);
+	root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		goto out;
@@ -273,7 +281,7 @@ static noinline int test_btrfs_get_extent(void)
 
 	/* First with no extents */
 	BTRFS_I(inode)->root = root;
-	em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0);
 	if (IS_ERR(em)) {
 		em = NULL;
 		test_msg("Got an error when we shouldn't have\n");
@@ -295,7 +303,7 @@ static noinline int test_btrfs_get_extent(void)
 	 * setup_file_extents, so if you change anything there you need to
 	 * update the comment and update the expected values below.
 	 */
-	setup_file_extents(root);
+	setup_file_extents(root, sectorsize);
 
 	em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
 	if (IS_ERR(em)) {
@@ -318,7 +326,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -327,7 +335,8 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected an inline, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4091) {
+
+	if (em->start != offset || em->len != (sectorsize - 5)) {
 		test_msg("Unexpected extent wanted start %llu len 1, got start "
 			 "%llu len %llu\n", offset, em->start, em->len);
 		goto out;
@@ -344,7 +353,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -366,7 +375,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* Regular extent */
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -375,7 +384,7 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4095) {
+	if (em->start != offset || em->len != sectorsize - 1) {
 		test_msg("Unexpected extent wanted start %llu len 4095, got "
 			 "start %llu len %llu\n", offset, em->start, em->len);
 		goto out;
@@ -393,7 +402,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* The next 3 are split extents */
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -402,9 +411,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -421,7 +431,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -430,9 +440,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a hole, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -442,7 +453,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -451,9 +462,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 8192) {
-		test_msg("Unexpected extent wanted start %llu len 8192, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != 2 * sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -475,7 +487,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* Prealloc extent */
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -484,9 +496,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != prealloc_only) {
@@ -503,7 +516,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* The next 3 are a half written prealloc extent */
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -512,9 +525,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != prealloc_only) {
@@ -532,7 +546,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -541,9 +555,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -564,7 +579,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -573,9 +588,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 8192) {
-		test_msg("Unexpected extent wanted start %llu len 8192, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != 2 * sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != prealloc_only) {
@@ -598,7 +614,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* Now for the compressed extent */
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -607,9 +623,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 8192) {
-		test_msg("Unexpected extent wanted start %llu len 8192, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != 2 * sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u,"
+			"got start %llu len %llu\n",
+			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != compressed_only) {
@@ -631,7 +648,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* Split compressed extent */
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -640,9 +657,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u,"
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != compressed_only) {
@@ -665,7 +683,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -674,9 +692,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -691,7 +710,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -701,9 +720,10 @@ static noinline int test_btrfs_get_extent(void)
 			 disk_bytenr, em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 8192) {
-		test_msg("Unexpected extent wanted start %llu len 8192, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != 2 * sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != compressed_only) {
@@ -725,7 +745,7 @@ static noinline int test_btrfs_get_extent(void)
 	free_extent_map(em);
 
 	/* A hole between regular extents but no hole extent */
-	em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -734,9 +754,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -765,9 +786,10 @@ static noinline int test_btrfs_get_extent(void)
 	 * length of the actual hole, if this changes we'll have to change this
 	 * test.
 	 */
-	if (em->start != offset || em->len != 12288) {
-		test_msg("Unexpected extent wanted start %llu len 12288, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != 3 * sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u, "
+			"got start %llu len %llu\n",
+			offset, 3 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != vacancy_only) {
@@ -783,7 +805,7 @@ static noinline int test_btrfs_get_extent(void)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -792,9 +814,10 @@ static noinline int test_btrfs_get_extent(void)
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != offset || em->len != 4096) {
-		test_msg("Unexpected extent wanted start %llu len 4096, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+	if (em->start != offset || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %llu len %u,"
+			"got start %llu len %llu\n",
+			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -815,7 +838,7 @@ out:
 	return ret;
 }
 
-static int test_hole_first(void)
+static int test_hole_first(u32 sectorsize, u32 nodesize)
 {
 	struct inode *inode = NULL;
 	struct btrfs_root *root = NULL;
@@ -832,7 +855,7 @@ static int test_hole_first(void)
 	BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
 	BTRFS_I(inode)->location.offset = 0;
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		test_msg("Couldn't allocate root\n");
 		goto out;
@@ -844,7 +867,7 @@ static int test_hole_first(void)
 		goto out;
 	}
 
-	root->node = alloc_dummy_extent_buffer(NULL, 4096);
+	root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		goto out;
@@ -861,9 +884,9 @@ static int test_hole_first(void)
 	 * btrfs_get_extent.
 	 */
 	insert_inode_item_key(root);
-	insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
-		      BTRFS_FILE_EXTENT_REG, 0, 1);
-	em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
+	insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
+		      sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
+	em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
@@ -872,9 +895,10 @@ static int test_hole_first(void)
 		test_msg("Expected a hole, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != 0 || em->len != 4096) {
-		test_msg("Unexpected extent wanted start 0 len 4096, got start "
-			 "%llu len %llu\n", em->start, em->len);
+	if (em->start != 0 || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start 0 len %u, "
+			"got start %llu len %llu\n",
+			sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != vacancy_only) {
@@ -884,18 +908,19 @@ static int test_hole_first(void)
 	}
 	free_extent_map(em);
 
-	em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
+	em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0);
 	if (IS_ERR(em)) {
 		test_msg("Got an error when we shouldn't have\n");
 		goto out;
 	}
-	if (em->block_start != 4096) {
+	if (em->block_start != sectorsize) {
 		test_msg("Expected a real extent, got %llu\n", em->block_start);
 		goto out;
 	}
-	if (em->start != 4096 || em->len != 4096) {
-		test_msg("Unexpected extent wanted start 4096 len 4096, got "
-			 "start %llu len %llu\n", em->start, em->len);
+	if (em->start != sectorsize || em->len != sectorsize) {
+		test_msg("Unexpected extent wanted start %u len %u, "
+			"got start %llu len %llu\n",
+			sectorsize, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
@@ -912,7 +937,7 @@ out:
 	return ret;
 }
 
-static int test_extent_accounting(void)
+static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 {
 	struct inode *inode = NULL;
 	struct btrfs_root *root = NULL;
@@ -924,7 +949,7 @@ static int test_extent_accounting(void)
 		return ret;
 	}
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		test_msg("Couldn't allocate root\n");
 		goto out;
@@ -954,10 +979,11 @@ static int test_extent_accounting(void)
 		goto out;
 	}
 
-	/* [BTRFS_MAX_EXTENT_SIZE][4k] */
+	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
 	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
-					BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
+					BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
+					NULL);
 	if (ret) {
 		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
 		goto out;
@@ -969,10 +995,10 @@ static int test_extent_accounting(void)
 		goto out;
 	}
 
-	/* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
+	/* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
 			       BTRFS_MAX_EXTENT_SIZE >> 1,
-			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+			       (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
 			       NULL, GFP_KERNEL);
@@ -987,10 +1013,11 @@ static int test_extent_accounting(void)
 		goto out;
 	}
 
-	/* [BTRFS_MAX_EXTENT_SIZE][4K] */
+	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
 	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
-					(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+					(BTRFS_MAX_EXTENT_SIZE >> 1)
+					+ sectorsize - 1,
 					NULL);
 	if (ret) {
 		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
@@ -1004,16 +1031,17 @@ static int test_extent_accounting(void)
 	}
 
 	/*
-	 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
+	 * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
 	 *
 	 * I'm artificially adding 2 to outstanding_extents because in the
 	 * buffered IO case we'd add things up as we go, but I don't feel like
 	 * doing that here, this isn't the interesting case we want to test.
 	 */
 	BTRFS_I(inode)->outstanding_extents += 2;
-	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
-					(BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
-					NULL);
+	ret = btrfs_set_extent_delalloc(inode,
+			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
+			(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
+			NULL);
 	if (ret) {
 		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
 		goto out;
@@ -1025,10 +1053,13 @@ static int test_extent_accounting(void)
 		goto out;
 	}
 
-	/* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
+	/*
+	* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
+	*/
 	BTRFS_I(inode)->outstanding_extents++;
-	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
-					BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+	ret = btrfs_set_extent_delalloc(inode,
+			BTRFS_MAX_EXTENT_SIZE + sectorsize,
+			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL);
 	if (ret) {
 		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
 		goto out;
@@ -1042,8 +1073,8 @@ static int test_extent_accounting(void)
 
 	/* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
-			       BTRFS_MAX_EXTENT_SIZE+4096,
-			       BTRFS_MAX_EXTENT_SIZE+8191,
+			       BTRFS_MAX_EXTENT_SIZE + sectorsize,
+			       BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
 			       NULL, GFP_KERNEL);
@@ -1063,8 +1094,9 @@ static int test_extent_accounting(void)
 	 * might fail and I'd rather satisfy my paranoia at this point.
 	 */
 	BTRFS_I(inode)->outstanding_extents++;
-	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
-					BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+	ret = btrfs_set_extent_delalloc(inode,
+			BTRFS_MAX_EXTENT_SIZE + sectorsize,
+			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL);
 	if (ret) {
 		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
 		goto out;
@@ -1103,7 +1135,7 @@ out:
 	return ret;
 }
 
-int btrfs_test_inodes(void)
+int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
 {
 	int ret;
 
@@ -1112,13 +1144,13 @@ int btrfs_test_inodes(void)
 	set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
 
 	test_msg("Running btrfs_get_extent tests\n");
-	ret = test_btrfs_get_extent();
+	ret = test_btrfs_get_extent(sectorsize, nodesize);
 	if (ret)
 		return ret;
 	test_msg("Running hole first btrfs_get_extent test\n");
-	ret = test_hole_first();
+	ret = test_hole_first(sectorsize, nodesize);
 	if (ret)
 		return ret;
 	test_msg("Running outstanding_extents tests\n");
-	return test_extent_accounting();
+	return test_extent_accounting(sectorsize, nodesize);
 }
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 8aa4ded31326..57a12c0d680b 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/types.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
 #include "../transaction.h"
@@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
 	return ret;
 }
 
-static int test_no_shared_qgroup(struct btrfs_root *root)
+static int test_no_shared_qgroup(struct btrfs_root *root,
+		u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_trans_handle trans;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -227,7 +229,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
 	btrfs_init_dummy_trans(&trans);
 
 	test_msg("Qgroup basic add\n");
-	ret = btrfs_create_qgroup(NULL, fs_info, 5);
+	ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
 	if (ret) {
 		test_msg("Couldn't create a qgroup %d\n", ret);
 		return ret;
@@ -238,18 +240,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
 	 * we can only call btrfs_qgroup_account_extent() directly to test
 	 * quota.
 	 */
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		test_msg("Couldn't find old roots: %d\n", ret);
 		return ret;
 	}
 
-	ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+	ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+				BTRFS_FS_TREE_OBJECTID);
 	if (ret)
 		return ret;
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
@@ -257,32 +260,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
-					  old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+					  nodesize, old_roots, new_roots);
 	if (ret) {
 		test_msg("Couldn't account space for a qgroup %d\n", ret);
 		return ret;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+				nodesize, nodesize)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
 	old_roots = NULL;
 	new_roots = NULL;
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		test_msg("Couldn't find old roots: %d\n", ret);
 		return ret;
 	}
 
-	ret = remove_extent_item(root, 4096, 4096);
+	ret = remove_extent_item(root, nodesize, nodesize);
 	if (ret)
 		return -EINVAL;
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
@@ -290,14 +294,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
-					  old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+					  nodesize, old_roots, new_roots);
 	if (ret) {
 		test_msg("Couldn't account space for a qgroup %d\n", ret);
 		return -EINVAL;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
@@ -310,7 +314,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
  * right, also remove one of the roots and make sure the exclusive count is
  * adjusted properly.
  */
-static int test_multiple_refs(struct btrfs_root *root)
+static int test_multiple_refs(struct btrfs_root *root,
+		u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_trans_handle trans;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -322,25 +327,29 @@ static int test_multiple_refs(struct btrfs_root *root)
 
 	test_msg("Qgroup multiple refs test\n");
 
-	/* We have 5 created already from the previous test */
-	ret = btrfs_create_qgroup(NULL, fs_info, 256);
+	/*
+	 * We have BTRFS_FS_TREE_OBJECTID created already from the
+	 * previous test.
+	 */
+	ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
 	if (ret) {
 		test_msg("Couldn't create a qgroup %d\n", ret);
 		return ret;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		test_msg("Couldn't find old roots: %d\n", ret);
 		return ret;
 	}
 
-	ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+	ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+				BTRFS_FS_TREE_OBJECTID);
 	if (ret)
 		return ret;
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
@@ -348,30 +357,32 @@ static int test_multiple_refs(struct btrfs_root *root)
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
-					  old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+					  nodesize, old_roots, new_roots);
 	if (ret) {
 		test_msg("Couldn't account space for a qgroup %d\n", ret);
 		return ret;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+				       nodesize, nodesize)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		test_msg("Couldn't find old roots: %d\n", ret);
 		return ret;
 	}
 
-	ret = add_tree_ref(root, 4096, 4096, 0, 256);
+	ret = add_tree_ref(root, nodesize, nodesize, 0,
+			BTRFS_FIRST_FREE_OBJECTID);
 	if (ret)
 		return ret;
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
@@ -379,35 +390,38 @@ static int test_multiple_refs(struct btrfs_root *root)
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
-					  old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+					  nodesize, old_roots, new_roots);
 	if (ret) {
 		test_msg("Couldn't account space for a qgroup %d\n", ret);
 		return ret;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+					nodesize, 0)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
+					nodesize, 0)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		test_msg("Couldn't find old roots: %d\n", ret);
 		return ret;
 	}
 
-	ret = remove_extent_ref(root, 4096, 4096, 0, 256);
+	ret = remove_extent_ref(root, nodesize, nodesize, 0,
+				BTRFS_FIRST_FREE_OBJECTID);
 	if (ret)
 		return ret;
 
-	ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
@@ -415,19 +429,21 @@ static int test_multiple_refs(struct btrfs_root *root)
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
-					  old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+					  nodesize, old_roots, new_roots);
 	if (ret) {
 		test_msg("Couldn't account space for a qgroup %d\n", ret);
 		return ret;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
+					0, 0)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
 
-	if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+					nodesize, nodesize)) {
 		test_msg("Qgroup counts didn't match expected values\n");
 		return -EINVAL;
 	}
@@ -435,13 +451,13 @@ static int test_multiple_refs(struct btrfs_root *root)
 	return 0;
 }
 
-int btrfs_test_qgroups(void)
+int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_root *root;
 	struct btrfs_root *tmp_root;
 	int ret = 0;
 
-	root = btrfs_alloc_dummy_root();
+	root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(root)) {
 		test_msg("Couldn't allocate root\n");
 		return PTR_ERR(root);
@@ -468,7 +484,8 @@ int btrfs_test_qgroups(void)
 	 * Can't use bytenr 0, some things freak out
 	 * *cough*backref walking code*cough*
 	 */
-	root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+	root->node = alloc_test_extent_buffer(root->fs_info, nodesize,
+					nodesize);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		ret = -ENOMEM;
@@ -476,16 +493,16 @@ int btrfs_test_qgroups(void)
 	}
 	btrfs_set_header_level(root->node, 0);
 	btrfs_set_header_nritems(root->node, 0);
-	root->alloc_bytenr += 8192;
+	root->alloc_bytenr += 2 * nodesize;
 
-	tmp_root = btrfs_alloc_dummy_root();
+	tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(tmp_root)) {
 		test_msg("Couldn't allocate a fs root\n");
 		ret = PTR_ERR(tmp_root);
 		goto out;
 	}
 
-	tmp_root->root_key.objectid = 5;
+	tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID;
 	root->fs_info->fs_root = tmp_root;
 	ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
 	if (ret) {
@@ -493,14 +510,14 @@ int btrfs_test_qgroups(void)
 		goto out;
 	}
 
-	tmp_root = btrfs_alloc_dummy_root();
+	tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
 	if (IS_ERR(tmp_root)) {
 		test_msg("Couldn't allocate a fs root\n");
 		ret = PTR_ERR(tmp_root);
 		goto out;
 	}
 
-	tmp_root->root_key.objectid = 256;
+	tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID;
 	ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
 	if (ret) {
 		test_msg("Couldn't insert fs root %d\n", ret);
@@ -508,10 +525,10 @@ int btrfs_test_qgroups(void)
 	}
 
 	test_msg("Running qgroup tests\n");
-	ret = test_no_shared_qgroup(root);
+	ret = test_no_shared_qgroup(root, sectorsize, nodesize);
 	if (ret)
 		goto out;
-	ret = test_multiple_refs(root);
+	ret = test_multiple_refs(root, sectorsize, nodesize);
 out:
 	btrfs_free_dummy_root(root);
 	return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f6e24cb423ae..948aa186b353 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_fs_info *info = root->fs_info;
+	u64 transid = trans->transid;
 	unsigned long cur = trans->delayed_ref_updates;
 	int lock = (trans->type != TRANS_JOIN_NOLOCK);
 	int err = 0;
@@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 	if (must_run_delayed_refs) {
-		btrfs_async_run_delayed_refs(root, cur,
+		btrfs_async_run_delayed_refs(root, cur, transid,
 					     must_run_delayed_refs == 1);
 	}
 	return err;
@@ -1311,11 +1312,6 @@ int btrfs_defrag_root(struct btrfs_root *root)
 	return ret;
 }
 
-/* Bisesctability fixup, remove in 4.8 */
-#ifndef btrfs_std_error
-#define btrfs_std_error btrfs_handle_fs_error
-#endif
-
 /*
  * Do all special snapshot related qgroup dirty hack.
  *
@@ -1385,7 +1381,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 	switch_commit_roots(trans->transaction, fs_info);
 	ret = btrfs_write_and_wait_transaction(trans, src);
 	if (ret)
-		btrfs_std_error(fs_info, ret,
+		btrfs_handle_fs_error(fs_info, ret,
 			"Error while writing out transaction for qgroup");
 
 out:
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9fe0ec2bf0fe..c5abee4f01ad 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -110,7 +110,6 @@ struct btrfs_trans_handle {
 	u64 chunk_bytes_reserved;
 	unsigned long use_count;
 	unsigned long blocks_reserved;
-	unsigned long blocks_used;
 	unsigned long delayed_ref_updates;
 	struct btrfs_transaction *transaction;
 	struct btrfs_block_rsv *block_rsv;
@@ -121,6 +120,7 @@ struct btrfs_trans_handle {
 	bool can_flush_pending_bgs;
 	bool reloc_reserved;
 	bool sync;
+	bool dirty;
 	unsigned int type;
 	/*
 	 * this root is only needed to validate that the root passed to
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b7665af471d8..c05f69a8ec42 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 		root_owner = btrfs_header_owner(parent);
 
 		next = btrfs_find_create_tree_block(root, bytenr);
-		if (!next)
-			return -ENOMEM;
+		if (IS_ERR(next))
+			return PTR_ERR(next);
 
 		if (*level == 1) {
 			ret = wc->process_func(root, next, wc, ptr_gen);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da9e0036a864..589f128173b1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4241,6 +4241,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
 	if (IS_ERR(uuid_root)) {
 		ret = PTR_ERR(uuid_root);
 		btrfs_abort_transaction(trans, tree_root, ret);
+		btrfs_end_transaction(trans, tree_root);
 		return ret;
 	}
 
@@ -4693,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 	if (type & BTRFS_BLOCK_GROUP_RAID5) {
 		raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
-				 btrfs_super_stripesize(info->super_copy));
+						extent_root->stripesize);
 		data_stripes = num_stripes - 1;
 	}
 	if (type & BTRFS_BLOCK_GROUP_RAID6) {
 		raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
-				 btrfs_super_stripesize(info->super_copy));
+						extent_root->stripesize);
 		data_stripes = num_stripes - 2;
 	}
 
@@ -6258,27 +6259,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 	return dev;
 }
 
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
-			  struct extent_buffer *leaf,
-			  struct btrfs_chunk *chunk)
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+				   struct extent_buffer *leaf,
+				   struct btrfs_chunk *chunk, u64 logical)
 {
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-	struct map_lookup *map;
-	struct extent_map *em;
-	u64 logical;
 	u64 length;
 	u64 stripe_len;
-	u64 devid;
-	u8 uuid[BTRFS_UUID_SIZE];
-	int num_stripes;
-	int ret;
-	int i;
+	u16 num_stripes;
+	u16 sub_stripes;
+	u64 type;
 
-	logical = key->offset;
 	length = btrfs_chunk_length(leaf, chunk);
 	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
-	/* Validation check */
+	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+	type = btrfs_chunk_type(leaf, chunk);
+
 	if (!num_stripes) {
 		btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
 			  num_stripes);
@@ -6289,6 +6286,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 			  "invalid chunk logical %llu", logical);
 		return -EIO;
 	}
+	if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+		btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+			  btrfs_chunk_sector_size(leaf, chunk));
+		return -EIO;
+	}
 	if (!length || !IS_ALIGNED(length, root->sectorsize)) {
 		btrfs_err(root->fs_info,
 			"invalid chunk length %llu", length);
@@ -6300,13 +6302,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 		return -EIO;
 	}
 	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
-	    btrfs_chunk_type(leaf, chunk)) {
+	    type) {
 		btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
 			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
 			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
 			  btrfs_chunk_type(leaf, chunk));
 		return -EIO;
 	}
+	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+	     num_stripes != 1)) {
+		btrfs_err(root->fs_info,
+			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
+			num_stripes, sub_stripes,
+			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+			  struct extent_buffer *leaf,
+			  struct btrfs_chunk *chunk)
+{
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct map_lookup *map;
+	struct extent_map *em;
+	u64 logical;
+	u64 length;
+	u64 stripe_len;
+	u64 devid;
+	u8 uuid[BTRFS_UUID_SIZE];
+	int num_stripes;
+	int ret;
+	int i;
+
+	logical = key->offset;
+	length = btrfs_chunk_length(leaf, chunk);
+	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+	ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+	if (ret)
+		return ret;
 
 	read_lock(&map_tree->map_tree.lock);
 	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6554,6 +6597,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	u32 array_size;
 	u32 len = 0;
 	u32 cur_offset;
+	u64 type;
 	struct btrfs_key key;
 
 	ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6563,8 +6607,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	 * overallocate but we can keep it as-is, only the first page is used.
 	 */
 	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
-	if (!sb)
-		return -ENOMEM;
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
 	set_extent_buffer_uptodate(sb);
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 	/*
@@ -6620,6 +6664,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 				break;
 			}
 
+			type = btrfs_chunk_type(sb, chunk);
+			if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+				btrfs_err(root->fs_info,
+			    "invalid chunk type %llu in sys_array at offset %u",
+					type, cur_offset);
+				ret = -EIO;
+				break;
+			}
+
 			len = btrfs_chunk_item_size(num_stripes);
 			if (cur_offset + len > array_size)
 				goto out_short_read;
@@ -6638,12 +6691,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 		sb_array_offset += len;
 		cur_offset += len;
 	}
+	clear_extent_buffer_uptodate(sb);
 	free_extent_buffer_stale(sb);
 	return ret;
 
 out_short_read:
 	printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
 			len, cur_offset);
+	clear_extent_buffer_uptodate(sb);
 	free_extent_buffer_stale(sb);
 	return -EIO;
 }
@@ -6656,6 +6711,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 	struct btrfs_key found_key;
 	int ret;
 	int slot;
+	u64 total_dev = 0;
 
 	root = root->fs_info->chunk_root;
 
@@ -6697,6 +6753,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 			ret = read_one_dev(root, leaf, dev_item);
 			if (ret)
 				goto error;
+			total_dev++;
 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
 			struct btrfs_chunk *chunk;
 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6706,6 +6763,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 		}
 		path->slots[0]++;
 	}
+
+	/*
+	 * After loading chunk tree, we've got all device information,
+	 * do another round of validation checks.
+	 */
+	if (total_dev != root->fs_info->fs_devices->total_devices) {
+		btrfs_err(root->fs_info,
+	   "super_num_devices %llu mismatch with num_devices %llu found here",
+			  btrfs_super_num_devices(root->fs_info->super_copy),
+			  total_dev);
+		ret = -EINVAL;
+		goto error;
+	}
+	if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+	    root->fs_info->fs_devices->total_rw_bytes) {
+		btrfs_err(root->fs_info,
+	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
+			  btrfs_super_total_bytes(root->fs_info->super_copy),
+			  root->fs_info->fs_devices->total_rw_bytes);
+		ret = -EINVAL;
+		goto error;
+	}
 	ret = 0;
 error:
 	unlock_chunks(root);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 6e72c98162d5..1780218a48f0 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 	}
 
 	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		iput(inode);
+	if (IS_ERR(dentry))
 		return dentry;
-	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
 		dput(dentry);
@@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb,
 		return ERR_PTR(-ENOENT);
 
 	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		iput(inode);
+	if (IS_ERR(dentry))
 		return dentry;
-	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
 		dput(dentry);
@@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
 
 	dout("fh_to_parent %llx\n", cfh->parent_ino);
 	dentry = __get_parent(sb, NULL, cfh->ino);
-	if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+	if (unlikely(dentry == ERR_PTR(-ENOENT)))
 		dentry = __fh_to_dentry(sb, cfh->parent_ino);
 	return dentry;
 }
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 5a53ac6b1e02..02b071bf3732 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target)
 	case SFM_SLASH:
 		*target = '\\';
 		break;
+	case SFM_SPACE:
+		*target = ' ';
+		break;
+	case SFM_PERIOD:
+		*target = '.';
+		break;
 	default:
 		return false;
 	}
@@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char)
 	return dest_char;
 }
 
-static __le16 convert_to_sfm_char(char src_char)
+static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
 {
 	__le16 dest_char;
 
@@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char)
 	case '|':
 		dest_char = cpu_to_le16(SFM_PIPE);
 		break;
+	case '.':
+		if (end_of_string)
+			dest_char = cpu_to_le16(SFM_PERIOD);
+		else
+			dest_char = 0;
+		break;
+	case ' ':
+		if (end_of_string)
+			dest_char = cpu_to_le16(SFM_SPACE);
+		else
+			dest_char = 0;
+		break;
 	default:
 		dest_char = 0;
 	}
@@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 		/* see if we must remap this char */
 		if (map_chars == SFU_MAP_UNI_RSVD)
 			dst_char = convert_to_sfu_char(src_char);
-		else if (map_chars == SFM_MAP_UNI_RSVD)
-			dst_char = convert_to_sfm_char(src_char);
-		else
+		else if (map_chars == SFM_MAP_UNI_RSVD) {
+			bool end_of_string;
+
+			if (i == srclen - 1)
+				end_of_string = true;
+			else
+				end_of_string = false;
+
+			dst_char = convert_to_sfm_char(src_char, end_of_string);
+		} else
 			dst_char = 0;
 		/*
 		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index bdc52cb9a676..479bc0a941f3 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -64,6 +64,8 @@
 #define SFM_LESSTHAN    ((__u16) 0xF023)
 #define SFM_PIPE        ((__u16) 0xF027)
 #define SFM_SLASH       ((__u16) 0xF026)
+#define SFM_PERIOD	((__u16) 0xF028)
+#define SFM_SPACE	((__u16) 0xF029)
 
 /*
  * Mapping mechanism to use when one of the seven reserved characters is
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5d8b7edf8a8f..5d841f39c4b7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
+__u32 cifs_lock_secret;
 
 /*
  * Bumps refcount for cifs super block.
@@ -1266,6 +1267,8 @@ init_cifs(void)
 	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
+	get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
+
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
 		cifs_dbg(FYI, "cifs_max_pending set to min of 2\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bba106cdc43c..8f1d8c1e72be 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
+extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_mid_poolp;
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 66736f57b5ab..7d2b15c06090 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work)
 	 * server->ops->need_neg() == true. Also, no need to ping if
 	 * we got a response recently.
 	 */
-	if (!server->ops->need_neg || server->ops->need_neg(server) ||
+
+	if (server->tcpStatus == CifsNeedReconnect ||
+	    server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
 	    (server->ops->can_echo && !server->ops->can_echo(server)) ||
 	    time_before(jiffies, server->lstrp + echo_interval - HZ))
 		goto requeue_echo;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9793ae0bcaa2..d4890b6dc22d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	return rc;
 }
 
+static __u32
+hash_lockowner(fl_owner_t owner)
+{
+	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
+}
+
 struct lock_to_push {
 	struct list_head llist;
 	__u64 offset;
@@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 		else
 			type = CIFS_WRLCK;
 		lck = list_entry(el, struct lock_to_push, llist);
-		lck->pid = flock->fl_pid;
+		lck->pid = hash_lockowner(flock->fl_owner);
 		lck->netfid = cfile->fid.netfid;
 		lck->length = length;
 		lck->type = type;
@@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 			posix_lock_type = CIFS_RDLCK;
 		else
 			posix_lock_type = CIFS_WRLCK;
-		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
+		rc = CIFSSMBPosixLock(xid, tcon, netfid,
+				      hash_lockowner(flock->fl_owner),
 				      flock->fl_start, length, flock,
 				      posix_lock_type, wait_flag);
 		return rc;
@@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 			posix_lock_type = CIFS_UNLCK;
 
 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
-				      current->tgid, flock->fl_start, length,
+				      hash_lockowner(flock->fl_owner),
+				      flock->fl_start, length,
 				      NULL, posix_lock_type, wait_flag);
 		goto out;
 	}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 848249fa120f..3079b38f0afb 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE {
 
 int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
 void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
-int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen,
+int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
 			struct cifs_ses *ses,
 			const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index af0ec2d5ad0e..538d9b55699a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
 	sec_blob->DomainName.MaximumLength = 0;
 }
 
-/* We do not malloc the blob, it is passed in pbuffer, because its
-   maximum possible size is fixed and small, making this approach cleaner.
-   This function returns the length of the data in the blob */
-int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+static int size_of_ntlmssp_blob(struct cifs_ses *ses)
+{
+	int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
+		- CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+	if (ses->domainName)
+		sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+	else
+		sz += 2;
+
+	if (ses->user_name)
+		sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+	else
+		sz += 2;
+
+	return sz;
+}
+
+int build_ntlmssp_auth_blob(unsigned char **pbuffer,
 					u16 *buflen,
 				   struct cifs_ses *ses,
 				   const struct nls_table *nls_cp)
 {
 	int rc;
-	AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
+	AUTHENTICATE_MESSAGE *sec_blob;
 	__u32 flags;
 	unsigned char *tmp;
 
+	rc = setup_ntlmv2_rsp(ses, nls_cp);
+	if (rc) {
+		cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+		*buflen = 0;
+		goto setup_ntlmv2_ret;
+	}
+	*pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+	sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
+
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
 	sec_blob->MessageType = NtLmAuthenticate;
 
@@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 			flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
 	}
 
-	tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
+	tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
 	sec_blob->NegotiateFlags = cpu_to_le32(flags);
 
 	sec_blob->LmChallengeResponse.BufferOffset =
@@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->LmChallengeResponse.Length = 0;
 	sec_blob->LmChallengeResponse.MaximumLength = 0;
 
-	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->NtChallengeResponse.BufferOffset =
+				cpu_to_le32(tmp - *pbuffer);
 	if (ses->user_name != NULL) {
-		rc = setup_ntlmv2_rsp(ses, nls_cp);
-		if (rc) {
-			cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
-			goto setup_ntlmv2_ret;
-		}
 		memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
 				ses->auth_key.len - CIFS_SESS_KEY_SIZE);
 		tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
@@ -423,23 +443,23 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	}
 
 	if (ses->domainName == NULL) {
-		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->DomainName.Length = 0;
 		sec_blob->DomainName.MaximumLength = 0;
 		tmp += 2;
 	} else {
 		int len;
 		len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
-				      CIFS_MAX_USERNAME_LEN, nls_cp);
+				      CIFS_MAX_DOMAINNAME_LEN, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
-		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->DomainName.Length = cpu_to_le16(len);
 		sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
 		tmp += len;
 	}
 
 	if (ses->user_name == NULL) {
-		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->UserName.Length = 0;
 		sec_blob->UserName.MaximumLength = 0;
 		tmp += 2;
@@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
 				      CIFS_MAX_USERNAME_LEN, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
-		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->UserName.Length = cpu_to_le16(len);
 		sec_blob->UserName.MaximumLength = cpu_to_le16(len);
 		tmp += len;
 	}
 
-	sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 	sec_blob->WorkstationName.Length = 0;
 	sec_blob->WorkstationName.MaximumLength = 0;
 	tmp += 2;
@@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		(ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
 			&& !calc_seckey(ses)) {
 		memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
 		sec_blob->SessionKey.MaximumLength =
 				cpu_to_le16(CIFS_CPHTXT_SIZE);
 		tmp += CIFS_CPHTXT_SIZE;
 	} else {
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->SessionKey.Length = 0;
 		sec_blob->SessionKey.MaximumLength = 0;
 	}
 
+	*buflen = tmp - *pbuffer;
 setup_ntlmv2_ret:
-	*buflen = tmp - pbuffer;
 	return rc;
 }
 
@@ -690,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data)
 		rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
 				      ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
 				      true : false, lnm_session_key);
+		if (rc)
+			goto out;
 
 		memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
 		bcc_ptr += CIFS_AUTH_RESP_SIZE;
@@ -1266,7 +1288,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
 	struct cifs_ses *ses = sess_data->ses;
 	__u16 bytes_remaining;
 	char *bcc_ptr;
-	char *ntlmsspblob = NULL;
+	unsigned char *ntlmsspblob = NULL;
 	u16 blob_len;
 
 	cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
@@ -1279,19 +1301,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
 	/* Build security blob before we assemble the request */
 	pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
 	smb_buf = (struct smb_hdr *)pSMB;
-	/*
-	 * 5 is an empirical value, large enough to hold
-	 * authenticate message plus max 10 of av paris,
-	 * domain, user, workstation names, flags, etc.
-	 */
-	ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
-				GFP_KERNEL);
-	if (!ntlmsspblob) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	rc = build_ntlmssp_auth_blob(ntlmsspblob,
+	rc = build_ntlmssp_auth_blob(&ntlmsspblob,
 					&blob_len, ses, sess_data->nls_cp);
 	if (rc)
 		goto out_free_ntlmsspblob;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f38e33d365b..29e06db5f187 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	u16 blob_length = 0;
 	struct key *spnego_key = NULL;
 	char *security_blob = NULL;
-	char *ntlmssp_blob = NULL;
+	unsigned char *ntlmssp_blob = NULL;
 	bool use_spnego = false; /* else use raw ntlmssp */
 
 	cifs_dbg(FYI, "Session Setup\n");
@@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate:
 		iov[1].iov_len = blob_length;
 	} else if (phase == NtLmAuthenticate) {
 		req->hdr.SessionId = ses->Suid;
-		ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
-				       GFP_KERNEL);
-		if (ntlmssp_blob == NULL) {
-			rc = -ENOMEM;
-			goto ssetup_exit;
-		}
-		rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
+		rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
 					     nls_cp);
 		if (rc) {
 			cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
@@ -1818,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server)
 
 	cifs_dbg(FYI, "In echo request\n");
 
+	if (server->tcpStatus == CifsNeedNegotiate) {
+		struct list_head *tmp, *tmp2;
+		struct cifs_ses *ses;
+		struct cifs_tcon *tcon;
+
+		cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+		spin_lock(&cifs_tcp_ses_lock);
+		list_for_each(tmp, &server->smb_ses_list) {
+			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+			list_for_each(tmp2, &ses->tcon_list) {
+				tcon = list_entry(tmp2, struct cifs_tcon,
+						  tcon_list);
+				/* add check for persistent handle reconnect */
+				if (tcon && tcon->need_reconnect) {
+					spin_unlock(&cifs_tcp_ses_lock);
+					rc = smb2_reconnect(SMB2_ECHO, tcon);
+					spin_lock(&cifs_tcp_ses_lock);
+				}
+			}
+		}
+		spin_unlock(&cifs_tcp_ses_lock);
+	}
+
+	/* if no session, renegotiate failed above */
+	if (server->tcpStatus == CifsNeedNegotiate)
+		return -EIO;
+
 	rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
 	if (rc)
 		return rc;
diff --git a/fs/dax.c b/fs/dax.c
index 761495bf5eb9..e207f8f9b700 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 				dax.addr += first;
 				size = map_len - first;
 			}
-			max = min(pos + size, end);
+			/*
+			 * pos + size is one past the last offset for IO,
+			 * so pos + size can overflow loff_t at extreme offsets.
+			 * Cast to u64 to catch this and get the true minimum.
+			 */
+			max = min_t(u64, pos + size, end);
 		}
 
 		if (iov_iter_rw(iter) == WRITE) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 817c243c1ff1..d6847d7b123d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+	struct dentry *next;
+	/*
+	 * Inform d_walk() and shrink_dentry_list() that we are no longer
+	 * attached to the dentry tree
+	 */
+	dentry->d_flags |= DCACHE_DENTRY_KILLED;
+	if (unlikely(list_empty(&dentry->d_child)))
+		return;
+	__list_del_entry(&dentry->d_child);
+	/*
+	 * Cursors can move around the list of children.  While we'd been
+	 * a normal list member, it didn't matter - ->d_child.next would've
+	 * been updated.  However, from now on it won't be and for the
+	 * things like d_walk() it might end up with a nasty surprise.
+	 * Normally d_walk() doesn't care about cursors moving around -
+	 * ->d_lock on parent prevents that and since a cursor has no children
+	 * of its own, we get through it without ever unlocking the parent.
+	 * There is one exception, though - if we ascend from a child that
+	 * gets killed as soon as we unlock it, the next sibling is found
+	 * using the value left in its ->d_child.next.  And if _that_
+	 * pointed to a cursor, and cursor got moved (e.g. by lseek())
+	 * before d_walk() regains parent->d_lock, we'll end up skipping
+	 * everything the cursor had been moved past.
+	 *
+	 * Solution: make sure that the pointer left behind in ->d_child.next
+	 * points to something that won't be moving around.  I.e. skip the
+	 * cursors.
+	 */
+	while (dentry->d_child.next != &parent->d_subdirs) {
+		next = list_entry(dentry->d_child.next, struct dentry, d_child);
+		if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+			break;
+		dentry->d_child.next = next->d_child.next;
+	}
+}
+
 static void __dentry_kill(struct dentry *dentry)
 {
 	struct dentry *parent = NULL;
@@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry)
 	}
 	/* if it was on the hash then remove it */
 	__d_drop(dentry);
-	__list_del_entry(&dentry->d_child);
-	/*
-	 * Inform d_walk() that we are no longer attached to the
-	 * dentry tree
-	 */
-	dentry->d_flags |= DCACHE_DENTRY_KILLED;
+	dentry_unlist(dentry, parent);
 	if (parent)
 		spin_unlock(&parent->d_lock);
 	dentry_iput(dentry);
@@ -1203,6 +1236,9 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
 		next = tmp->next;
 
+		if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+			continue;
+
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
 		ret = enter(data, dentry);
@@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+	struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+	if (dentry) {
+		dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+		dentry->d_parent = dget(parent);
+	}
+	return dentry;
+}
+
 /**
  * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
  * @sb: the superblock
@@ -2457,7 +2503,6 @@ retry:
 		rcu_read_unlock();
 		goto retry;
 	}
-	rcu_read_unlock();
 	/*
 	 * No changes for the parent since the beginning of d_lookup().
 	 * Since all removals from the chain happen with hlist_bl_lock(),
@@ -2470,8 +2515,6 @@ retry:
 			continue;
 		if (dentry->d_parent != parent)
 			continue;
-		if (d_unhashed(dentry))
-			continue;
 		if (parent->d_flags & DCACHE_OP_COMPARE) {
 			int tlen = dentry->d_name.len;
 			const char *tname = dentry->d_name.name;
@@ -2483,9 +2526,18 @@ retry:
 			if (dentry_cmp(dentry, str, len))
 				continue;
 		}
-		dget(dentry);
 		hlist_bl_unlock(b);
-		/* somebody is doing lookup for it right now; wait for it */
+		/* now we can try to grab a reference */
+		if (!lockref_get_not_dead(&dentry->d_lockref)) {
+			rcu_read_unlock();
+			goto retry;
+		}
+
+		rcu_read_unlock();
+		/*
+		 * somebody is likely to be still doing lookup for it;
+		 * wait for them to finish
+		 */
 		spin_lock(&dentry->d_lock);
 		d_wait_lookup(dentry);
 		/*
@@ -2516,6 +2568,7 @@ retry:
 		dput(new);
 		return dentry;
 	}
+	rcu_read_unlock();
 	/* we can't take ->d_lock here; it's OK, though. */
 	new->d_flags |= DCACHE_PAR_LOOKUP;
 	new->d_wait = wq;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 9c1c9a01b7e5..592059f88e04 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -127,7 +127,6 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
 		r = real_fops->open(inode, filp);
 
 out:
-	fops_put(real_fops);
 	debugfs_use_file_finish(srcu_idx);
 	return r;
 }
@@ -262,8 +261,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
 
 	if (real_fops->open) {
 		r = real_fops->open(inode, filp);
-
-		if (filp->f_op != proxy_fops) {
+		if (r) {
+			replace_fops(filp, d_inode(dentry)->i_fop);
+			goto free_proxy;
+		} else if (filp->f_op != proxy_fops) {
 			/* No protection against file removal anymore. */
 			WARN(1, "debugfs file owner replaced proxy fops: %pd",
 				dentry);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 866bb18efefe..e818f5ac7a26 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/mount.h>
+#include <linux/file.h>
 #include "ecryptfs_kernel.h"
 
 struct ecryptfs_open_req {
@@ -147,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR;
 	(*lower_file) = dentry_open(&req.path, flags, cred);
 	if (!IS_ERR(*lower_file))
-		goto out;
+		goto have_file;
 	if ((flags & O_ACCMODE) == O_RDONLY) {
 		rc = PTR_ERR((*lower_file));
 		goto out;
@@ -165,8 +166,16 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	mutex_unlock(&ecryptfs_kthread_ctl.mux);
 	wake_up(&ecryptfs_kthread_ctl.wait);
 	wait_for_completion(&req.done);
-	if (IS_ERR(*lower_file))
+	if (IS_ERR(*lower_file)) {
 		rc = PTR_ERR(*lower_file);
+		goto out;
+	}
+have_file:
+	if ((*lower_file)->f_op->mmap == NULL) {
+		fput(*lower_file);
+		*lower_file = NULL;
+		rc = -EMEDIUMTYPE;
+	}
 out:
 	return rc;
 }
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ccd4971cc6c1..264f07c7754e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	struct dentry *newent;
 	bool outarg_valid = true;
 
+	fuse_lock_inode(dir);
 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
 			       &outarg, &inode);
+	fuse_unlock_inode(dir);
 	if (err == -ENOENT) {
 		outarg_valid = false;
 		err = 0;
@@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
 			       FUSE_READDIR);
 	}
+	fuse_lock_inode(inode);
 	fuse_request_send(fc, req);
+	fuse_unlock_inode(inode);
 	nbytes = req->out.args[0].size;
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eddbe02c4028..929c383432b0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -110,6 +110,9 @@ struct fuse_inode {
 
 	/** Miscellaneous bits describing inode state */
 	unsigned long state;
+
+	/** Lock for serializing lookup and readdir for back compatibility*/
+	struct mutex mutex;
 };
 
 /** FUSE inode state bits */
@@ -540,6 +543,9 @@ struct fuse_conn {
 	/** write-back cache policy (default is write-through) */
 	unsigned writeback_cache:1;
 
+	/** allow parallel lookups and readdir (default is serialized) */
+	unsigned parallel_dirops:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 
 void fuse_set_initialized(struct fuse_conn *fc);
 
+void fuse_unlock_inode(struct inode *inode);
+void fuse_lock_inode(struct inode *inode);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1ce67668a8e1..9961d8432ce3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	INIT_LIST_HEAD(&fi->queued_writes);
 	INIT_LIST_HEAD(&fi->writepages);
 	init_waitqueue_head(&fi->page_waitq);
+	mutex_init(&fi->mutex);
 	fi->forget = fuse_alloc_forget();
 	if (!fi->forget) {
 		kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode)
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	BUG_ON(!list_empty(&fi->write_files));
 	BUG_ON(!list_empty(&fi->queued_writes));
+	mutex_destroy(&fi->mutex);
 	kfree(fi->forget);
 	call_rcu(&inode->i_rcu, fuse_i_callback);
 }
@@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
 	return 0;
 }
 
+void fuse_lock_inode(struct inode *inode)
+{
+	if (!get_fuse_conn(inode)->parallel_dirops)
+		mutex_lock(&get_fuse_inode(inode)->mutex);
+}
+
+void fuse_unlock_inode(struct inode *inode)
+{
+	if (!get_fuse_conn(inode)->parallel_dirops)
+		mutex_unlock(&get_fuse_inode(inode)->mutex);
+}
+
 static void fuse_umount_begin(struct super_block *sb)
 {
 	fuse_abort_conn(get_fuse_conn_super(sb));
@@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->async_dio = 1;
 			if (arg->flags & FUSE_WRITEBACK_CACHE)
 				fc->writeback_cache = 1;
+			if (arg->flags & FUSE_PARALLEL_DIROPS)
+				fc->parallel_dirops = 1;
 			if (arg->time_gran && arg->time_gran <= 1000000000)
 				fc->sb->s_time_gran = arg->time_gran;
 		} else {
@@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
 		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
-		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
+		FUSE_PARALLEL_DIROPS;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/fs/internal.h b/fs/internal.h
index b71deeecea17..f57ced528cde 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool);
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
 extern int d_set_mounted(struct dentry *dentry);
 extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
+extern struct dentry *d_alloc_cursor(struct dentry *);
 
 /*
  * read_write.c
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b31852f76f46..e3ca4b4cac84 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2329,18 +2329,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
 
 	BUG_ON(size & (size-1)); /* Must be a power of 2 */
 
-	flags |= __GFP_REPEAT;
-	if (size == PAGE_SIZE)
-		ptr = (void *)__get_free_pages(flags, 0);
-	else if (size > PAGE_SIZE) {
-		int order = get_order(size);
-
-		if (order < 3)
-			ptr = (void *)__get_free_pages(flags, order);
-		else
-			ptr = vmalloc(size);
-	} else
+	if (size < PAGE_SIZE)
 		ptr = kmem_cache_alloc(get_slab(size), flags);
+	else
+		ptr = (void *)__get_free_pages(flags, get_order(size));
 
 	/* Check alignment; SLUB has gotten this wrong in the past,
 	 * and this can lead to user data corruption! */
@@ -2351,20 +2343,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
 
 void jbd2_free(void *ptr, size_t size)
 {
-	if (size == PAGE_SIZE) {
-		free_pages((unsigned long)ptr, 0);
-		return;
-	}
-	if (size > PAGE_SIZE) {
-		int order = get_order(size);
-
-		if (order < 3)
-			free_pages((unsigned long)ptr, order);
-		else
-			vfree(ptr);
-		return;
-	}
-	kmem_cache_free(get_slab(size), ptr);
+	if (size < PAGE_SIZE)
+		kmem_cache_free(get_slab(size), ptr);
+	else
+		free_pages((unsigned long)ptr, get_order(size));
 };
 
 /*
diff --git a/fs/libfs.c b/fs/libfs.c
index 3db2721144c2..74dc8b9e7f53 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup);
 
 int dcache_dir_open(struct inode *inode, struct file *file)
 {
-	static struct qstr cursor_name = QSTR_INIT(".", 1);
-
-	file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
+	file->private_data = d_alloc_cursor(file->f_path.dentry);
 
 	return file->private_data ? 0 : -ENOMEM;
 }
@@ -86,6 +84,61 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 }
 EXPORT_SYMBOL(dcache_dir_close);
 
+/* parent is locked at least shared */
+static struct dentry *next_positive(struct dentry *parent,
+				    struct list_head *from,
+				    int count)
+{
+	unsigned *seq = &parent->d_inode->i_dir_seq, n;
+	struct dentry *res;
+	struct list_head *p;
+	bool skipped;
+	int i;
+
+retry:
+	i = count;
+	skipped = false;
+	n = smp_load_acquire(seq) & ~1;
+	res = NULL;
+	rcu_read_lock();
+	for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+		struct dentry *d = list_entry(p, struct dentry, d_child);
+		if (!simple_positive(d)) {
+			skipped = true;
+		} else if (!--i) {
+			res = d;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (skipped) {
+		smp_rmb();
+		if (unlikely(*seq != n))
+			goto retry;
+	}
+	return res;
+}
+
+static void move_cursor(struct dentry *cursor, struct list_head *after)
+{
+	struct dentry *parent = cursor->d_parent;
+	unsigned n, *seq = &parent->d_inode->i_dir_seq;
+	spin_lock(&parent->d_lock);
+	for (;;) {
+		n = *seq;
+		if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
+			break;
+		cpu_relax();
+	}
+	__list_del(cursor->d_child.prev, cursor->d_child.next);
+	if (after)
+		list_add(&cursor->d_child, after);
+	else
+		list_add_tail(&cursor->d_child, &parent->d_subdirs);
+	smp_store_release(seq, n + 2);
+	spin_unlock(&parent->d_lock);
+}
+
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry *dentry = file->f_path.dentry;
@@ -101,25 +154,14 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 	if (offset != file->f_pos) {
 		file->f_pos = offset;
 		if (file->f_pos >= 2) {
-			struct list_head *p;
 			struct dentry *cursor = file->private_data;
+			struct dentry *to;
 			loff_t n = file->f_pos - 2;
 
-			spin_lock(&dentry->d_lock);
-			/* d_lock not required for cursor */
-			list_del(&cursor->d_child);
-			p = dentry->d_subdirs.next;
-			while (n && p != &dentry->d_subdirs) {
-				struct dentry *next;
-				next = list_entry(p, struct dentry, d_child);
-				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
-				if (simple_positive(next))
-					n--;
-				spin_unlock(&next->d_lock);
-				p = p->next;
-			}
-			list_add_tail(&cursor->d_child, p);
-			spin_unlock(&dentry->d_lock);
+			inode_lock_shared(dentry->d_inode);
+			to = next_positive(dentry, &dentry->d_subdirs, n);
+			move_cursor(cursor, to ? &to->d_child : NULL);
+			inode_unlock_shared(dentry->d_inode);
 		}
 	}
 	return offset;
@@ -142,36 +184,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct dentry *cursor = file->private_data;
-	struct list_head *p, *q = &cursor->d_child;
+	struct list_head *p = &cursor->d_child;
+	struct dentry *next;
+	bool moved = false;
 
 	if (!dir_emit_dots(file, ctx))
 		return 0;
-	spin_lock(&dentry->d_lock);
-	if (ctx->pos == 2)
-		list_move(q, &dentry->d_subdirs);
 
-	for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
-		struct dentry *next = list_entry(p, struct dentry, d_child);
-		spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!simple_positive(next)) {
-			spin_unlock(&next->d_lock);
-			continue;
-		}
-
-		spin_unlock(&next->d_lock);
-		spin_unlock(&dentry->d_lock);
+	if (ctx->pos == 2)
+		p = &dentry->d_subdirs;
+	while ((next = next_positive(dentry, p, 1)) != NULL) {
 		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
 			      d_inode(next)->i_ino, dt_type(d_inode(next))))
-			return 0;
-		spin_lock(&dentry->d_lock);
-		spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
-		/* next is still alive */
-		list_move(q, p);
-		spin_unlock(&next->d_lock);
-		p = q;
+			break;
+		moved = true;
+		p = &next->d_child;
 		ctx->pos++;
 	}
-	spin_unlock(&dentry->d_lock);
+	if (moved)
+		move_cursor(cursor, p);
 	return 0;
 }
 EXPORT_SYMBOL(dcache_readdir);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 154a107cd376..fc4084ef4736 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = {
 };
 #endif
 
-static void lockd_svc_exit_thread(void)
+static void lockd_unregister_notifiers(void)
 {
 	unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
 #if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
 #endif
+}
+
+static void lockd_svc_exit_thread(void)
+{
+	lockd_unregister_notifiers();
 	svc_exit_thread(nlmsvc_rqst);
 }
 
@@ -462,7 +467,7 @@ int lockd_up(struct net *net)
 	 * Note: svc_serv structures have an initial use count of 1,
 	 * so we exit through here on both success and failure.
 	 */
-err_net:
+err_put:
 	svc_destroy(serv);
 err_create:
 	mutex_unlock(&nlmsvc_mutex);
@@ -470,7 +475,9 @@ err_create:
 
 err_start:
 	lockd_down_net(serv, net);
-	goto err_net;
+err_net:
+	lockd_unregister_notifiers();
+	goto err_put;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
 
diff --git a/fs/locks.c b/fs/locks.c
index 7c5f91be9b65..ee1b15f6fc13 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 {
 	struct file_lock *fl, *my_fl = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct file_lock_context *ctx;
 	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
diff --git a/fs/namespace.c b/fs/namespace.c
index a7ec92c051f5..419f746d851d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry)
 		goto out_unlock;
 
 	lock_mount_hash();
+	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
 		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
@@ -3247,6 +3248,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
 		if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
 			mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
 
+		/* Don't miss readonly hidden in the superblock flags */
+		if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+			mnt_flags |= MNT_LOCK_READONLY;
+
 		/* Verify the mount flags are equal to or more permissive
 		 * than the proposed new mount.
 		 */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aaf7bd0cbae2..d8015a03db4c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
 static
 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
+	struct inode *inode;
 	struct nfs_inode *nfsi;
 
 	if (d_really_is_negative(dentry))
 		return 0;
 
-	nfsi = NFS_I(d_inode(dentry));
+	inode = d_inode(dentry);
+	if (is_bad_inode(inode) || NFS_STALE(inode))
+		return 0;
+
+	nfsi = NFS_I(inode);
 	if (entry->fattr->fileid == nfsi->fileid)
 		return 1;
 	if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
@@ -1363,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations);
 struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
 	struct dentry *res;
-	struct dentry *parent;
 	struct inode *inode = NULL;
 	struct nfs_fh *fhandle = NULL;
 	struct nfs_fattr *fattr = NULL;
@@ -1393,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	if (IS_ERR(label))
 		goto out;
 
-	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	trace_nfs_lookup_enter(dir, dentry, flags);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
@@ -1536,9 +1539,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		err = PTR_ERR(inode);
 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
 		put_nfs_open_context(ctx);
+		d_drop(dentry);
 		switch (err) {
 		case -ENOENT:
-			d_drop(dentry);
 			d_add(dentry, NULL);
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 			break;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 979b3c4dee6a..c7326c2af2c3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
 
 	result = wait_for_completion_killable(&dreq->completion);
 
+	if (!result) {
+		result = dreq->count;
+		WARN_ON_ONCE(dreq->count < 0);
+	}
 	if (!result)
 		result = dreq->error;
-	if (!result)
-		result = dreq->count;
 
 out:
 	return (ssize_t) result;
@@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
-		if (!res)
+		if (dreq->count != 0) {
 			res = (long) dreq->count;
+			WARN_ON_ONCE(dreq->count < 0);
+		}
 		dreq->iocb->ki_complete(dreq->iocb, res, 0);
 	}
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52e7d6869e3b..dda689d7a8a7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque)
 	struct nfs_fattr	*fattr = desc->fattr;
 
 	set_nfs_fileid(inode, fattr->fileid);
+	inode->i_mode = fattr->mode;
 	nfs_copy_fh(NFS_FH(inode), desc->fh);
 	return 0;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567795a5..ff416d0e24bc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 			call_close |= is_wronly;
 		else if (is_wronly)
 			calldata->arg.fmode |= FMODE_WRITE;
+		if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE))
+			call_close |= is_rdwr;
 	} else if (is_rdwr)
 		calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-	if (calldata->arg.fmode == 0)
-		call_close |= is_rdwr;
-
 	if (!nfs4_valid_open_stateid(state))
 		call_close = 0;
 	spin_unlock(&state->owner->so_lock);
@@ -7924,8 +7923,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 			break;
 		}
 		lo = NFS_I(inode)->layout;
-		if (lo && nfs4_stateid_match(&lgp->args.stateid,
-					&lo->plh_stateid)) {
+		if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
+		    nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
 			LIST_HEAD(head);
 
 			/*
@@ -7936,10 +7935,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 			pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
 			spin_unlock(&inode->i_lock);
 			pnfs_free_lseg_list(&head);
+			status = -EAGAIN;
+			goto out;
 		} else
 			spin_unlock(&inode->i_lock);
-		status = -EAGAIN;
-		goto out;
 	}
 
 	status = nfs4_handle_exception(server, status, exception);
@@ -8036,7 +8035,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
 		.flags = RPC_TASK_ASYNC,
 	};
 	struct pnfs_layout_segment *lseg = NULL;
-	struct nfs4_exception exception = { .timeout = *timeout };
+	struct nfs4_exception exception = {
+		.inode = inode,
+		.timeout = *timeout,
+	};
 	int status = 0;
 
 	dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9679f4749364..834b875900d6 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1488,9 +1488,9 @@ restart:
 					}
 					spin_unlock(&state->state_lock);
 				}
-				nfs4_put_open_state(state);
 				clear_bit(NFS_STATE_RECLAIM_NOGRACE,
 					&state->flags);
+				nfs4_put_open_state(state);
 				spin_lock(&sp->so_lock);
 				goto restart;
 			}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d45a4de..0fbe734cc38c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 	list_del_init(&lseg->pls_list);
 	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
 	atomic_dec(&lo->plh_refcount);
-	if (list_empty(&lo->plh_segs))
+	if (list_empty(&lo->plh_segs)) {
+		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
 		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+	}
 	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
@@ -1290,6 +1292,7 @@ alloc_init_layout_hdr(struct inode *ino,
 	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
 	lo->plh_inode = ino;
 	lo->plh_lc_cred = get_rpccred(ctx->cred);
+	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
 	return lo;
 }
 
@@ -1297,6 +1300,8 @@ static struct pnfs_layout_hdr *
 pnfs_find_alloc_layout(struct inode *ino,
 		       struct nfs_open_context *ctx,
 		       gfp_t gfp_flags)
+	__releases(&ino->i_lock)
+	__acquires(&ino->i_lock)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct pnfs_layout_hdr *new = NULL;
@@ -1565,8 +1570,7 @@ lookup_again:
 	 * stateid, or it has been invalidated, then we must use the open
 	 * stateid.
 	 */
-	if (lo->plh_stateid.seqid == 0 ||
-	    test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
+	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
 
 		/*
 		 * The first layoutget for the file. Need to serialize per
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 0dfc476da3e1..b38e3c0dc790 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
 }
 
 /* Helper function for pnfs_generic_commit_pagelist to catch an empty
- * page list. This can happen when two commits race. */
+ * page list. This can happen when two commits race.
+ *
+ * This must be called instead of nfs_init_commit - call one or the other, but
+ * not both!
+ */
 static bool
 pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
 					  struct nfs_commit_data *data,
@@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
 	if (list_empty(pages)) {
 		if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
 			wake_up_atomic_t(&cinfo->mds->rpcs_out);
-		nfs_commitdata_release(data);
+		/* don't call nfs_commitdata_release - it tries to put
+		 * the open_context which is not acquired until nfs_init_commit
+		 * which has not been called on @data */
+		WARN_ON_ONCE(data->context);
+		nfs_commit_free(data);
 		return true;
 	}
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6776d7a7839e..572e5b3b06f1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page)
 		nfs_list_remove_request(new);
 		nfs_readpage_release(new);
 		error = desc->pgio->pg_error;
-		goto out_unlock;
+		goto out;
 	}
 	return 0;
 out_error:
 	error = PTR_ERR(new);
-out_unlock:
 	unlock_page(page);
+out:
 	return error;
 }
 
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e55b5242614d..31f3df193bdb 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -290,7 +290,7 @@ out_free_buf:
 	return error;
 }
 
-#define NFSD_MDS_PR_KEY		0x0100000000000000
+#define NFSD_MDS_PR_KEY		0x0100000000000000ULL
 
 /*
  * We use the client ID as a unique key for the reservations.
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1580ea6fd64d..d08cd88155c7 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 		goto out;
 
 	inode = d_inode(fh->fh_dentry);
-	if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
-		error = -EOPNOTSUPP;
-		goto out_errno;
-	}
 
 	error = fh_want_write(fh);
 	if (error)
 		goto out_errno;
 
-	error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+	fh_lock(fh);
+
+	error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
 	if (error)
-		goto out_drop_write;
-	error = inode->i_op->set_acl(inode, argp->acl_default,
-				     ACL_TYPE_DEFAULT);
+		goto out_drop_lock;
+	error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
 	if (error)
-		goto out_drop_write;
+		goto out_drop_lock;
+
+	fh_unlock(fh);
 
 	fh_drop_write(fh);
 
@@ -131,7 +130,8 @@ out:
 	posix_acl_release(argp->acl_access);
 	posix_acl_release(argp->acl_default);
 	return nfserr;
-out_drop_write:
+out_drop_lock:
+	fh_unlock(fh);
 	fh_drop_write(fh);
 out_errno:
 	nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 01df4cd7c753..0c890347cde3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 		goto out;
 
 	inode = d_inode(fh->fh_dentry);
-	if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
-		error = -EOPNOTSUPP;
-		goto out_errno;
-	}
 
 	error = fh_want_write(fh);
 	if (error)
 		goto out_errno;
 
-	error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+	fh_lock(fh);
+
+	error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
 	if (error)
-		goto out_drop_write;
-	error = inode->i_op->set_acl(inode, argp->acl_default,
-				     ACL_TYPE_DEFAULT);
+		goto out_drop_lock;
+	error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
 
-out_drop_write:
+out_drop_lock:
+	fh_unlock(fh);
 	fh_drop_write(fh);
 out_errno:
 	nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 6adabd6049b7..71292a0d6f09 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	dentry = fhp->fh_dentry;
 	inode = d_inode(dentry);
 
-	if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
-		return nfserr_attrnotsupp;
-
 	if (S_ISDIR(inode->i_mode))
 		flags = NFS4_ACL_DIR;
 
@@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (host_error < 0)
 		goto out_nfserr;
 
-	host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS);
+	fh_lock(fhp);
+
+	host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
 	if (host_error < 0)
-		goto out_release;
+		goto out_drop_lock;
 
 	if (S_ISDIR(inode->i_mode)) {
-		host_error = inode->i_op->set_acl(inode, dpacl,
-						  ACL_TYPE_DEFAULT);
+		host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
 	}
 
-out_release:
+out_drop_lock:
+	fh_unlock(fhp);
+
 	posix_acl_release(pacl);
 	posix_acl_release(dpacl);
 out_nfserr:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7389cb1d7409..04c68d900324 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
 	}
 }
 
-static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
-{
-	struct rpc_xprt *xprt;
-
-	if (args->protocol != XPRT_TRANSPORT_BC_TCP)
-		return rpc_create(args);
-
-	xprt = args->bc_xprt->xpt_bc_xprt;
-	if (xprt) {
-		xprt_get(xprt);
-		return rpc_create_xprt(args, xprt);
-	}
-
-	return rpc_create(args);
-}
-
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
 	int maxtime = max_cb_time(clp->net);
@@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		args.authflavor = ses->se_cb_sec.flavor;
 	}
 	/* Create RPC client */
-	client = create_backchannel_client(&args);
+	client = rpc_create(&args);
 	if (IS_ERR(client)) {
 		dprintk("NFSD: couldn't create callback client: %ld\n",
 			PTR_ERR(client));
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f5f82e145018..70d0b9b33031 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3480,12 +3480,17 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
 }
 
 static struct nfs4_ol_stateid *
-init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
-		struct nfsd4_open *open)
+init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
 {
 
 	struct nfs4_openowner *oo = open->op_openowner;
 	struct nfs4_ol_stateid *retstp = NULL;
+	struct nfs4_ol_stateid *stp;
+
+	stp = open->op_stp;
+	/* We are moving these outside of the spinlocks to avoid the warnings */
+	mutex_init(&stp->st_mutex);
+	mutex_lock(&stp->st_mutex);
 
 	spin_lock(&oo->oo_owner.so_client->cl_lock);
 	spin_lock(&fp->fi_lock);
@@ -3493,6 +3498,8 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
 	retstp = nfsd4_find_existing_open(fp, open);
 	if (retstp)
 		goto out_unlock;
+
+	open->op_stp = NULL;
 	atomic_inc(&stp->st_stid.sc_count);
 	stp->st_stid.sc_type = NFS4_OPEN_STID;
 	INIT_LIST_HEAD(&stp->st_locks);
@@ -3502,14 +3509,19 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
 	stp->st_access_bmap = 0;
 	stp->st_deny_bmap = 0;
 	stp->st_openstp = NULL;
-	init_rwsem(&stp->st_rwsem);
 	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 
 out_unlock:
 	spin_unlock(&fp->fi_lock);
 	spin_unlock(&oo->oo_owner.so_client->cl_lock);
-	return retstp;
+	if (retstp) {
+		mutex_lock(&retstp->st_mutex);
+		/* To keep mutex tracking happy */
+		mutex_unlock(&stp->st_mutex);
+		stp = retstp;
+	}
+	return stp;
 }
 
 /*
@@ -4305,7 +4317,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
 	struct nfs4_file *fp = NULL;
 	struct nfs4_ol_stateid *stp = NULL;
-	struct nfs4_ol_stateid *swapstp = NULL;
 	struct nfs4_delegation *dp = NULL;
 	__be32 status;
 
@@ -4335,32 +4346,28 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	 */
 	if (stp) {
 		/* Stateid was found, this is an OPEN upgrade */
-		down_read(&stp->st_rwsem);
+		mutex_lock(&stp->st_mutex);
 		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
 		if (status) {
-			up_read(&stp->st_rwsem);
+			mutex_unlock(&stp->st_mutex);
 			goto out;
 		}
 	} else {
-		stp = open->op_stp;
-		open->op_stp = NULL;
-		swapstp = init_open_stateid(stp, fp, open);
-		if (swapstp) {
-			nfs4_put_stid(&stp->st_stid);
-			stp = swapstp;
-			down_read(&stp->st_rwsem);
+		/* stp is returned locked. */
+		stp = init_open_stateid(fp, open);
+		/* See if we lost the race to some other thread */
+		if (stp->st_access_bmap != 0) {
 			status = nfs4_upgrade_open(rqstp, fp, current_fh,
 						stp, open);
 			if (status) {
-				up_read(&stp->st_rwsem);
+				mutex_unlock(&stp->st_mutex);
 				goto out;
 			}
 			goto upgrade_out;
 		}
-		down_read(&stp->st_rwsem);
 		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
 		if (status) {
-			up_read(&stp->st_rwsem);
+			mutex_unlock(&stp->st_mutex);
 			release_open_stateid(stp);
 			goto out;
 		}
@@ -4372,7 +4379,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	}
 upgrade_out:
 	nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
-	up_read(&stp->st_rwsem);
+	mutex_unlock(&stp->st_mutex);
 
 	if (nfsd4_has_session(&resp->cstate)) {
 		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4977,12 +4984,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
 		 * revoked delegations are kept only for free_stateid.
 		 */
 		return nfserr_bad_stateid;
-	down_write(&stp->st_rwsem);
+	mutex_lock(&stp->st_mutex);
 	status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
 	if (status == nfs_ok)
 		status = nfs4_check_fh(current_fh, &stp->st_stid);
 	if (status != nfs_ok)
-		up_write(&stp->st_rwsem);
+		mutex_unlock(&stp->st_mutex);
 	return status;
 }
 
@@ -5030,7 +5037,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
 		return status;
 	oo = openowner(stp->st_stateowner);
 	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
-		up_write(&stp->st_rwsem);
+		mutex_unlock(&stp->st_mutex);
 		nfs4_put_stid(&stp->st_stid);
 		return nfserr_bad_stateid;
 	}
@@ -5062,12 +5069,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	oo = openowner(stp->st_stateowner);
 	status = nfserr_bad_stateid;
 	if (oo->oo_flags & NFS4_OO_CONFIRMED) {
-		up_write(&stp->st_rwsem);
+		mutex_unlock(&stp->st_mutex);
 		goto put_stateid;
 	}
 	oo->oo_flags |= NFS4_OO_CONFIRMED;
 	nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
-	up_write(&stp->st_rwsem);
+	mutex_unlock(&stp->st_mutex);
 	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
 		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
 
@@ -5143,7 +5150,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 	nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
 	status = nfs_ok;
 put_stateid:
-	up_write(&stp->st_rwsem);
+	mutex_unlock(&stp->st_mutex);
 	nfs4_put_stid(&stp->st_stid);
 out:
 	nfsd4_bump_seqid(cstate, status);
@@ -5196,7 +5203,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out; 
 	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
-	up_write(&stp->st_rwsem);
+	mutex_unlock(&stp->st_mutex);
 
 	nfsd4_close_open_stateid(stp);
 
@@ -5422,7 +5429,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
 	stp->st_access_bmap = 0;
 	stp->st_deny_bmap = open_stp->st_deny_bmap;
 	stp->st_openstp = open_stp;
-	init_rwsem(&stp->st_rwsem);
+	mutex_init(&stp->st_mutex);
 	list_add(&stp->st_locks, &open_stp->st_locks);
 	list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
 	spin_lock(&fp->fi_lock);
@@ -5591,7 +5598,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 					&open_stp, nn);
 		if (status)
 			goto out;
-		up_write(&open_stp->st_rwsem);
+		mutex_unlock(&open_stp->st_mutex);
 		open_sop = openowner(open_stp->st_stateowner);
 		status = nfserr_bad_stateid;
 		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5600,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = lookup_or_create_lock_state(cstate, open_stp, lock,
 							&lock_stp, &new);
 		if (status == nfs_ok)
-			down_write(&lock_stp->st_rwsem);
+			mutex_lock(&lock_stp->st_mutex);
 	} else {
 		status = nfs4_preprocess_seqid_op(cstate,
 				       lock->lk_old_lock_seqid,
@@ -5704,7 +5711,7 @@ out:
 		    seqid_mutating_err(ntohl(status)))
 			lock_sop->lo_owner.so_seqid++;
 
-		up_write(&lock_stp->st_rwsem);
+		mutex_unlock(&lock_stp->st_mutex);
 
 		/*
 		 * If this is a new, never-before-used stateid, and we are
@@ -5874,7 +5881,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 fput:
 	fput(filp);
 put_stateid:
-	up_write(&stp->st_rwsem);
+	mutex_unlock(&stp->st_mutex);
 	nfs4_put_stid(&stp->st_stid);
 out:
 	nfsd4_bump_seqid(cstate, status);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 986e51e5ceac..64053eadeb81 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -535,7 +535,7 @@ struct nfs4_ol_stateid {
 	unsigned char			st_access_bmap;
 	unsigned char			st_deny_bmap;
 	struct nfs4_ol_stateid		*st_openstp;
-	struct rw_semaphore		st_rwsem;
+	struct mutex			st_mutex;
 };
 
 static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 809bd2de7ad0..e9fd241b9a0a 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
 	if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
 		return 0;
 	bytes = le16_to_cpu(sbp->s_bytes);
-	if (bytes > BLOCK_SIZE)
+	if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
 		return 0;
 	crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
 		       sumoff);
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index e27e6527912b..4342c7ee7d20 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,7 +1,5 @@
 ccflags-y := -Ifs/ocfs2
 
-ccflags-y += -DCATCH_BH_JBD_RACES
-
 obj-$(CONFIG_OCFS2_FS) += 	\
 	ocfs2.o			\
 	ocfs2_stackglue.o
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index fe50ded1b4ce..498641eed2db 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -139,11 +139,16 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 
 		lock_buffer(bh);
 		if (buffer_jbd(bh)) {
+#ifdef CATCH_BH_JBD_RACES
 			mlog(ML_ERROR,
 			     "block %llu had the JBD bit set "
 			     "while I was in lock_buffer!",
 			     (unsigned long long)bh->b_blocknr);
 			BUG();
+#else
+			unlock_buffer(bh);
+			continue;
+#endif
 		}
 
 		clear_buffer_uptodate(bh);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 22f0253a3567..c2a6b0894022 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -405,12 +405,21 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
 		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
 	} else {
 		const struct cred *old_cred;
+		struct cred *override_cred;
 
 		old_cred = ovl_override_creds(dentry->d_sb);
 
-		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
-					       hardlink);
+		err = -ENOMEM;
+		override_cred = prepare_creds();
+		if (override_cred) {
+			override_cred->fsuid = old_cred->fsuid;
+			override_cred->fsgid = old_cred->fsgid;
+			put_cred(override_creds(override_cred));
+			put_cred(override_cred);
 
+			err = ovl_create_over_whiteout(dentry, inode, &stat,
+						       link, hardlink);
+		}
 		revert_creds(old_cred);
 	}
 
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 0ed7c4012437..c831c2e5f803 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -59,16 +59,37 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		goto out;
 
+	if (attr->ia_valid & ATTR_SIZE) {
+		struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+		err = -ETXTBSY;
+		if (atomic_read(&realinode->i_writecount) < 0)
+			goto out_drop_write;
+	}
+
 	err = ovl_copy_up(dentry);
 	if (!err) {
+		struct inode *winode = NULL;
+
 		upperdentry = ovl_dentry_upper(dentry);
 
+		if (attr->ia_valid & ATTR_SIZE) {
+			winode = d_inode(upperdentry);
+			err = get_write_access(winode);
+			if (err)
+				goto out_drop_write;
+		}
+
 		inode_lock(upperdentry->d_inode);
 		err = notify_change(upperdentry, attr, NULL);
 		if (!err)
 			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
 		inode_unlock(upperdentry->d_inode);
+
+		if (winode)
+			put_write_access(winode);
 	}
+out_drop_write:
 	ovl_drop_write(dentry);
 out:
 	return err;
@@ -121,16 +142,18 @@ int ovl_permission(struct inode *inode, int mask)
 
 		err = vfs_getattr(&realpath, &stat);
 		if (err)
-			return err;
+			goto out_dput;
 
+		err = -ESTALE;
 		if ((stat.mode ^ inode->i_mode) & S_IFMT)
-			return -ESTALE;
+			goto out_dput;
 
 		inode->i_mode = stat.mode;
 		inode->i_uid = stat.uid;
 		inode->i_gid = stat.gid;
 
-		return generic_permission(inode, mask);
+		err = generic_permission(inode, mask);
+		goto out_dput;
 	}
 
 	/* Careful in RCU walk mode */
@@ -238,41 +261,27 @@ out:
 	return err;
 }
 
-static bool ovl_need_xattr_filter(struct dentry *dentry,
-				  enum ovl_path_type type)
-{
-	if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
-		return S_ISDIR(dentry->d_inode->i_mode);
-	else
-		return false;
-}
-
 ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
 		     const char *name, void *value, size_t size)
 {
-	struct path realpath;
-	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+	struct dentry *realdentry = ovl_dentry_real(dentry);
 
-	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+	if (ovl_is_private_xattr(name))
 		return -ENODATA;
 
-	return vfs_getxattr(realpath.dentry, name, value, size);
+	return vfs_getxattr(realdentry, name, value, size);
 }
 
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 {
-	struct path realpath;
-	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+	struct dentry *realdentry = ovl_dentry_real(dentry);
 	ssize_t res;
 	int off;
 
-	res = vfs_listxattr(realpath.dentry, list, size);
+	res = vfs_listxattr(realdentry, list, size);
 	if (res <= 0 || size == 0)
 		return res;
 
-	if (!ovl_need_xattr_filter(dentry, type))
-		return res;
-
 	/* filter out private xattrs */
 	for (off = 0; off < res;) {
 		char *s = list + off;
@@ -302,7 +311,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name)
 		goto out;
 
 	err = -ENODATA;
-	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+	if (ovl_is_private_xattr(name))
 		goto out_drop_write;
 
 	if (!OVL_TYPE_UPPER(type)) {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ce02f46029da..9a7693d5f8ff 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			if (err < 0)
 				goto out_put_workdir;
 
-			if (!err) {
-				pr_err("overlayfs: upper fs needs to support d_type.\n");
-				err = -EINVAL;
-				goto out_put_workdir;
-			}
+			/*
+			 * We allowed this configuration and don't want to
+			 * break users over kernel upgrade. So warn instead
+			 * of erroring out.
+			 */
+			if (!err)
+				pr_warn("overlayfs: upper fs needs to support d_type.\n");
 		}
 	}
 
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8a4a266beff3..edc452c2a563 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -820,39 +820,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
 	return error;
 }
 
-static int
-posix_acl_xattr_set(const struct xattr_handler *handler,
-		    struct dentry *unused, struct inode *inode,
-		    const char *name, const void *value,
-		    size_t size, int flags)
+int
+set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-	struct posix_acl *acl = NULL;
-	int ret;
-
 	if (!IS_POSIXACL(inode))
 		return -EOPNOTSUPP;
 	if (!inode->i_op->set_acl)
 		return -EOPNOTSUPP;
 
-	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
-		return value ? -EACCES : 0;
+	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+		return acl ? -EACCES : 0;
 	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 
+	if (acl) {
+		int ret = posix_acl_valid(acl);
+		if (ret)
+			return ret;
+	}
+	return inode->i_op->set_acl(inode, acl, type);
+}
+EXPORT_SYMBOL(set_posix_acl);
+
+static int
+posix_acl_xattr_set(const struct xattr_handler *handler,
+		    struct dentry *unused, struct inode *inode,
+		    const char *name, const void *value,
+		    size_t size, int flags)
+{
+	struct posix_acl *acl = NULL;
+	int ret;
+
 	if (value) {
 		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
-
-		if (acl) {
-			ret = posix_acl_valid(acl);
-			if (ret)
-				goto out;
-		}
 	}
-
-	ret = inode->i_op->set_acl(inode, acl, handler->flags);
-out:
+	ret = set_posix_acl(inode, handler->flags, acl);
 	posix_acl_release(acl);
 	return ret;
 }
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 55bc7d6c8aac..06702783bf40 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
 	if (IS_ERR(sb))
 		return ERR_CAST(sb);
 
+	/*
+	 * procfs isn't actually a stacking filesystem; however, there is
+	 * too much magic going on inside it to permit stacking things on
+	 * top of it
+	 */
+	sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+
 	if (!proc_parse_options(options, ns)) {
 		deactivate_locked_super(sb);
 		return ERR_PTR(-EINVAL);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b8f2d1e8c645..c72c16c5a60f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1393,7 +1393,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	unsigned long safe_mask = 0;
 	unsigned int commit_max_age = (unsigned int)-1;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
-	char *new_opts = kstrdup(arg, GFP_KERNEL);
+	char *new_opts;
 	int err;
 	char *qf_names[REISERFS_MAXQUOTAS];
 	unsigned int qfmt = 0;
@@ -1401,6 +1401,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	int i;
 #endif
 
+	new_opts = kstrdup(arg, GFP_KERNEL);
+	if (arg && !new_opts)
+		return -ENOMEM;
+
 	sync_filesystem(s);
 	reiserfs_write_lock(s);
 
@@ -1546,7 +1550,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	}
 
 out_ok_unlocked:
-	replace_mount_options(s, new_opts);
+	if (new_opts)
+		replace_mount_options(s, new_opts);
 	return 0;
 
 out_err_unlock:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 08316972ff93..7bbf420d1289 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -52,6 +52,7 @@
 #include "ubifs.h"
 #include <linux/mount.h>
 #include <linux/slab.h>
+#include <linux/migrate.h>
 
 static int read_block(struct inode *inode, void *addr, unsigned int block,
 		      struct ubifs_data_node *dn)
@@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page)
 	return ret;
 }
 
+#ifdef CONFIG_MIGRATION
+static int ubifs_migrate_page(struct address_space *mapping,
+		struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+	int rc;
+
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	if (rc != MIGRATEPAGE_SUCCESS)
+		return rc;
+
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		SetPagePrivate(newpage);
+	}
+
+	migrate_page_copy(newpage, page);
+	return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
 static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
 	/*
@@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = {
 	.write_end      = ubifs_write_end,
 	.invalidatepage = ubifs_invalidatepage,
 	.set_page_dirty = ubifs_set_page_dirty,
+#ifdef CONFIG_MIGRATION
+	.migratepage	= ubifs_migrate_page,
+#endif
 	.releasepage    = ubifs_releasepage,
 };
 
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 5f861ed287c3..888c364b2fe9 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -295,7 +295,8 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
 		map = &UDF_SB(sb)->s_partmaps[partition];
 		/* map to sparable/physical partition desc */
 		phyblock = udf_get_pblock(sb, eloc.logicalBlockNum,
-			map->s_partition_num, ext_offset + offset);
+			map->s_type_specific.s_metadata.s_phys_partition_ref,
+			ext_offset + offset);
 	}
 
 	brelse(epos.bh);
@@ -317,14 +318,18 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
 	mdata = &map->s_type_specific.s_metadata;
 	inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe;
 
-	/* We shouldn't mount such media... */
-	BUG_ON(!inode);
+	if (!inode)
+		return 0xFFFFFFFF;
+
 	retblk = udf_try_read_meta(inode, block, partition, offset);
 	if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
 		udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
 		if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
 			mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
-				mdata->s_mirror_file_loc, map->s_partition_num);
+				mdata->s_mirror_file_loc,
+				mdata->s_phys_partition_ref);
+			if (IS_ERR(mdata->s_mirror_fe))
+				mdata->s_mirror_fe = NULL;
 			mdata->s_flags |= MF_MIRROR_FE_LOADED;
 		}
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 5e2c8c814e1b..4942549e7dc8 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -951,13 +951,13 @@ out2:
 }
 
 struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
-					u32 meta_file_loc, u32 partition_num)
+					u32 meta_file_loc, u32 partition_ref)
 {
 	struct kernel_lb_addr addr;
 	struct inode *metadata_fe;
 
 	addr.logicalBlockNum = meta_file_loc;
-	addr.partitionReferenceNum = partition_num;
+	addr.partitionReferenceNum = partition_ref;
 
 	metadata_fe = udf_iget_special(sb, &addr);
 
@@ -974,7 +974,8 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
 	return metadata_fe;
 }
 
-static int udf_load_metadata_files(struct super_block *sb, int partition)
+static int udf_load_metadata_files(struct super_block *sb, int partition,
+				   int type1_index)
 {
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct udf_part_map *map;
@@ -984,20 +985,21 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
 
 	map = &sbi->s_partmaps[partition];
 	mdata = &map->s_type_specific.s_metadata;
+	mdata->s_phys_partition_ref = type1_index;
 
 	/* metadata address */
 	udf_debug("Metadata file location: block = %d part = %d\n",
-		  mdata->s_meta_file_loc, map->s_partition_num);
+		  mdata->s_meta_file_loc, mdata->s_phys_partition_ref);
 
 	fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc,
-					 map->s_partition_num);
+					 mdata->s_phys_partition_ref);
 	if (IS_ERR(fe)) {
 		/* mirror file entry */
 		udf_debug("Mirror metadata file location: block = %d part = %d\n",
-			  mdata->s_mirror_file_loc, map->s_partition_num);
+			  mdata->s_mirror_file_loc, mdata->s_phys_partition_ref);
 
 		fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc,
-						 map->s_partition_num);
+						 mdata->s_phys_partition_ref);
 
 		if (IS_ERR(fe)) {
 			udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
@@ -1015,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
 	*/
 	if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) {
 		addr.logicalBlockNum = mdata->s_bitmap_file_loc;
-		addr.partitionReferenceNum = map->s_partition_num;
+		addr.partitionReferenceNum = mdata->s_phys_partition_ref;
 
 		udf_debug("Bitmap file location: block = %d part = %d\n",
 			  addr.logicalBlockNum, addr.partitionReferenceNum);
@@ -1283,7 +1285,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
 	p = (struct partitionDesc *)bh->b_data;
 	partitionNumber = le16_to_cpu(p->partitionNumber);
 
-	/* First scan for TYPE1, SPARABLE and METADATA partitions */
+	/* First scan for TYPE1 and SPARABLE partitions */
 	for (i = 0; i < sbi->s_partitions; i++) {
 		map = &sbi->s_partmaps[i];
 		udf_debug("Searching map: (%d == %d)\n",
@@ -1333,7 +1335,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
 		goto out_bh;
 
 	if (map->s_partition_type == UDF_METADATA_MAP25) {
-		ret = udf_load_metadata_files(sb, i);
+		ret = udf_load_metadata_files(sb, i, type1_idx);
 		if (ret < 0) {
 			udf_err(sb, "error loading MetaData partition map %d\n",
 				i);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 27b5335730c9..c13875d669c0 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -61,6 +61,11 @@ struct udf_meta_data {
 	__u32	s_bitmap_file_loc;
 	__u32	s_alloc_unit_size;
 	__u16	s_align_unit_size;
+	/*
+	 * Partition Reference Number of the associated physical / sparable
+	 * partition
+	 */
+	__u16   s_phys_partition_ref;
 	int	s_flags;
 	struct inode *s_metadata_fe;
 	struct inode *s_mirror_fe;
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 6bd05700d8c9..05f05f17a7c2 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -22,37 +22,33 @@
 #include <asm-generic/qspinlock_types.h>
 
 /**
+ * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * There is a very slight possibility of live-lock if the lockers keep coming
+ * and the waiter is just unfortunate enough to not see any unlock state.
+ */
+#ifndef queued_spin_unlock_wait
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+#endif
+
+/**
  * queued_spin_is_locked - is the spinlock locked?
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
+#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
-	 * queued_spin_lock_slowpath() can ACQUIRE the lock before
-	 * issuing the unordered store that sets _Q_LOCKED_VAL.
-	 *
-	 * See both smp_cond_acquire() sites for more detail.
-	 *
-	 * This however means that in code like:
-	 *
-	 *   spin_lock(A)		spin_lock(B)
-	 *   spin_unlock_wait(B)	spin_is_locked(A)
-	 *   do_something()		do_something()
-	 *
-	 * Both CPUs can end up running do_something() because the store
-	 * setting _Q_LOCKED_VAL will pass through the loads in
-	 * spin_unlock_wait() and/or spin_is_locked().
+	 * See queued_spin_unlock_wait().
 	 *
-	 * Avoid this by issuing a full memory barrier between the spin_lock()
-	 * and the loads in spin_unlock_wait() and spin_is_locked().
-	 *
-	 * Note that regular mutual exclusion doesn't care about this
-	 * delayed store.
+	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
+	 * isn't immediately observable.
 	 */
-	smp_mb();
-	return atomic_read(&lock->val) & _Q_LOCKED_MASK;
+	return atomic_read(&lock->val);
 }
+#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif
 
-/**
- * queued_spin_unlock_wait - wait until current lock holder releases the lock
- * @lock : Pointer to queued spinlock structure
- *
- * There is a very slight possibility of live-lock if the lockers keep coming
- * and the waiter is just unfortunate enough to not see any unlock state.
- */
-static inline void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	/* See queued_spin_is_locked() */
-	smp_mb();
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-}
-
 #ifndef virt_spin_lock
 static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 9094599a1150..33466bfc6440 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -309,6 +309,7 @@
 	INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \
 	INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \
 	INTEL_VGA_DEVICE(0x5902, info), /* DT  GT1 */ \
+	INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \
 	INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \
 	INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */
 
@@ -322,15 +323,12 @@
 	INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */
 
 #define INTEL_KBL_GT3_IDS(info) \
+	INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \
 	INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
-	INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
-	INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
+	INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */
 
 #define INTEL_KBL_GT4_IDS(info) \
-	INTEL_VGA_DEVICE(0x5932, info), /* DT  GT4 */ \
-	INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \
-	INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \
-	INTEL_VGA_DEVICE(0x593D, info)  /* WKS GT4 */
+	INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */
 
 #define INTEL_KBL_IDS(info) \
 	INTEL_KBL_GT1_IDS(info), \
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index fe389ac31489..92e7e97ca8ff 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -18,13 +18,13 @@
 #ifndef __ASM_ARM_KVM_PMU_H
 #define __ASM_ARM_KVM_PMU_H
 
-#ifdef CONFIG_KVM_ARM_PMU
-
 #include <linux/perf_event.h>
 #include <asm/perf_event.h>
 
 #define ARMV8_PMU_CYCLE_IDX		(ARMV8_PMU_MAX_COUNTERS - 1)
 
+#ifdef CONFIG_KVM_ARM_PMU
+
 struct kvm_pmc {
 	u8 idx;	/* index into the pmu->pmc array */
 	struct perf_event *perf_event;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 961a417d641e..e38e3fc13ea8 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,7 +26,6 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
-#include <linux/tty.h>
 
 #define AUDIT_INO_UNSET ((unsigned long)-1)
 #define AUDIT_DEV_UNSET ((dev_t)-1)
@@ -348,23 +347,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 	return tsk->sessionid;
 }
 
-static inline struct tty_struct *audit_get_tty(struct task_struct *tsk)
-{
-	struct tty_struct *tty = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&tsk->sighand->siglock, flags);
-	if (tsk->signal)
-		tty = tty_kref_get(tsk->signal->tty);
-	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
-	return tty;
-}
-
-static inline void audit_put_tty(struct tty_struct *tty)
-{
-	tty_kref_put(tty);
-}
-
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
 extern void __audit_bprm(struct linux_binprm *bprm);
@@ -522,12 +504,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 {
 	return -1;
 }
-static inline struct tty_struct *audit_get_tty(struct task_struct *tsk)
-{
-	return NULL;
-}
-static inline void audit_put_tty(struct tty_struct *tty)
-{ }
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 { }
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9adfef694a25..b3336b4f5d04 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -115,6 +115,31 @@ enum bpf_access_type {
 	BPF_WRITE = 2
 };
 
+/* types of values stored in eBPF registers */
+enum bpf_reg_type {
+	NOT_INIT = 0,		 /* nothing was written into register */
+	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
+	PTR_TO_CTX,		 /* reg points to bpf_context */
+	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
+	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
+	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	FRAME_PTR,		 /* reg == frame_pointer */
+	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
+	CONST_IMM,		 /* constant integer value */
+
+	/* PTR_TO_PACKET represents:
+	 * skb->data
+	 * skb->data + imm
+	 * skb->data + (u16) var
+	 * skb->data + (u16) var + imm
+	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
+	 * if (id > 0) means that some 'var' was added
+	 * if (off > 0) menas that 'imm' was added
+	 */
+	PTR_TO_PACKET,
+	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+};
+
 struct bpf_prog;
 
 struct bpf_verifier_ops {
@@ -124,7 +149,8 @@ struct bpf_verifier_ops {
 	/* return true if 'size' wide access at offset 'off' within bpf_context
 	 * with 'type' (read or write) is allowed
 	 */
-	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
+	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
+				enum bpf_reg_type *reg_type);
 
 	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
@@ -192,9 +218,9 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
-void bpf_prog_put_rcu(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
@@ -252,6 +278,12 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
+						 enum bpf_prog_type type)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a20320c666fd..984f73b719a9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
 						       struct cgroup_subsys *ss);
 
 struct cgroup *cgroup_get_from_path(const char *path);
+struct cgroup *cgroup_get_from_fd(int fd);
 
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 484c8792da82..f53fa055021a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -212,6 +212,7 @@ struct dentry_operations {
 #define DCACHE_OP_REAL			0x08000000
 
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
+#define DCACHE_DENTRY_CURSOR		0x20000000
 
 extern seqlock_t rename_lock;
 
@@ -575,5 +576,17 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry,
 	return inode;
 }
 
+/**
+ * d_real_inode - Return the real inode
+ * @dentry: The dentry to query
+ *
+ * If dentry is on an union/overlay, then return the underlying, real inode.
+ * Otherwise return d_inode().
+ */
+static inline struct inode *d_real_inode(struct dentry *dentry)
+{
+	return d_backing_inode(d_real(dentry));
+}
+
 
 #endif	/* __LINUX_DCACHE_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index c2db3ca22217..f196dd0b0f2f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1005,7 +1005,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
 /* Iterate through an efi_memory_map */
 #define for_each_efi_memory_desc_in_map(m, md)				   \
 	for ((md) = (m)->map;						   \
-	     (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
+	     ((void *)(md) + (m)->desc_size) <= (m)->map_end;		   \
 	     (md) = (void *)(md) + (m)->desc_size)
 
 /**
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 37ff4a6faa9a..6fec9e81bd70 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -374,6 +374,29 @@ static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2)
 }
 
 /**
+ * ether_addr_equal_masked - Compare two Ethernet addresses with a mask
+ * @addr1: Pointer to a six-byte array containing the 1st Ethernet address
+ * @addr2: Pointer to a six-byte array containing the 2nd Ethernet address
+ * @mask: Pointer to a six-byte array containing the Ethernet address bitmask
+ *
+ * Compare two Ethernet addresses with a mask, returns true if for every bit
+ * set in the bitmask the equivalent bits in the ethernet addresses are equal.
+ * Using a mask with all bits set is a slower ether_addr_equal.
+ */
+static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2,
+					   const u8 *mask)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		if ((addr1[i] ^ addr2[i]) & mask[i])
+			return false;
+	}
+
+	return true;
+}
+
+/**
  * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
  * @dev: Pointer to a device structure
  * @addr: Pointer to a six-byte array containing the Ethernet address
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index b118744d3382..a80516fd65c8 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -19,6 +19,7 @@
 
 #include <linux/types.h>
 #include <linux/if_ether.h>
+#include <linux/etherdevice.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
@@ -2464,7 +2465,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
  */
 static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb)
 {
-	if (skb->len < 25)
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
 		return false;
 	return _ieee80211_is_robust_mgmt_frame((void *)skb->data);
 }
@@ -2487,6 +2488,35 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
 }
 
 /**
+ * _ieee80211_is_group_privacy_action - check if frame is a group addressed
+ * privacy action frame
+ * @hdr: the frame
+ */
+static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr)
+{
+	struct ieee80211_mgmt *mgmt = (void *)hdr;
+
+	if (!ieee80211_is_action(hdr->frame_control) ||
+	    !is_multicast_ether_addr(hdr->addr1))
+		return false;
+
+	return mgmt->u.action.category == WLAN_CATEGORY_MESH_ACTION ||
+	       mgmt->u.action.category == WLAN_CATEGORY_MULTIHOP_ACTION;
+}
+
+/**
+ * ieee80211_is_group_privacy_action - check if frame is a group addressed
+ * privacy action frame
+ * @skb: the skb containing the frame, length will be checked
+ */
+static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb)
+{
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE)
+		return false;
+	return _ieee80211_is_group_privacy_action((void *)skb->data);
+}
+
+/**
  * ieee80211_tu_to_usec - convert time units (TU) to microseconds
  * @tu: the TUs
  */
diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h
index acedbb68a5a3..ddb890174a0e 100644
--- a/include/linux/ieee802154.h
+++ b/include/linux/ieee802154.h
@@ -31,6 +31,8 @@
 #define IEEE802154_MIN_PSDU_LEN		9
 #define IEEE802154_FCS_LEN		2
 #define IEEE802154_MAX_AUTH_TAG_LEN	16
+#define IEEE802154_FC_LEN		2
+#define IEEE802154_SEQ_LEN		1
 
 /*  General MAC frame format:
  *  2 bytes: Frame Control
@@ -48,6 +50,7 @@
 
 #define IEEE802154_EXTENDED_ADDR_LEN	8
 #define IEEE802154_SHORT_ADDR_LEN	2
+#define IEEE802154_PAN_ID_LEN		2
 
 #define IEEE802154_LIFS_PERIOD		40
 #define IEEE802154_SIFS_PERIOD		12
@@ -221,9 +224,17 @@ enum {
 #define IEEE802154_FCTL_ACKREQ		0x0020
 #define IEEE802154_FCTL_SECEN		0x0004
 #define IEEE802154_FCTL_INTRA_PAN	0x0040
+#define IEEE802154_FCTL_DADDR		0x0c00
+#define IEEE802154_FCTL_SADDR		0xc000
 
 #define IEEE802154_FTYPE_DATA		0x0001
 
+#define IEEE802154_FCTL_ADDR_NONE	0x0000
+#define IEEE802154_FCTL_DADDR_SHORT	0x0800
+#define IEEE802154_FCTL_DADDR_EXTENDED	0x0c00
+#define IEEE802154_FCTL_SADDR_SHORT	0x8000
+#define IEEE802154_FCTL_SADDR_EXTENDED	0xc000
+
 /*
  * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA
  * @fc: frame control bytes in little-endian byteorder
@@ -261,6 +272,24 @@ static inline bool ieee802154_is_intra_pan(__le16 fc)
 	return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN);
 }
 
+/*
+ * ieee802154_daddr_mode - get daddr mode from fc
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline __le16 ieee802154_daddr_mode(__le16 fc)
+{
+	return fc & cpu_to_le16(IEEE802154_FCTL_DADDR);
+}
+
+/*
+ * ieee802154_saddr_mode - get saddr mode from fc
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline __le16 ieee802154_saddr_mode(__le16 fc)
+{
+	return fc & cpu_to_le16(IEEE802154_FCTL_SADDR);
+}
+
 /**
  * ieee802154_is_valid_psdu_len - check if psdu len is valid
  * available lengths:
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index d029ffac0d69..99403b19092f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -223,6 +223,8 @@ struct st_sensor_settings {
  * @get_irq_data_ready: Function to get the IRQ used for data ready signal.
  * @tf: Transfer function structure used by I/O operations.
  * @tb: Transfer buffers and mutex used by I/O operations.
+ * @hw_irq_trigger: if we're using the hardware interrupt on the sensor.
+ * @hw_timestamp: Latest timestamp from the interrupt handler, when in use.
  */
 struct st_sensor_data {
 	struct device *dev;
@@ -247,6 +249,9 @@ struct st_sensor_data {
 
 	const struct st_sensor_transfer_function *tf;
 	struct st_sensor_transfer_buffer tb;
+
+	bool hw_irq_trigger;
+	s64 hw_timestamp;
 };
 
 #ifdef CONFIG_IIO_BUFFER
@@ -260,7 +265,8 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops);
 
 void st_sensors_deallocate_trigger(struct iio_dev *indio_dev);
-
+int st_sensors_validate_device(struct iio_trigger *trig,
+			       struct iio_dev *indio_dev);
 #else
 static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops)
@@ -271,6 +277,7 @@ static inline void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
 {
 	return;
 }
+#define st_sensors_validate_device NULL
 #endif
 
 int st_sensors_init_sensor(struct iio_dev *indio_dev,
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 7c27fa1030e8..feb04ea20f11 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
+void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
+
+int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+			     struct inet_diag_msg *r, int ext,
+			     struct user_namespace *user_ns);
+
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
 #endif /* _INET_DIAG_H_ */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f2cb8d45513d..f8834f820ec2 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -190,7 +190,7 @@ extern struct task_group root_task_group;
 #define INIT_TASK(tsk)	\
 {									\
 	.state		= 0,						\
-	.stack		= &init_thread_info,				\
+	.stack		= init_stack,					\
 	.usage		= ATOMIC_INIT(2),				\
 	.flags		= PF_KTHREAD,					\
 	.prio		= MAX_PRIO-20,					\
diff --git a/include/linux/isa.h b/include/linux/isa.h
index 5ab85281230b..f2d0258414cf 100644
--- a/include/linux/isa.h
+++ b/include/linux/isa.h
@@ -6,6 +6,7 @@
 #define __LINUX_ISA_H
 
 #include <linux/device.h>
+#include <linux/errno.h>
 #include <linux/kernel.h>
 
 struct isa_driver {
@@ -22,13 +23,13 @@ struct isa_driver {
 
 #define to_isa_driver(x) container_of((x), struct isa_driver, driver)
 
-#ifdef CONFIG_ISA
+#ifdef CONFIG_ISA_BUS_API
 int isa_register_driver(struct isa_driver *, unsigned int);
 void isa_unregister_driver(struct isa_driver *);
 #else
 static inline int isa_register_driver(struct isa_driver *d, unsigned int i)
 {
-	return 0;
+	return -ENODEV;
 }
 
 static inline void isa_unregister_driver(struct isa_driver *d)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0536524bb9eb..68904469fba1 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -117,13 +117,18 @@ struct module;
 
 #include <linux/atomic.h>
 
+#ifdef HAVE_JUMP_LABEL
+
 static inline int static_key_count(struct static_key *key)
 {
-	return atomic_read(&key->enabled);
+	/*
+	 * -1 means the first static_key_slow_inc() is in progress.
+	 *  static_key_enabled() must return true, so return 1 here.
+	 */
+	int n = atomic_read(&key->enabled);
+	return n >= 0 ? n : 1;
 }
 
-#ifdef HAVE_JUMP_LABEL
-
 #define JUMP_TYPE_FALSE	0UL
 #define JUMP_TYPE_TRUE	1UL
 #define JUMP_TYPE_MASK	1UL
@@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #else  /* !HAVE_JUMP_LABEL */
 
+static inline int static_key_count(struct static_key *key)
+{
+	return atomic_read(&key->enabled);
+}
+
 static __always_inline void jump_label_init(void)
 {
 	static_key_initialized = true;
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 611927f5870d..ac4b3c46a84d 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -59,14 +59,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object);
 
 void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
 void kasan_kfree_large(const void *ptr);
-void kasan_kfree(void *ptr);
+void kasan_poison_kfree(void *ptr);
 void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
 		  gfp_t flags);
 void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);
 
 void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
 bool kasan_slab_free(struct kmem_cache *s, void *object);
-void kasan_poison_slab_free(struct kmem_cache *s, void *object);
 
 struct kasan_cache {
 	int alloc_meta_offset;
@@ -76,6 +75,9 @@ struct kasan_cache {
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
+size_t ksize(const void *);
+static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
+
 #else /* CONFIG_KASAN */
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
@@ -102,7 +104,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache,
 
 static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
 static inline void kasan_kfree_large(const void *ptr) {}
-static inline void kasan_kfree(void *ptr) {}
+static inline void kasan_poison_kfree(void *ptr) {}
 static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
 				size_t size, gfp_t flags) {}
 static inline void kasan_krealloc(const void *object, size_t new_size,
@@ -114,11 +116,12 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
 {
 	return false;
 }
-static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {}
 
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
 static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
+static inline void kasan_unpoison_slab(const void *ptr) { }
+
 #endif /* CONFIG_KASAN */
 
 #endif /* LINUX_KASAN_H */
diff --git a/include/linux/leds.h b/include/linux/leds.h
index d2b13066e781..e5e7f2e80a54 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -42,15 +42,16 @@ struct led_classdev {
 #define LED_UNREGISTERING	(1 << 1)
 	/* Upper 16 bits reflect control information */
 #define LED_CORE_SUSPENDRESUME	(1 << 16)
-#define LED_BLINK_ONESHOT	(1 << 17)
-#define LED_BLINK_ONESHOT_STOP	(1 << 18)
-#define LED_BLINK_INVERT	(1 << 19)
-#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 20)
-#define LED_BLINK_DISABLE	(1 << 21)
-#define LED_SYSFS_DISABLE	(1 << 22)
-#define LED_DEV_CAP_FLASH	(1 << 23)
-#define LED_HW_PLUGGABLE	(1 << 24)
-#define LED_PANIC_INDICATOR	(1 << 25)
+#define LED_BLINK_SW		(1 << 17)
+#define LED_BLINK_ONESHOT	(1 << 18)
+#define LED_BLINK_ONESHOT_STOP	(1 << 19)
+#define LED_BLINK_INVERT	(1 << 20)
+#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 21)
+#define LED_BLINK_DISABLE	(1 << 22)
+#define LED_SYSFS_DISABLE	(1 << 23)
+#define LED_DEV_CAP_FLASH	(1 << 24)
+#define LED_HW_PLUGGABLE	(1 << 25)
+#define LED_PANIC_INDICATOR	(1 << 26)
 
 	/* Set LED brightness level
 	 * Must not sleep. Use brightness_set_blocking for drivers
@@ -72,8 +73,8 @@ struct led_classdev {
 	 * and if both are zero then a sensible default should be chosen.
 	 * The call should adjust the timings in that case and if it can't
 	 * match the values specified exactly.
-	 * Deactivate blinking again when the brightness is set to a fixed
-	 * value via the brightness_set() callback.
+	 * Deactivate blinking again when the brightness is set to LED_OFF
+	 * via the brightness_set() callback.
 	 */
 	int		(*blink_set)(struct led_classdev *led_cdev,
 				     unsigned long *delay_on,
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index c18a4c19d6fc..ce9230af09c2 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -171,7 +171,7 @@ static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg,
 static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg,
 				      unsigned reg_cnt, unsigned char *val)
 {
-	int ret;
+	int ret = 0;
 	int i;
 
 	for (i = 0; i < reg_cnt; i++) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 4dbc1450bbe0..e6f6910278f3 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -466,6 +466,7 @@ enum {
 enum {
 	MLX4_INTERFACE_STATE_UP		= 1 << 0,
 	MLX4_INTERFACE_STATE_DELETION	= 1 << 1,
+	MLX4_INTERFACE_STATE_SHUTDOWN	= 1 << 2,
 };
 
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 46260fdc5305..a041b99fceac 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -469,7 +469,7 @@ struct mlx5_irq_info {
 };
 
 struct mlx5_fc_stats {
-	struct list_head list;
+	struct rb_root counters;
 	struct list_head addlist;
 	/* protect addlist add/splice operations */
 	spinlock_t addlist_lock;
@@ -550,14 +550,10 @@ struct mlx5_priv {
 	struct list_head        ctx_list;
 	spinlock_t              ctx_lock;
 
+	struct mlx5_flow_steering *steering;
 	struct mlx5_eswitch     *eswitch;
 	struct mlx5_core_sriov	sriov;
 	unsigned long		pci_dev_data;
-	struct mlx5_flow_root_namespace *root_ns;
-	struct mlx5_flow_root_namespace *fdb_root_ns;
-	struct mlx5_flow_root_namespace *esw_egress_root_ns;
-	struct mlx5_flow_root_namespace *esw_ingress_root_ns;
-
 	struct mlx5_fc_stats		fc_stats;
 	struct mlx5_rl_table            rl_table;
 };
@@ -578,6 +574,18 @@ enum mlx5_pci_status {
 	MLX5_PCI_STATUS_ENABLED,
 };
 
+struct mlx5_td {
+	struct list_head tirs_list;
+	u32              tdn;
+};
+
+struct mlx5e_resources {
+	struct mlx5_uar            cq_uar;
+	u32                        pdn;
+	struct mlx5_td             td;
+	struct mlx5_core_mkey      mkey;
+};
+
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
 	/* sync pci state */
@@ -602,6 +610,7 @@ struct mlx5_core_dev {
 	struct mlx5_profile	*profile;
 	atomic_t		num_qps;
 	u32			issi;
+	struct mlx5e_resources  mlx5e_res;
 #ifdef CONFIG_RFS_ACCEL
 	struct cpu_rmap         *rmap;
 #endif
@@ -645,6 +654,7 @@ struct mlx5_cmd_work_ent {
 	void		       *uout;
 	int			uout_size;
 	mlx5_cmd_cbk_t		callback;
+	struct delayed_work	cb_timeout_work;
 	void		       *context;
 	int			idx;
 	struct completion	done;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 4b7a107d9c19..e036d6030867 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -54,6 +54,8 @@ static inline void build_leftovers_ft_param(int *priority,
 
 enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_BYPASS,
+	MLX5_FLOW_NAMESPACE_OFFLOADS,
+	MLX5_FLOW_NAMESPACE_ETHTOOL,
 	MLX5_FLOW_NAMESPACE_KERNEL,
 	MLX5_FLOW_NAMESPACE_LEFTOVERS,
 	MLX5_FLOW_NAMESPACE_ANCHOR,
@@ -67,6 +69,12 @@ struct mlx5_flow_group;
 struct mlx5_flow_rule;
 struct mlx5_flow_namespace;
 
+struct mlx5_flow_spec {
+	u8   match_criteria_enable;
+	u32  match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
+	u32  match_value[MLX5_ST_SZ_DW(fte_match_param)];
+};
+
 struct mlx5_flow_destination {
 	enum mlx5_flow_destination_type	type;
 	union {
@@ -115,9 +123,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
  */
 struct mlx5_flow_rule *
 mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   u8 match_criteria_enable,
-		   u32 *match_criteria,
-		   u32 *match_value,
+		   struct mlx5_flow_spec *spec,
 		   u32 action,
 		   u32 flow_tag,
 		   struct mlx5_flow_destination *dest);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 152421cc6f44..d671e4e8e7db 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -893,7 +893,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_330[0xb];
 	u8         log_max_xrcd[0x5];
 
-	u8         reserved_at_340[0x20];
+	u8         reserved_at_340[0x8];
+	u8         log_max_flow_counter_bulk[0x8];
+	u8         max_flow_counter[0x10];
+
 
 	u8         reserved_at_360[0x3];
 	u8         log_max_rq[0x5];
@@ -980,7 +983,8 @@ struct mlx5_ifc_dest_format_struct_bits {
 };
 
 struct mlx5_ifc_flow_counter_list_bits {
-	u8         reserved_at_0[0x10];
+	u8         clear[0x1];
+	u8         num_of_counters[0xf];
 	u8         flow_counter_id[0x10];
 
 	u8         reserved_at_20[0x20];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 266320feb160..ab310819ac36 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -172,6 +172,7 @@ enum {
 enum {
 	MLX5_FENCE_MODE_NONE			= 0 << 5,
 	MLX5_FENCE_MODE_INITIATOR_SMALL		= 1 << 5,
+	MLX5_FENCE_MODE_FENCE			= 2 << 5,
 	MLX5_FENCE_MODE_STRONG_ORDERING		= 3 << 5,
 	MLX5_FENCE_MODE_SMALL_AND_FENCE		= 4 << 5,
 };
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5df5feb49575..ece042dfe23c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -602,7 +602,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon, bool old);
+		struct page *page, pte_t *pte, bool write, bool anon);
 #endif
 
 /*
diff --git a/include/linux/net.h b/include/linux/net.h
index 9aa49a05fe38..b9f0ff4d489c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -185,6 +185,7 @@ struct proto_ops {
 	ssize_t 	(*splice_read)(struct socket *sock,  loff_t *ppos,
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
 	int		(*set_peek_off)(struct sock *sk, int val);
+	int		(*peek_len)(struct socket *sock);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
@@ -251,7 +252,8 @@ do {									\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
 	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
 	    net_ratelimit())						\
-		__dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__);	\
+		__dynamic_pr_debug(&descriptor, pr_fmt(fmt),		\
+		                   ##__VA_ARGS__);			\
 } while (0)
 #elif defined(DEBUG)
 #define net_dbg_ratelimited(fmt, ...)				\
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e84d9d23c2d5..49736a31acaa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1209,8 +1209,10 @@ struct net_device_ops {
 						    netdev_features_t features);
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
-	int			(*ndo_neigh_construct)(struct neighbour *n);
-	void			(*ndo_neigh_destroy)(struct neighbour *n);
+	int			(*ndo_neigh_construct)(struct net_device *dev,
+						       struct neighbour *n);
+	void			(*ndo_neigh_destroy)(struct net_device *dev,
+						     struct neighbour *n);
 
 	int			(*ndo_fdb_add)(struct ndmsg *ndm,
 					       struct nlattr *tb[],
@@ -2237,6 +2239,7 @@ struct netdev_lag_lower_state_info {
 #define NETDEV_PRECHANGEUPPER	0x001A
 #define NETDEV_CHANGELOWERSTATE	0x001B
 #define NETDEV_UDP_TUNNEL_PUSH_INFO	0x001C
+#define NETDEV_CHANGE_TX_QUEUE_LEN	0x001E
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
@@ -3803,12 +3806,30 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 
 void *netdev_lower_get_next(struct net_device *dev,
 				struct list_head **iter);
+
 #define netdev_for_each_lower_dev(dev, ldev, iter) \
 	for (iter = (dev)->adj_list.lower.next, \
 	     ldev = netdev_lower_get_next(dev, &(iter)); \
 	     ldev; \
 	     ldev = netdev_lower_get_next(dev, &(iter)))
 
+struct net_device *netdev_all_lower_get_next(struct net_device *dev,
+					     struct list_head **iter);
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+						 struct list_head **iter);
+
+#define netdev_for_each_all_lower_dev(dev, ldev, iter) \
+	for (iter = (dev)->all_adj_list.lower.next, \
+	     ldev = netdev_all_lower_get_next(dev, &(iter)); \
+	     ldev; \
+	     ldev = netdev_all_lower_get_next(dev, &(iter)))
+
+#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \
+	for (iter = (dev)->all_adj_list.lower.next, \
+	     ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \
+	     ldev; \
+	     ldev = netdev_all_lower_get_next_rcu(dev, &(iter)))
+
 void *netdev_adjacent_get_private(struct list_head *adj_list);
 void *netdev_lower_get_first_private_rcu(struct net_device *dev);
 struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
@@ -3824,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 				   struct net_device *lower_dev);
 void netdev_lower_state_changed(struct net_device *lower_dev,
 				void *lower_state_info);
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+					   struct neighbour *n);
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+					  struct neighbour *n);
 
 /* RSS keys are 40 or 52 bytes long */
 #define NETDEV_RSS_KEY_LEN 52
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index dc4f58a3cdcc..e94e81ab2b58 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -6,6 +6,10 @@
 #include <linux/static_key.h>
 #include <uapi/linux/netfilter/x_tables.h>
 
+/* Test a struct->invflags and a boolean for inequality */
+#define NF_INVF(ptr, flag, boolean)					\
+	((boolean) ^ !!((ptr)->invflags & (flag)))
+
 /**
  * struct xt_action_param - parameters for matches/targets
  *
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 2ea517c7c6b9..984b2112c77b 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -115,8 +115,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct ebt_table *table);
 
-/* Used in the kernel match() functions */
-#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg))
 /* True if the hook mask denotes that the rule is in a base chain,
  * used in the check() functions */
 #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS))
diff --git a/include/linux/of.h b/include/linux/of.h
index c7292e8ea080..74eb28cadbef 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -614,7 +614,7 @@ static inline struct device_node *of_parse_phandle(const struct device_node *np,
 	return NULL;
 }
 
-static inline int of_parse_phandle_with_args(struct device_node *np,
+static inline int of_parse_phandle_with_args(const struct device_node *np,
 					     const char *list_name,
 					     const char *cells_name,
 					     int index,
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index f6e9e85164e8..b969e9443962 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -8,7 +8,7 @@ struct pci_dev;
 struct of_phandle_args;
 struct device_node;
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_PCI
 int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
 struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn);
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index ad2f67054372..c201060e0c6d 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -31,6 +31,13 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
 int of_reserved_mem_device_init(struct device *dev);
 void of_reserved_mem_device_release(struct device *dev);
 
+int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
+					     phys_addr_t align,
+					     phys_addr_t start,
+					     phys_addr_t end,
+					     bool nomap,
+					     phys_addr_t *res_base);
+
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
 			       phys_addr_t base, phys_addr_t size);
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 562a65e8bcc0..2052011bf9fb 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -102,7 +102,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r)
  */
 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
-	if (r->queue[r->producer])
+	if (unlikely(!r->size) || r->queue[r->producer])
 		return -ENOSPC;
 
 	r->queue[r->producer++] = ptr;
@@ -164,7 +164,9 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
  */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
-	return r->queue[r->consumer];
+	if (likely(r->size))
+		return r->queue[r->consumer];
+	return NULL;
 }
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
@@ -347,20 +349,14 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 	return 0;
 }
 
-static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
-				  void (*destroy)(void *))
+static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
+					   int size, gfp_t gfp,
+					   void (*destroy)(void *))
 {
-	unsigned long flags;
 	int producer = 0;
-	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
 	void **old;
 	void *ptr;
 
-	if (!queue)
-		return -ENOMEM;
-
-	spin_lock_irqsave(&(r)->producer_lock, flags);
-
 	while ((ptr = ptr_ring_consume(r)))
 		if (producer < size)
 			queue[producer++] = ptr;
@@ -373,6 +369,23 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
 	old = r->queue;
 	r->queue = queue;
 
+	return old;
+}
+
+static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+				  void (*destroy)(void *))
+{
+	unsigned long flags;
+	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
+	void **old;
+
+	if (!queue)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&(r)->producer_lock, flags);
+
+	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
+
 	spin_unlock_irqrestore(&(r)->producer_lock, flags);
 
 	kfree(old);
@@ -380,6 +393,48 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
 	return 0;
 }
 
+static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings,
+					   int size,
+					   gfp_t gfp, void (*destroy)(void *))
+{
+	unsigned long flags;
+	void ***queues;
+	int i;
+
+	queues = kmalloc(nrings * sizeof *queues, gfp);
+	if (!queues)
+		goto noqueues;
+
+	for (i = 0; i < nrings; ++i) {
+		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
+		if (!queues[i])
+			goto nomem;
+	}
+
+	for (i = 0; i < nrings; ++i) {
+		spin_lock_irqsave(&(rings[i])->producer_lock, flags);
+		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
+						  size, gfp, destroy);
+		spin_unlock_irqrestore(&(rings[i])->producer_lock, flags);
+	}
+
+	for (i = 0; i < nrings; ++i)
+		kfree(queues[i]);
+
+	kfree(queues);
+
+	return 0;
+
+nomem:
+	while (--i >= 0)
+		kfree(queues[i]);
+
+	kfree(queues);
+
+noqueues:
+	return -ENOMEM;
+}
+
 static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
 {
 	void *ptr;
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 17018f3c066e..c038ae36b10e 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -235,6 +235,9 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
 	if (!pwm)
 		return -EINVAL;
 
+	if (duty_ns < 0 || period_ns < 0)
+		return -EINVAL;
+
 	pwm_get_state(pwm, &state);
 	if (state.duty_cycle == duty_ns && state.period == period_ns)
 		return 0;
@@ -461,6 +464,8 @@ static inline bool pwm_can_sleep(struct pwm_device *pwm)
 
 static inline void pwm_apply_args(struct pwm_device *pwm)
 {
+	struct pwm_state state = { };
+
 	/*
 	 * PWM users calling pwm_apply_args() expect to have a fresh config
 	 * where the polarity and period are set according to pwm_args info.
@@ -473,18 +478,20 @@ static inline void pwm_apply_args(struct pwm_device *pwm)
 	 * at startup (even if they are actually enabled), thus authorizing
 	 * polarity setting.
 	 *
-	 * Instead of setting ->enabled to false, we call pwm_disable()
-	 * before pwm_set_polarity() to ensure that everything is configured
-	 * as expected, and the PWM is really disabled when the user request
-	 * it.
+	 * To fulfill this requirement, we apply a new state which disables
+	 * the PWM device and set the reference period and polarity config.
 	 *
 	 * Note that PWM users requiring a smooth handover between the
 	 * bootloader and the kernel (like critical regulators controlled by
 	 * PWM devices) will have to switch to the atomic API and avoid calling
 	 * pwm_apply_args().
 	 */
-	pwm_disable(pwm);
-	pwm_set_polarity(pwm, pwm->args.polarity);
+
+	state.enabled = false;
+	state.polarity = pwm->args.polarity;
+	state.period = pwm->args.period;
+
+	pwm_apply_state(pwm, &state);
 }
 
 struct pwm_lookup {
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 71d523b4bc54..4475a9d8ae15 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -49,6 +49,7 @@ struct qed_start_vport_params {
 	bool drop_ttl0;
 	u8 vport_id;
 	u16 mtu;
+	bool clear_stats;
 };
 
 struct qed_stop_rxq_params {
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index b6900099ea81..e585018498d5 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -76,6 +76,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *);
 /* Fast replacement of a single node without remove/rebalance/add/rebalance */
 extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
 			    struct rb_root *root);
+extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+				struct rb_root *root);
 
 static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
 				struct rb_node **rb_link)
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 14d7b831b63a..d076183e49be 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -130,6 +130,19 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
 		WRITE_ONCE(root->rb_node, new);
 }
 
+static inline void
+__rb_change_child_rcu(struct rb_node *old, struct rb_node *new,
+		      struct rb_node *parent, struct rb_root *root)
+{
+	if (parent) {
+		if (parent->rb_left == old)
+			rcu_assign_pointer(parent->rb_left, new);
+		else
+			rcu_assign_pointer(parent->rb_right, new);
+	} else
+		rcu_assign_pointer(root->rb_node, new);
+}
+
 extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5f1533e3d032..85830e6c797b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -611,6 +611,12 @@ static inline void rcu_preempt_sleep_check(void)
 	rcu_dereference_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(p)); \
 })
+#define rcu_dereference_raw(p) \
+({ \
+	/* Dependency order vs. p above. */ \
+	typeof(p) ________p1 = lockless_dereference(p); \
+	((typeof(*p) __force __kernel *)(________p1)); \
+})
 
 /**
  * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
@@ -729,8 +735,6 @@ static inline void rcu_preempt_sleep_check(void)
 	__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
 				__rcu)
 
-#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
-
 /*
  * The tracing infrastructure traces RCU (we want that), but unfortunately
  * some of the RCU checks causes tracing to lock up the system.
diff --git a/include/linux/reset.h b/include/linux/reset.h
index ec0306ce7b92..45a4abeb6acb 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -84,8 +84,8 @@ static inline struct reset_control *__devm_reset_control_get(
 #endif /* CONFIG_RESET_CONTROLLER */
 
 /**
- * reset_control_get - Lookup and obtain an exclusive reference to a
- *                     reset controller.
+ * reset_control_get_exclusive - Lookup and obtain an exclusive reference
+ *                               to a reset controller.
  * @dev: device to be reset by the controller
  * @id: reset line name
  *
@@ -98,8 +98,8 @@ static inline struct reset_control *__devm_reset_control_get(
  *
  * Use of id names is optional.
  */
-static inline struct reset_control *__must_check reset_control_get(
-					struct device *dev, const char *id)
+static inline struct reset_control *
+__must_check reset_control_get_exclusive(struct device *dev, const char *id)
 {
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
@@ -107,12 +107,6 @@ static inline struct reset_control *__must_check reset_control_get(
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
 }
 
-static inline struct reset_control *reset_control_get_optional(
-					struct device *dev, const char *id)
-{
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
-}
-
 /**
  * reset_control_get_shared - Lookup and obtain a shared reference to a
  *                            reset controller.
@@ -141,9 +135,21 @@ static inline struct reset_control *reset_control_get_shared(
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
 }
 
+static inline struct reset_control *reset_control_get_optional_exclusive(
+					struct device *dev, const char *id)
+{
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
+}
+
+static inline struct reset_control *reset_control_get_optional_shared(
+					struct device *dev, const char *id)
+{
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
+}
+
 /**
- * of_reset_control_get - Lookup and obtain an exclusive reference to a
- *                        reset controller.
+ * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference
+ *                                  to a reset controller.
  * @node: device to be reset by the controller
  * @id: reset line name
  *
@@ -151,15 +157,41 @@ static inline struct reset_control *reset_control_get_shared(
  *
  * Use of id names is optional.
  */
-static inline struct reset_control *of_reset_control_get(
+static inline struct reset_control *of_reset_control_get_exclusive(
 				struct device_node *node, const char *id)
 {
 	return __of_reset_control_get(node, id, 0, 0);
 }
 
 /**
- * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to
- *                                 a reset controller by index.
+ * of_reset_control_get_shared - Lookup and obtain an shared reference
+ *                               to a reset controller.
+ * @node: device to be reset by the controller
+ * @id: reset line name
+ *
+ * When a reset-control is shared, the behavior of reset_control_assert /
+ * deassert is changed, the reset-core will keep track of a deassert_count
+ * and only (re-)assert the reset after reset_control_assert has been called
+ * as many times as reset_control_deassert was called. Also see the remark
+ * about shared reset-controls in the reset_control_assert docs.
+ *
+ * Calling reset_control_assert without first calling reset_control_deassert
+ * is not allowed on a shared reset control. Calling reset_control_reset is
+ * also not allowed on a shared reset control.
+ * Returns a struct reset_control or IS_ERR() condition containing errno.
+ *
+ * Use of id names is optional.
+ */
+static inline struct reset_control *of_reset_control_get_shared(
+				struct device_node *node, const char *id)
+{
+	return __of_reset_control_get(node, id, 0, 1);
+}
+
+/**
+ * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive
+ *                                           reference to a reset controller
+ *                                           by index.
  * @node: device to be reset by the controller
  * @index: index of the reset controller
  *
@@ -167,49 +199,60 @@ static inline struct reset_control *of_reset_control_get(
  * in whatever order. Returns a struct reset_control or IS_ERR() condition
  * containing errno.
  */
-static inline struct reset_control *of_reset_control_get_by_index(
+static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 					struct device_node *node, int index)
 {
 	return __of_reset_control_get(node, NULL, index, 0);
 }
 
 /**
- * devm_reset_control_get - resource managed reset_control_get()
- * @dev: device to be reset by the controller
- * @id: reset line name
+ * of_reset_control_get_shared_by_index - Lookup and obtain an shared
+ *                                        reference to a reset controller
+ *                                        by index.
+ * @node: device to be reset by the controller
+ * @index: index of the reset controller
+ *
+ * When a reset-control is shared, the behavior of reset_control_assert /
+ * deassert is changed, the reset-core will keep track of a deassert_count
+ * and only (re-)assert the reset after reset_control_assert has been called
+ * as many times as reset_control_deassert was called. Also see the remark
+ * about shared reset-controls in the reset_control_assert docs.
+ *
+ * Calling reset_control_assert without first calling reset_control_deassert
+ * is not allowed on a shared reset control. Calling reset_control_reset is
+ * also not allowed on a shared reset control.
+ * Returns a struct reset_control or IS_ERR() condition containing errno.
  *
- * Managed reset_control_get(). For reset controllers returned from this
- * function, reset_control_put() is called automatically on driver detach.
- * See reset_control_get() for more information.
+ * This is to be used to perform a list of resets for a device or power domain
+ * in whatever order. Returns a struct reset_control or IS_ERR() condition
+ * containing errno.
  */
-static inline struct reset_control *__must_check devm_reset_control_get(
-					struct device *dev, const char *id)
-{
-#ifndef CONFIG_RESET_CONTROLLER
-	WARN_ON(1);
-#endif
-	return __devm_reset_control_get(dev, id, 0, 0);
-}
-
-static inline struct reset_control *devm_reset_control_get_optional(
-					struct device *dev, const char *id)
+static inline struct reset_control *of_reset_control_get_shared_by_index(
+					struct device_node *node, int index)
 {
-	return __devm_reset_control_get(dev, id, 0, 0);
+	return __of_reset_control_get(node, NULL, index, 1);
 }
 
 /**
- * devm_reset_control_get_by_index - resource managed reset_control_get
+ * devm_reset_control_get_exclusive - resource managed
+ *                                    reset_control_get_exclusive()
  * @dev: device to be reset by the controller
- * @index: index of the reset controller
+ * @id: reset line name
  *
- * Managed reset_control_get(). For reset controllers returned from this
- * function, reset_control_put() is called automatically on driver detach.
- * See reset_control_get() for more information.
+ * Managed reset_control_get_exclusive(). For reset controllers returned
+ * from this function, reset_control_put() is called automatically on driver
+ * detach.
+ *
+ * See reset_control_get_exclusive() for more information.
  */
-static inline struct reset_control *devm_reset_control_get_by_index(
-					struct device *dev, int index)
+static inline struct reset_control *
+__must_check devm_reset_control_get_exclusive(struct device *dev,
+					      const char *id)
 {
-	return __devm_reset_control_get(dev, NULL, index, 0);
+#ifndef CONFIG_RESET_CONTROLLER
+	WARN_ON(1);
+#endif
+	return __devm_reset_control_get(dev, id, 0, 0);
 }
 
 /**
@@ -227,6 +270,36 @@ static inline struct reset_control *devm_reset_control_get_shared(
 	return __devm_reset_control_get(dev, id, 0, 1);
 }
 
+static inline struct reset_control *devm_reset_control_get_optional_exclusive(
+					struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, 0);
+}
+
+static inline struct reset_control *devm_reset_control_get_optional_shared(
+					struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, 1);
+}
+
+/**
+ * devm_reset_control_get_exclusive_by_index - resource managed
+ *                                             reset_control_get_exclusive()
+ * @dev: device to be reset by the controller
+ * @index: index of the reset controller
+ *
+ * Managed reset_control_get_exclusive(). For reset controllers returned from
+ * this function, reset_control_put() is called automatically on driver
+ * detach.
+ *
+ * See reset_control_get_exclusive() for more information.
+ */
+static inline struct reset_control *
+devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
+{
+	return __devm_reset_control_get(dev, NULL, index, 0);
+}
+
 /**
  * devm_reset_control_get_shared_by_index - resource managed
  * reset_control_get_shared
@@ -237,10 +310,60 @@ static inline struct reset_control *devm_reset_control_get_shared(
  * this function, reset_control_put() is called automatically on driver detach.
  * See reset_control_get_shared() for more information.
  */
-static inline struct reset_control *devm_reset_control_get_shared_by_index(
-					struct device *dev, int index)
+static inline struct reset_control *
+devm_reset_control_get_shared_by_index(struct device *dev, int index)
 {
 	return __devm_reset_control_get(dev, NULL, index, 1);
 }
 
+/*
+ * TEMPORARY calls to use during transition:
+ *
+ *   of_reset_control_get() => of_reset_control_get_exclusive()
+ *
+ * These inline function calls will be removed once all consumers
+ * have been moved over to the new explicit API.
+ */
+static inline struct reset_control *reset_control_get(
+				struct device *dev, const char *id)
+{
+	return reset_control_get_exclusive(dev, id);
+}
+
+static inline struct reset_control *reset_control_get_optional(
+					struct device *dev, const char *id)
+{
+	return reset_control_get_optional_exclusive(dev, id);
+}
+
+static inline struct reset_control *of_reset_control_get(
+				struct device_node *node, const char *id)
+{
+	return of_reset_control_get_exclusive(node, id);
+}
+
+static inline struct reset_control *of_reset_control_get_by_index(
+				struct device_node *node, int index)
+{
+	return of_reset_control_get_exclusive_by_index(node, index);
+}
+
+static inline struct reset_control *devm_reset_control_get(
+				struct device *dev, const char *id)
+{
+	return devm_reset_control_get_exclusive(dev, id);
+}
+
+static inline struct reset_control *devm_reset_control_get_optional(
+				struct device *dev, const char *id)
+{
+	return devm_reset_control_get_optional_exclusive(dev, id);
+
+}
+
+static inline struct reset_control *devm_reset_control_get_by_index(
+				struct device *dev, int index)
+{
+	return devm_reset_control_get_exclusive_by_index(dev, index);
+}
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada26345..253538f29ade 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj)
 	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
 }
 
-extern void thread_info_cache_init(void);
+extern void thread_stack_cache_init(void);
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
 static inline unsigned long stack_not_used(struct task_struct *p)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 7973a821ac58..ead97654c4e9 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -277,7 +277,10 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 
 static inline int raw_read_seqcount_latch(seqcount_t *s)
 {
-	return lockless_dereference(s)->sequence;
+	int seq = READ_ONCE(s->sequence);
+	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
+	smp_read_barrier_depends();
+	return seq;
 }
 
 /**
@@ -331,7 +334,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  *	unsigned seq, idx;
  *
  *	do {
- *		seq = lockless_dereference(latch)->seq;
+ *		seq = raw_read_seqcount_latch(&latch->seq);
  *
  *		idx = seq & 0x01;
  *		entry = data_query(latch->data[idx], ...);
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index 678bfbf78ac4..f4dfade428f0 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -151,16 +151,25 @@ static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp)
 	return ptr_ring_init(&a->ring, size, gfp);
 }
 
-void __skb_array_destroy_skb(void *ptr)
+static void __skb_array_destroy_skb(void *ptr)
 {
 	kfree_skb(ptr);
 }
 
-int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
+static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
 {
 	return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
 }
 
+static inline int skb_array_resize_multiple(struct skb_array **rings,
+					    int nrings, int size, gfp_t gfp)
+{
+	BUILD_BUG_ON(offsetof(struct skb_array, ring));
+	return ptr_ring_resize_multiple((struct ptr_ring **)rings,
+					nrings, size, gfp,
+					__skb_array_destroy_skb);
+}
+
 static inline void skb_array_cleanup(struct skb_array *a)
 {
 	ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dc0fca747c5e..6f0b3e0adc73 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@
 #include <net/flow_dissector.h>
 #include <linux/splice.h>
 #include <linux/in6.h>
+#include <linux/if_packet.h>
 #include <net/flow.h>
 
 /* The interface for checksum offload between the stack and networking drivers
@@ -881,6 +882,15 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 	return (struct rtable *)skb_dst(skb);
 }
 
+/* For mangling skb->pkt_type from user space side from applications
+ * such as nft, tc, etc, we only allow a conservative subset of
+ * possible pkt_types to be set.
+*/
+static inline bool skb_pkt_type_ok(u32 ptype)
+{
+	return ptype <= PACKET_OTHERHOST;
+}
+
 void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_tx_error(struct sk_buff *skb);
@@ -1069,6 +1079,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 }
 
 void __skb_get_hash(struct sk_buff *skb);
+u32 __skb_get_hash_symmetric(struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 		   const struct flow_keys *keys, int hlen);
@@ -2877,6 +2888,25 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb,
 }
 
 /**
+ *	skb_push_rcsum - push skb and update receive checksum
+ *	@skb: buffer to update
+ *	@len: length of data pulled
+ *
+ *	This function performs an skb_push on the packet and updates
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_push unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
+					    unsigned int len)
+{
+	skb_push(skb, len);
+	skb_postpush_rcsum(skb, skb->data, len);
+	return skb->data;
+}
+
+/**
  *	pskb_trim_rcsum - trim received skb and update checksum
  *	@skb: buffer to trim
  *	@len: new length
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 4018b48f2b3b..a0596ca0e80a 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -36,6 +36,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
 {
 	switch (sk->sk_family) {
 	case AF_INET:
+		if (sk->sk_type == SOCK_RAW)
+			return SKNLGRP_NONE;
+
 		switch (sk->sk_protocol) {
 		case IPPROTO_TCP:
 			return SKNLGRP_INET_TCP_DESTROY;
@@ -45,6 +48,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
 			return SKNLGRP_NONE;
 		}
 	case AF_INET6:
+		if (sk->sk_type == SOCK_RAW)
+			return SKNLGRP_NONE;
+
 		switch (sk->sk_protocol) {
 		case IPPROTO_TCP:
 			return SKNLGRP_INET6_TCP_DESTROY;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 19c659d1c0f8..b6810c92b8bb 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -137,8 +137,6 @@ struct rpc_create_args {
 #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT	(1UL << 9)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
-					struct rpc_xprt *xprt);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				const struct rpc_program *, u32);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b7dabc4baafd..79ba50856707 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -84,6 +84,7 @@ struct svc_xprt {
 
 	struct net		*xpt_net;
 	struct rpc_xprt		*xpt_bc_xprt;	/* NFSv4.1 backchannel */
+	struct rpc_xprt_switch	*xpt_bc_xps;	/* NFSv4.1 backchannel */
 };
 
 static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 5aa3834619a8..5e3e1b63dbb3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -297,6 +297,7 @@ struct xprt_create {
 	size_t			addrlen;
 	const char		*servername;
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
+	struct rpc_xprt_switch	*bc_xps;
 	unsigned int		flags;
 };
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e45abe7db9a6..ee517bef0db0 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -335,6 +335,8 @@ struct thermal_genl_event {
  * @get_trend: a pointer to a function that reads the sensor temperature trend.
  * @set_emul_temp: a pointer to a function that sets sensor emulated
  *		   temperature.
+ * @set_trip_temp: a pointer to a function that sets the trip temperature on
+ *		   hardware.
  */
 struct thermal_zone_of_device_ops {
 	int (*get_temp)(void *, int *);
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 966889a20ea3..e479033bd782 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -180,11 +180,11 @@ struct ehci_regs {
  * PORTSCx
  */
 	/* HOSTPC: offset 0x84 */
-	u32		hostpc[1];	/* HOSTPC extension */
+	u32		hostpc[0];	/* HOSTPC extension */
 #define HOSTPC_PHCD	(1<<22)		/* Phy clock disable */
 #define HOSTPC_PSPD	(3<<25)		/* Port speed detection */
 
-	u32		reserved5[16];
+	u32		reserved5[17];
 
 	/* USBMODE_EX: offset 0xc8 */
 	u32		usbmode_ex;	/* USB Device mode extension */
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 457651bf45b0..fefe8b06a63d 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -1034,6 +1034,8 @@ static inline int usb_gadget_activate(struct usb_gadget *gadget)
  * @udc_name: A name of UDC this driver should be bound to. If udc_name is NULL,
  *	this driver will be bound to any available UDC.
  * @pending: UDC core private data used for deferred probe of this driver.
+ * @match_existing_only: If udc is not found, return an error and don't add this
+ *      gadget driver to list of pending driver
  *
  * Devices are disabled till a gadget driver successfully bind()s, which
  * means the driver will handle setup() requests needed to enumerate (and
@@ -1097,6 +1099,7 @@ struct usb_gadget_driver {
 
 	char			*udc_name;
 	struct list_head	pending;
+	unsigned                match_existing_only:1;
 };
 
 
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 0b3da40a525e..d315c8907869 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -142,10 +142,11 @@ enum musb_vbus_id_status {
 };
 
 #if IS_ENABLED(CONFIG_USB_MUSB_HDRC)
-void musb_mailbox(enum musb_vbus_id_status status);
+int musb_mailbox(enum musb_vbus_id_status status);
 #else
-static inline void musb_mailbox(enum musb_vbus_id_status status)
+static inline int musb_mailbox(enum musb_vbus_id_status status)
 {
+	return 0;
 }
 #endif
 
diff --git a/include/media/v4l2-mc.h b/include/media/v4l2-mc.h
index 98a938aabdfb..7a8d6037a4bb 100644
--- a/include/media/v4l2-mc.h
+++ b/include/media/v4l2-mc.h
@@ -1,7 +1,7 @@
 /*
  * v4l2-mc.h - Media Controller V4L2 types and prototypes
  *
- * Copyright (C) 2016 Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+ * Copyright (C) 2016 Mauro Carvalho Chehab <mchehab@kernel.org>
  * Copyright (C) 2006-2010 Nokia Corporation
  * Copyright (c) 2016 Intel Corporation.
  *
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index eefcf3e96421..003b25283407 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -65,7 +65,7 @@
 #define HCI_I2C		8
 
 /* HCI controller types */
-#define HCI_BREDR	0x00
+#define HCI_PRIMARY	0x00
 #define HCI_AMP		0x01
 
 /* First BR/EDR Controller shall have ID = 0 */
@@ -445,6 +445,7 @@ enum {
 /* ---- HCI Error Codes ---- */
 #define HCI_ERROR_UNKNOWN_CONN_ID	0x02
 #define HCI_ERROR_AUTH_FAILURE		0x05
+#define HCI_ERROR_PIN_OR_KEY_MISSING	0x06
 #define HCI_ERROR_MEMORY_EXCEEDED	0x07
 #define HCI_ERROR_CONNECTION_TIMEOUT	0x08
 #define HCI_ERROR_REJ_LIMITED_RESOURCES	0x0d
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index dc71473462ac..77d7fe115a0d 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -654,6 +654,7 @@ enum {
 	HCI_CONN_PARAM_REMOVAL_PEND,
 	HCI_CONN_NEW_LINK_KEY,
 	HCI_CONN_SCANNING,
+	HCI_CONN_AUTH_FAILURE,
 };
 
 static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index ea73e0826aa7..7647964b1efa 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -645,6 +645,7 @@ struct mgmt_ev_device_connected {
 #define MGMT_DEV_DISCONN_TIMEOUT	0x01
 #define MGMT_DEV_DISCONN_LOCAL_HOST	0x02
 #define MGMT_DEV_DISCONN_REMOTE		0x03
+#define MGMT_DEV_DISCONN_AUTH_FAILURE	0x04
 
 #define MGMT_EV_DEVICE_DISCONNECTED	0x000C
 struct mgmt_ev_device_disconnected {
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 791800ddd6d9..6360c259da6d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -34,6 +34,9 @@
 
 #define BOND_DEFAULT_MIIMON	100
 
+#ifndef __long_aligned
+#define __long_aligned __attribute__((aligned((sizeof(long)))))
+#endif
 /*
  * Less bad way to call ioctl from within the kernel; this needs to be
  * done some other way to get the call out of interrupt context.
@@ -138,7 +141,9 @@ struct bond_params {
 	struct reciprocal_value reciprocal_packets_per_slave;
 	u16 ad_actor_sys_prio;
 	u16 ad_user_port_key;
-	u8 ad_actor_system[ETH_ALEN];
+
+	/* 2 bytes of padding : see ether_addr_equal_64bits() */
+	u8 ad_actor_system[ETH_ALEN + 2];
 };
 
 struct bond_parm_tbl {
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7bbb00d8b2cd..9c23f4d33e06 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -330,6 +330,9 @@ struct ieee80211_supported_band {
  * in a separate chapter.
  */
 
+#define VHT_MUMIMO_GROUPS_DATA_LEN (WLAN_MEMBERSHIP_LEN +\
+				    WLAN_USER_POSITION_LEN)
+
 /**
  * struct vif_params - describes virtual interface parameters
  * @use_4addr: use 4-address frames
@@ -339,10 +342,13 @@ struct ieee80211_supported_band {
  *	This feature is only fully supported by drivers that enable the
  *	%NL80211_FEATURE_MAC_ON_CREATE flag.  Others may support creating
  **	only p2p devices with specified MAC.
+ * @vht_mumimo_groups: MU-MIMO groupID. used for monitoring only
+ *	 packets belonging to that MU-MIMO groupID.
  */
 struct vif_params {
-       int use_4addr;
-       u8 macaddr[ETH_ALEN];
+	int use_4addr;
+	u8 macaddr[ETH_ALEN];
+	u8 vht_mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN];
 };
 
 /**
@@ -774,6 +780,7 @@ enum station_parameters_apply_mask {
  *	(bitmask of BIT(NL80211_STA_FLAG_...))
  * @listen_interval: listen interval or -1 for no change
  * @aid: AID or zero for no change
+ * @peer_aid: mesh peer AID or zero for no change
  * @plink_action: plink action to take
  * @plink_state: set the peer link state for a station
  * @ht_capa: HT capabilities of station
@@ -805,6 +812,7 @@ struct station_parameters {
 	u32 sta_modify_mask;
 	int listen_interval;
 	u16 aid;
+	u16 peer_aid;
 	u8 supported_rates_len;
 	u8 plink_action;
 	u8 plink_state;
@@ -1418,6 +1426,21 @@ struct cfg80211_ssid {
 };
 
 /**
+ * struct cfg80211_scan_info - information about completed scan
+ * @scan_start_tsf: scan start time in terms of the TSF of the BSS that the
+ *	wireless device that requested the scan is connected to. If this
+ *	information is not available, this field is left zero.
+ * @tsf_bssid: the BSSID according to which %scan_start_tsf is set.
+ * @aborted: set to true if the scan was aborted for any reason,
+ *	userspace will be notified of that
+ */
+struct cfg80211_scan_info {
+	u64 scan_start_tsf;
+	u8 tsf_bssid[ETH_ALEN] __aligned(2);
+	bool aborted;
+};
+
+/**
  * struct cfg80211_scan_request - scan request description
  *
  * @ssids: SSIDs to scan for (active scan only)
@@ -1427,12 +1450,17 @@ struct cfg80211_ssid {
  * @scan_width: channel width for scanning
  * @ie: optional information element(s) to add into Probe Request or %NULL
  * @ie_len: length of ie in octets
+ * @duration: how long to listen on each channel, in TUs. If
+ *	%duration_mandatory is not set, this is the maximum dwell time and
+ *	the actual dwell time may be shorter.
+ * @duration_mandatory: if set, the scan duration must be as specified by the
+ *	%duration field.
  * @flags: bit field of flags controlling operation
  * @rates: bitmap of rates to advertise for each band
  * @wiphy: the wiphy this was for
  * @scan_start: time (in jiffies) when the scan started
  * @wdev: the wireless device to scan for
- * @aborted: (internal) scan request was notified as aborted
+ * @info: (internal) information about completed scan
  * @notified: (internal) scan request was notified as done or aborted
  * @no_cck: used to send probe requests at non CCK rate in 2GHz band
  * @mac_addr: MAC address used with randomisation
@@ -1448,6 +1476,8 @@ struct cfg80211_scan_request {
 	enum nl80211_bss_scan_width scan_width;
 	const u8 *ie;
 	size_t ie_len;
+	u16 duration;
+	bool duration_mandatory;
 	u32 flags;
 
 	u32 rates[NUM_NL80211_BANDS];
@@ -1461,7 +1491,8 @@ struct cfg80211_scan_request {
 	/* internal */
 	struct wiphy *wiphy;
 	unsigned long scan_start;
-	bool aborted, notified;
+	struct cfg80211_scan_info info;
+	bool notified;
 	bool no_cck;
 
 	/* keep last */
@@ -1594,12 +1625,19 @@ enum cfg80211_signal_type {
  *	buffered on the device) and be accurate to about 10ms.
  *	If the frame isn't buffered, just passing the return value of
  *	ktime_get_boot_ns() is likely appropriate.
+ * @parent_tsf: the time at the start of reception of the first octet of the
+ *	timestamp field of the frame. The time is the TSF of the BSS specified
+ *	by %parent_bssid.
+ * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to
+ *	the BSS that requested the scan in which the beacon/probe was received.
  */
 struct cfg80211_inform_bss {
 	struct ieee80211_channel *chan;
 	enum nl80211_bss_scan_width scan_width;
 	s32 signal;
 	u64 boottime_ns;
+	u64 parent_tsf;
+	u8 parent_bssid[ETH_ALEN] __aligned(2);
 };
 
 /**
@@ -4061,10 +4099,10 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
  * cfg80211_scan_done - notify that scan finished
  *
  * @request: the corresponding scan request
- * @aborted: set to true if the scan was aborted for any reason,
- *	userspace will be notified of that
+ * @info: information about the completed scan
  */
-void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted);
+void cfg80211_scan_done(struct cfg80211_scan_request *request,
+			struct cfg80211_scan_info *info);
 
 /**
  * cfg80211_sched_scan_results - notify that new scan results are available
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 171cd76558fb..795ca4008f72 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -219,9 +219,22 @@ struct wpan_phy {
 
 	struct device dev;
 
+	/* the network namespace this phy lives in currently */
+	possible_net_t _net;
+
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
+static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy)
+{
+	return read_pnet(&wpan_phy->_net);
+}
+
+static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net)
+{
+	write_pnet(&wpan_phy->_net, net);
+}
+
 struct ieee802154_addr {
 	u8 mode;
 	__le16 pan_id;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1d45b61cb320..c99ffe8cef3c 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -90,6 +90,9 @@ struct devlink_ops {
 				       u16 tc_index,
 				       enum devlink_sb_pool_type pool_type,
 				       u32 *p_cur, u32 *p_max);
+
+	int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
+	int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 20b3087ad193..52ab18bc2b0d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -32,6 +32,8 @@ enum dsa_tag_protocol {
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
 
+#define DSA_RTABLE_NONE		-1
+
 struct dsa_chip_data {
 	/*
 	 * How to access the switch configuration registers.
diff --git a/include/net/gre.h b/include/net/gre.h
index 5dce30a6abe3..7a54a31d1d4c 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 				       u8 name_assign_type);
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-		     bool *csum_err, __be16 proto);
+		     bool *csum_err, __be16 proto, int nhs);
 
 static inline int gre_calc_hlen(__be16 o_flags)
 {
diff --git a/include/net/ip.h b/include/net/ip.h
index 37165fba3741..08f36cd2b874 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -313,10 +313,9 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 	return min(dst->dev->mtu, IP_MAX_MTU);
 }
 
-static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
+static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
+					  const struct sk_buff *skb)
 {
-	struct sock *sk = skb->sk;
-
 	if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
 		bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a52009ffc19f..b4faadbb4e01 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4697,9 +4697,10 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw);
  * any context, including hardirq context.
  *
  * @hw: the hardware that finished the scan
- * @aborted: set to true if scan was aborted
+ * @info: information about the completed scan
  */
-void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted);
+void ieee80211_scan_completed(struct ieee80211_hw *hw,
+			      struct cfg80211_scan_info *info);
 
 /**
  * ieee80211_sched_scan_results - got results from scheduled scan
diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index e465c8551ac3..286824acd008 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -247,14 +247,123 @@ struct ieee802154_ops {
  */
 static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb)
 {
+	__le16 fc;
+
 	/* check if we can fc at skb_mac_header of sk buffer */
-	if (unlikely(!skb_mac_header_was_set(skb) ||
-		     (skb_tail_pointer(skb) - skb_mac_header(skb)) < 2)) {
-		WARN_ON(1);
+	if (WARN_ON(!skb_mac_header_was_set(skb) ||
+		    (skb_tail_pointer(skb) -
+		     skb_mac_header(skb)) < IEEE802154_FC_LEN))
 		return cpu_to_le16(0);
+
+	memcpy(&fc, skb_mac_header(skb), IEEE802154_FC_LEN);
+	return fc;
+}
+
+/**
+ * ieee802154_skb_dst_pan - get the pointer to destination pan field
+ * @fc: mac header frame control field
+ * @skb: skb where the destination pan pointer will be get from
+ */
+static inline unsigned char *ieee802154_skb_dst_pan(__le16 fc,
+						    const struct sk_buff *skb)
+{
+	unsigned char *dst_pan;
+
+	switch (ieee802154_daddr_mode(fc)) {
+	case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE):
+		dst_pan = NULL;
+		break;
+	case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT):
+	case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED):
+		dst_pan = skb_mac_header(skb) +
+			  IEEE802154_FC_LEN +
+			  IEEE802154_SEQ_LEN;
+		break;
+	default:
+		WARN_ONCE(1, "invalid addr mode detected");
+		dst_pan = NULL;
+		break;
+	}
+
+	return dst_pan;
+}
+
+/**
+ * ieee802154_skb_src_pan - get the pointer to source pan field
+ * @fc: mac header frame control field
+ * @skb: skb where the source pan pointer will be get from
+ */
+static inline unsigned char *ieee802154_skb_src_pan(__le16 fc,
+						    const struct sk_buff *skb)
+{
+	unsigned char *src_pan;
+
+	switch (ieee802154_saddr_mode(fc)) {
+	case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE):
+		src_pan = NULL;
+		break;
+	case cpu_to_le16(IEEE802154_FCTL_SADDR_SHORT):
+	case cpu_to_le16(IEEE802154_FCTL_SADDR_EXTENDED):
+		/* if intra-pan and source addr mode is non none,
+		 * then source pan id is equal destination pan id.
+		 */
+		if (ieee802154_is_intra_pan(fc)) {
+			src_pan = ieee802154_skb_dst_pan(fc, skb);
+			break;
+		}
+
+		switch (ieee802154_daddr_mode(fc)) {
+		case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE):
+			src_pan = skb_mac_header(skb) +
+				  IEEE802154_FC_LEN +
+				  IEEE802154_SEQ_LEN;
+			break;
+		case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT):
+			src_pan = skb_mac_header(skb) +
+				  IEEE802154_FC_LEN +
+				  IEEE802154_SEQ_LEN +
+				  IEEE802154_PAN_ID_LEN +
+				  IEEE802154_SHORT_ADDR_LEN;
+			break;
+		case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED):
+			src_pan = skb_mac_header(skb) +
+				  IEEE802154_FC_LEN +
+				  IEEE802154_SEQ_LEN +
+				  IEEE802154_PAN_ID_LEN +
+				  IEEE802154_EXTENDED_ADDR_LEN;
+			break;
+		default:
+			WARN_ONCE(1, "invalid addr mode detected");
+			src_pan = NULL;
+			break;
+		}
+		break;
+	default:
+		WARN_ONCE(1, "invalid addr mode detected");
+		src_pan = NULL;
+		break;
 	}
 
-	return get_unaligned_le16(skb_mac_header(skb));
+	return src_pan;
+}
+
+/**
+ * ieee802154_skb_is_intra_pan_addressing - checks whenever the mac addressing
+ *	is an intra pan communication
+ * @fc: mac header frame control field
+ * @skb: skb where the source and destination pan should be get from
+ */
+static inline bool ieee802154_skb_is_intra_pan_addressing(__le16 fc,
+							  const struct sk_buff *skb)
+{
+	unsigned char *dst_pan = ieee802154_skb_dst_pan(fc, skb),
+		      *src_pan = ieee802154_skb_src_pan(fc, skb);
+
+	/* if one is NULL is no intra pan addressing */
+	if (!dst_pan || !src_pan)
+		return false;
+
+	return !memcmp(dst_pan, src_pan, IEEE802154_PAN_ID_LEN);
 }
 
 /**
diff --git a/include/net/netevent.h b/include/net/netevent.h
index d8bbb38584b6..f440df172b56 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -24,6 +24,7 @@ struct netevent_redirect {
 enum netevent_notif_type {
 	NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
 	NETEVENT_REDIRECT,	   /* arg is struct netevent_redirect ptr */
+	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index dd78bea227c8..5d3397f34583 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -85,6 +85,9 @@ struct nf_conn {
 	spinlock_t	lock;
 	u16		cpu;
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	struct nf_conntrack_zone zone;
+#endif
 	/* XXX should I move this to the tail ? - Y.K */
 	/* These are my tuples; original and reply */
 	struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
@@ -287,6 +290,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
+int nf_conntrack_hash_resize(unsigned int hashsize);
 extern unsigned int nf_conntrack_htable_size;
 extern unsigned int nf_conntrack_max;
 
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 55d15049ab2f..b925395fa5ed 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -15,9 +15,6 @@ enum nf_ct_ext_id {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	NF_CT_EXT_ECACHE,
 #endif
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-	NF_CT_EXT_ZONE,
-#endif
 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
 	NF_CT_EXT_TSTAMP,
 #endif
@@ -38,7 +35,6 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct
 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
-#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
 #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
 #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
 #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 4e32512cef32..64a718b60839 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -9,12 +9,11 @@
 static inline const struct nf_conntrack_zone *
 nf_ct_zone(const struct nf_conn *ct)
 {
-	const struct nf_conntrack_zone *nf_ct_zone = NULL;
-
 #ifdef CONFIG_NF_CONNTRACK_ZONES
-	nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);
+	return &ct->zone;
+#else
+	return &nf_ct_zone_dflt;
 #endif
-	return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt;
 }
 
 static inline const struct nf_conntrack_zone *
@@ -31,32 +30,22 @@ static inline const struct nf_conntrack_zone *
 nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
 		struct nf_conntrack_zone *tmp)
 {
-	const struct nf_conntrack_zone *zone;
-
+#ifdef CONFIG_NF_CONNTRACK_ZONES
 	if (!tmpl)
 		return &nf_ct_zone_dflt;
 
-	zone = nf_ct_zone(tmpl);
-	if (zone->flags & NF_CT_FLAG_MARK)
-		zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0);
-
-	return zone;
+	if (tmpl->zone.flags & NF_CT_FLAG_MARK)
+		return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0);
+#endif
+	return nf_ct_zone(tmpl);
 }
 
-static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags,
-				 const struct nf_conntrack_zone *info)
+static inline void nf_ct_zone_add(struct nf_conn *ct,
+				  const struct nf_conntrack_zone *zone)
 {
 #ifdef CONFIG_NF_CONNTRACK_ZONES
-	struct nf_conntrack_zone *nf_ct_zone;
-
-	nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags);
-	if (!nf_ct_zone)
-		return -ENOMEM;
-
-	nf_ct_zone_init(nf_ct_zone, info->id, info->dir,
-			info->flags);
+	ct->zone = *zone;
 #endif
-	return 0;
 }
 
 static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
@@ -68,22 +57,34 @@ static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
 static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
 				enum ip_conntrack_dir dir)
 {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
 	return nf_ct_zone_matches_dir(zone, dir) ?
 	       zone->id : NF_CT_DEFAULT_ZONE_ID;
+#else
+	return NF_CT_DEFAULT_ZONE_ID;
+#endif
 }
 
 static inline bool nf_ct_zone_equal(const struct nf_conn *a,
 				    const struct nf_conntrack_zone *b,
 				    enum ip_conntrack_dir dir)
 {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
 	return nf_ct_zone_id(nf_ct_zone(a), dir) ==
 	       nf_ct_zone_id(b, dir);
+#else
+	return true;
+#endif
 }
 
 static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
 					const struct nf_conntrack_zone *b)
 {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
 	return nf_ct_zone(a)->id == b->id;
+#else
+	return true;
+#endif
 }
 #endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
 #endif /* _NF_CONNTRACK_ZONES_H */
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 57639fca223a..83d855ba6af1 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -12,6 +12,9 @@
 #define NF_LOG_UID		0x08	/* Log UID owning local socket */
 #define NF_LOG_MASK		0x0f
 
+/* This flag indicates that copy_len field in nf_loginfo is set */
+#define NF_LOG_F_COPY_LEN	0x1
+
 enum nf_log_type {
 	NF_LOG_TYPE_LOG		= 0,
 	NF_LOG_TYPE_ULOG,
@@ -22,9 +25,13 @@ struct nf_loginfo {
 	u_int8_t type;
 	union {
 		struct {
+			/* copy_len will be used iff you set
+			 * NF_LOG_F_COPY_LEN in flags
+			 */
 			u_int32_t copy_len;
 			u_int16_t group;
 			u_int16_t qthreshold;
+			u_int16_t flags;
 		} ulog;
 		struct {
 			u_int8_t level;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 092235458691..30c1d9489ae2 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -167,6 +167,7 @@ struct nft_set_elem {
 
 struct nft_set;
 struct nft_set_iter {
+	u8		genmask;
 	unsigned int	count;
 	unsigned int	skip;
 	int		err;
@@ -296,6 +297,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
  * 	@ops: set ops
  * 	@pnet: network namespace
  * 	@flags: set flags
+ *	@genmask: generation mask
  * 	@klen: key length
  * 	@dlen: data length
  * 	@data: private set data
@@ -317,7 +319,8 @@ struct nft_set {
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
 	possible_net_t			pnet;
-	u16				flags;
+	u16				flags:14,
+					genmask:2;
 	u8				klen;
 	u8				dlen;
 	unsigned char			data[]
@@ -335,9 +338,9 @@ static inline struct nft_set *nft_set_container_of(const void *priv)
 }
 
 struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
-				     const struct nlattr *nla);
+				     const struct nlattr *nla, u8 genmask);
 struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
-					  const struct nlattr *nla);
+					  const struct nlattr *nla, u8 genmask);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
@@ -732,7 +735,6 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
 
 enum nft_chain_flags {
 	NFT_BASE_CHAIN			= 0x1,
-	NFT_CHAIN_INACTIVE		= 0x2,
 };
 
 /**
@@ -754,7 +756,8 @@ struct nft_chain {
 	u64				handle;
 	u32				use;
 	u16				level;
-	u8				flags;
+	u8				flags:6,
+					genmask:2;
 	char				name[NFT_CHAIN_MAXNAMELEN];
 };
 
@@ -796,7 +799,6 @@ struct nft_stats {
 };
 
 #define NFT_HOOK_OPS_MAX		2
-#define NFT_BASECHAIN_DISABLED		(1 << 0)
 
 /**
  *	struct nft_base_chain - nf_tables base chain
@@ -838,6 +840,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
  *	@hgenerator: handle generator state
  *	@use: number of chain references to this table
  *	@flags: table flag (see enum nft_table_flags)
+ *	@genmask: generation mask
  *	@name: name of the table
  */
 struct nft_table {
@@ -846,7 +849,8 @@ struct nft_table {
 	struct list_head		sets;
 	u64				hgenerator;
 	u32				use;
-	u16				flags;
+	u16				flags:14,
+					genmask:2;
 	char				name[NFT_TABLE_MAXNAMELEN];
 };
 
@@ -970,6 +974,32 @@ static inline u8 nft_genmask_cur(const struct net *net)
 #define NFT_GENMASK_ANY		((1 << 0) | (1 << 1))
 
 /*
+ * Generic transaction helpers
+ */
+
+/* Check if this object is currently active. */
+#define nft_is_active(__net, __obj)				\
+	(((__obj)->genmask & nft_genmask_cur(__net)) == 0)
+
+/* Check if this object is active in the next generation. */
+#define nft_is_active_next(__net, __obj)			\
+	(((__obj)->genmask & nft_genmask_next(__net)) == 0)
+
+/* This object becomes active in the next generation. */
+#define nft_activate_next(__net, __obj)				\
+	(__obj)->genmask = nft_genmask_cur(__net)
+
+/* This object becomes inactive in the next generation. */
+#define nft_deactivate_next(__net, __obj)			\
+        (__obj)->genmask = nft_genmask_next(__net)
+
+/* After committing the ruleset, clear the stale generation bit. */
+#define nft_clear(__net, __obj)					\
+	(__obj)->genmask &= ~nft_genmask_next(__net)
+#define nft_active_genmask(__obj, __genmask)			\
+	!((__obj)->genmask & __genmask)
+
+/*
  * Set element transaction helpers
  */
 
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index fcab4de49951..ddcee128f5d9 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -54,6 +54,8 @@ enum nl802154_commands {
 
 	NL802154_CMD_SET_ACKREQ_DEFAULT,
 
+	NL802154_CMD_SET_WPAN_PHY_NETNS,
+
 	/* add new commands above here */
 
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
@@ -124,6 +126,11 @@ enum nl802154_attrs {
 
 	NL802154_ATTR_ACKREQ_DEFAULT,
 
+	NL802154_ATTR_PAD,
+
+	NL802154_ATTR_PID,
+	NL802154_ATTR_NETNS_FD,
+
 	/* add attributes here, update the policy in nl802154.c */
 
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
@@ -138,8 +145,6 @@ enum nl802154_attrs {
 	NL802154_ATTR_SEC_KEY,
 #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
 
-	NL802154_ATTR_PAD,
-
 	__NL802154_ATTR_AFTER_LAST,
 	NL802154_ATTR_MAX = __NL802154_ATTR_AFTER_LAST - 1
 };
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 006a7b81d758..4113916cc1bb 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -98,10 +98,11 @@ struct rtnl_link_ops {
 						   const struct net_device *dev,
 						   const struct net_device *slave_dev);
 	struct net		*(*get_link_net)(const struct net_device *dev);
-	size_t			(*get_linkxstats_size)(const struct net_device *dev);
+	size_t			(*get_linkxstats_size)(const struct net_device *dev,
+						       int attr);
 	int			(*fill_linkxstats)(struct sk_buff *skb,
 						   const struct net_device *dev,
-						   int *prividx);
+						   int *prividx, int attr);
 };
 
 int __rtnl_link_register(struct rtnl_link_ops *ops);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 83c5ec58b93a..ce93c4b10d26 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -59,6 +59,7 @@
 #include <linux/workqueue.h>	/* We need tq_struct.	 */
 #include <linux/sctp.h>		/* We need sctp* header structs.  */
 #include <net/sctp/auth.h>	/* We need auth specific structs */
+#include <net/ip.h>		/* For inet_skb_parm */
 
 /* A convenience structure for handling sockaddr structures.
  * We should wean ourselves off this.
@@ -602,6 +603,16 @@ struct sctp_chunk {
 	/* This needs to be recoverable for SCTP_SEND_FAILED events. */
 	struct sctp_sndrcvinfo sinfo;
 
+	/* We use this field to record param for prsctp policies,
+	 * for TTL policy, it is the time_to_drop of this chunk,
+	 * for RTX policy, it is the max_sent_count of this chunk,
+	 * for PRIO policy, it is the priority of this chunk.
+	 */
+	unsigned long prsctp_param;
+
+	/* How many times this chunk have been sent, for prsctp RTX policy */
+	int sent_count;
+
 	/* Which association does this belong to?  */
 	struct sctp_association *asoc;
 
@@ -1074,12 +1085,36 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
 		     sctp_retransmit_reason_t);
 void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
 int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
+void sctp_prsctp_prune(struct sctp_association *asoc,
+		       struct sctp_sndrcvinfo *sinfo, int msg_len);
 /* Uncork and flush an outqueue.  */
 static inline void sctp_outq_cork(struct sctp_outq *q)
 {
 	q->cork = 1;
 }
 
+/* SCTP skb control block.
+ * sctp_input_cb is currently used on rx and sock rx queue
+ */
+struct sctp_input_cb {
+	union {
+		struct inet_skb_parm	h4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct inet6_skb_parm	h6;
+#endif
+	} header;
+	struct sctp_chunk *chunk;
+	struct sctp_af *af;
+};
+#define SCTP_INPUT_CB(__skb)	((struct sctp_input_cb *)&((__skb)->cb[0]))
+
+static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb)
+{
+	const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
+
+	return chunk->head_skb ? : skb;
+}
+
 /* These bind address data fields common between endpoints and associations */
 struct sctp_bind_addr {
 
@@ -1256,7 +1291,8 @@ struct sctp_endpoint {
 	/* SCTP-AUTH: endpoint shared keys */
 	struct list_head endpoint_shared_keys;
 	__u16 active_key_id;
-	__u8  auth_enable;
+	__u8  auth_enable:1,
+	      prsctp_enable:1;
 };
 
 /* Recover the outter endpoint structure. */
@@ -1848,9 +1884,15 @@ struct sctp_association {
 	__u16 active_key_id;
 
 	__u8 need_ecne:1,	/* Need to send an ECNE Chunk? */
-	     temp:1;		/* Is it a temporary association? */
+	     temp:1,		/* Is it a temporary association? */
+	     prsctp_enable:1;
 
 	struct sctp_priv_assoc_stats stats;
+
+	int sent_cnt_removable;
+
+	__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+	__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
 };
 
 
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index cccdcfd14973..2c098cd7e7e2 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -48,15 +48,15 @@
  */
 struct sctp_ulpevent {
 	struct sctp_association *asoc;
-	__u16 stream;
-	__u16 ssn;
-	__u16 flags;
+	struct sctp_chunk *chunk;
+	unsigned int rmem_len;
 	__u32 ppid;
 	__u32 tsn;
 	__u32 cumtsn;
-	int msg_flags;
-	int iif;
-	unsigned int rmem_len;
+	__u16 stream;
+	__u16 ssn;
+	__u16 flags;
+	__u16 msg_flags;
 };
 
 /* Retrieve the skb this event sits inside of. */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 985619a59323..9023e3e3be0b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 				 struct net_device *group_dev,
 				 bool joining);
 
+bool switchdev_port_same_parent_id(struct net_device *a,
+				   struct net_device *b);
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
 {
 }
 
+static inline bool switchdev_port_same_parent_id(struct net_device *a,
+						 struct net_device *b)
+{
+	return false;
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index dc9a09aefb33..c55facd17b7e 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -36,7 +36,7 @@ struct tcf_meta_ops {
 	int	(*encode)(struct sk_buff *, void *, struct tcf_meta_info *);
 	int	(*decode)(struct sk_buff *, void *, u16 len);
 	int	(*get)(struct sk_buff *skb, struct tcf_meta_info *mi);
-	int	(*alloc)(struct tcf_meta_info *, void *);
+	int	(*alloc)(struct tcf_meta_info *, void *, gfp_t);
 	void	(*release)(struct tcf_meta_info *);
 	int	(*validate)(void *val, int len);
 	struct module	*owner;
@@ -48,8 +48,8 @@ int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi);
 int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi);
 int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen,
 			const void *dval);
-int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval);
-int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval);
+int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp);
+int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp);
 int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi);
 int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi);
 int ife_validate_meta_u32(void *val, int len);
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index b496d5ad7d42..d01a5d40cfb5 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -24,11 +24,11 @@
 
 struct tcf_skbedit {
 	struct tcf_common	common;
-	u32			flags;
-	u32     		priority;
-	u32     		mark;
-	u16			queue_mapping;
-	/* XXX: 16-bit pad here? */
+	u32		flags;
+	u32		priority;
+	u32		mark;
+	u16		queue_mapping;
+	u16		ptype;
 };
 #define to_skbedit(a) \
 	container_of(a->priv, struct tcf_skbedit, common)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a79894b66726..c00e7d51bb18 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -589,7 +589,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 	 * On the other hand, for extremely large MSS devices, handling
 	 * smaller than MSS windows in this way does make sense.
 	 */
-	if (tp->max_window >= 512)
+	if (tp->max_window > TCP_MSS_DEFAULT)
 		cutoff = (tp->max_window >> 1);
 	else
 		cutoff = tp->max_window;
@@ -1384,7 +1384,7 @@ union tcp_md5sum_block {
 /* - pool: digest algorithm, hash description and scratch buffer */
 struct tcp_md5sig_pool {
 	struct ahash_request	*md5_req;
-	union tcp_md5sum_block	md5_blk;
+	void			*scratch;
 };
 
 /* - functions */
@@ -1420,7 +1420,6 @@ static inline void tcp_put_md5sig_pool(void)
 	local_bh_enable();
 }
 
-int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *);
 int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
 			  unsigned int header_len);
 int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 16274e2133cd..9c9a27d42aaa 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -203,7 +203,9 @@ struct rvt_driver_provided {
 
 	/*
 	 * Allocate a private queue pair data structure for driver specific
-	 * information which is opaque to rdmavt.
+	 * information which is opaque to rdmavt.  Errors are returned via
+	 * ERR_PTR(err).  The driver is free to return NULL or a valid
+	 * pointer.
 	 */
 	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 				gfp_t gfp);
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
new file mode 100644
index 000000000000..333c32ac9bfa
--- /dev/null
+++ b/include/trace/events/devlink.h
@@ -0,0 +1,68 @@
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM devlink
+
+#if !defined(_TRACE_DEVLINK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DEVLINK_H
+
+#include <linux/device.h>
+#include <net/devlink.h>
+#include <linux/tracepoint.h>
+
+/*
+ * Tracepoint for devlink hardware message:
+ */
+TRACE_EVENT(devlink_hwmsg,
+	TP_PROTO(const struct devlink *devlink, bool incoming,
+		 unsigned long type, const u8 *buf, size_t len),
+
+	TP_ARGS(devlink, incoming, type, buf, len),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(owner_name, devlink->dev->driver->owner->name)
+		__field(bool, incoming)
+		__field(unsigned long, type)
+		__dynamic_array(u8, buf, len)
+		__field(size_t, len)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(owner_name, devlink->dev->driver->owner->name);
+		__entry->incoming = incoming;
+		__entry->type = type;
+		memcpy(__get_dynamic_array(buf), buf, len);
+		__entry->len = len;
+	),
+
+	TP_printk("bus_name=%s dev_name=%s owner_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%lu",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(owner_name), __entry->incoming, __entry->type,
+		  (int) __entry->len, __get_dynamic_array(buf), __entry->len)
+);
+
+#endif /* _TRACE_DEVLINK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
+#else /* CONFIG_NET_DEVLINK */
+
+#if !defined(_TRACE_DEVLINK_H)
+#define _TRACE_DEVLINK_H
+
+#include <net/devlink.h>
+
+static inline void trace_devlink_hwmsg(const struct devlink *devlink,
+				       bool incoming, unsigned long type,
+				       const u8 *buf, size_t len)
+{
+}
+
+#endif /* _TRACE_DEVLINK_H */
+
+#endif
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 8fe1e93f531d..118ed7767639 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -12,22 +12,27 @@
 
 TRACE_EVENT(napi_poll,
 
-	TP_PROTO(struct napi_struct *napi),
+	TP_PROTO(struct napi_struct *napi, int work, int budget),
 
-	TP_ARGS(napi),
+	TP_ARGS(napi, work, budget),
 
 	TP_STRUCT__entry(
 		__field(	struct napi_struct *,	napi)
+		__field(	int,			work)
+		__field(	int,			budget)
 		__string(	dev_name, napi->dev ? napi->dev->name : NO_DEV)
 	),
 
 	TP_fast_assign(
 		__entry->napi = napi;
+		__entry->work = work;
+		__entry->budget = budget;
 		__assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
 	),
 
-	TP_printk("napi poll on napi struct %p for device %s",
-		__entry->napi, __get_str(dev_name))
+	TP_printk("napi poll on napi struct %p for device %s work %d budget %d",
+		  __entry->napi, __get_str(dev_name),
+		  __entry->work, __entry->budget)
 );
 
 #undef NO_DEV
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
new file mode 100644
index 000000000000..0fbf6fd4711b
--- /dev/null
+++ b/include/uapi/linux/batman_adv.h
@@ -0,0 +1,114 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _UAPI_LINUX_BATMAN_ADV_H_
+#define _UAPI_LINUX_BATMAN_ADV_H_
+
+#define BATADV_NL_NAME "batadv"
+
+#define BATADV_NL_MCAST_GROUP_TPMETER	"tpmeter"
+
+/**
+ * enum batadv_nl_attrs - batman-adv netlink attributes
+ *
+ * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors
+ * @BATADV_ATTR_VERSION: batman-adv version string
+ * @BATADV_ATTR_ALGO_NAME: name of routing algorithm
+ * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface
+ * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface
+ * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface
+ * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface
+ * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface
+ * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface
+ * @BATADV_ATTR_ORIG_ADDRESS: originator mac address
+ * @BATADV_ATTR_TPMETER_RESULT: result of run (see batadv_tp_meter_status)
+ * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took
+ * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run
+ * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session
+ * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment
+ * @__BATADV_ATTR_AFTER_LAST: internal use
+ * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
+ * @BATADV_ATTR_MAX: highest attribute number currently defined
+ */
+enum batadv_nl_attrs {
+	BATADV_ATTR_UNSPEC,
+	BATADV_ATTR_VERSION,
+	BATADV_ATTR_ALGO_NAME,
+	BATADV_ATTR_MESH_IFINDEX,
+	BATADV_ATTR_MESH_IFNAME,
+	BATADV_ATTR_MESH_ADDRESS,
+	BATADV_ATTR_HARD_IFINDEX,
+	BATADV_ATTR_HARD_IFNAME,
+	BATADV_ATTR_HARD_ADDRESS,
+	BATADV_ATTR_ORIG_ADDRESS,
+	BATADV_ATTR_TPMETER_RESULT,
+	BATADV_ATTR_TPMETER_TEST_TIME,
+	BATADV_ATTR_TPMETER_BYTES,
+	BATADV_ATTR_TPMETER_COOKIE,
+	BATADV_ATTR_PAD,
+	/* add attributes above here, update the policy in netlink.c */
+	__BATADV_ATTR_AFTER_LAST,
+	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
+	BATADV_ATTR_MAX = __BATADV_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum batadv_nl_commands - supported batman-adv netlink commands
+ *
+ * @BATADV_CMD_UNSPEC: unspecified command to catch errors
+ * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device
+ * @BATADV_CMD_TP_METER: Start a tp meter session
+ * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
+ * @__BATADV_CMD_AFTER_LAST: internal use
+ * @BATADV_CMD_MAX: highest used command number
+ */
+enum batadv_nl_commands {
+	BATADV_CMD_UNSPEC,
+	BATADV_CMD_GET_MESH_INFO,
+	BATADV_CMD_TP_METER,
+	BATADV_CMD_TP_METER_CANCEL,
+	/* add new commands above here */
+	__BATADV_CMD_AFTER_LAST,
+	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
+};
+
+/**
+ * enum batadv_tp_meter_reason - reason of a tp meter test run stop
+ * @BATADV_TP_REASON_COMPLETE: sender finished tp run
+ * @BATADV_TP_REASON_CANCEL: sender was stopped during run
+ * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or didn't
+ *  answer
+ * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit
+ * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node already
+ *  ongoing
+ * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory
+ * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface
+ * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions
+ */
+enum batadv_tp_meter_reason {
+	BATADV_TP_REASON_COMPLETE		= 3,
+	BATADV_TP_REASON_CANCEL			= 4,
+	/* error status >= 128 */
+	BATADV_TP_REASON_DST_UNREACHABLE	= 128,
+	BATADV_TP_REASON_RESEND_LIMIT		= 129,
+	BATADV_TP_REASON_ALREADY_ONGOING	= 130,
+	BATADV_TP_REASON_MEMORY_ERROR		= 131,
+	BATADV_TP_REASON_CANT_SEND		= 132,
+	BATADV_TP_REASON_TOO_MANY		= 133,
+};
+
+#endif /* _UAPI_LINUX_BATMAN_ADV_H_ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 406459b935a2..262a7e883b19 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -84,6 +84,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_PERCPU_HASH,
 	BPF_MAP_TYPE_PERCPU_ARRAY,
 	BPF_MAP_TYPE_STACK_TRACE,
+	BPF_MAP_TYPE_CGROUP_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -313,6 +314,56 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_skb_get_tunnel_opt,
 	BPF_FUNC_skb_set_tunnel_opt,
+
+	/**
+	 * bpf_skb_change_proto(skb, proto, flags)
+	 * Change protocol of the skb. Currently supported is
+	 * v4 -> v6, v6 -> v4 transitions. The helper will also
+	 * resize the skb. eBPF program is expected to fill the
+	 * new headers via skb_store_bytes and lX_csum_replace.
+	 * @skb: pointer to skb
+	 * @proto: new skb->protocol type
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_proto,
+
+	/**
+	 * bpf_skb_change_type(skb, type)
+	 * Change packet type of skb.
+	 * @skb: pointer to skb
+	 * @type: new skb->pkt_type type
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_type,
+
+	/**
+	 * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+	 * @skb: pointer to skb
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 skb failed the cgroup2 descendant test
+	 *   == 1 skb succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_skb_in_cgroup,
+
+	/**
+	 * bpf_get_hash_recalc(skb)
+	 * Retrieve and possibly recalculate skb->hash.
+	 * @skb: pointer to skb
+	 * Return: hash
+	 */
+	BPF_FUNC_get_hash_recalc,
+
+	/**
+	 * u64 bpf_get_current_task(void)
+	 * Returns current task_struct
+	 * Return: current
+	 */
+	BPF_FUNC_get_current_task,
+
 	__BPF_FUNC_MAX_ID,
 };
 
@@ -347,7 +398,7 @@ enum bpf_func_id {
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
 
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
 
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 23c6960e94a4..2bdd1e3e7007 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -118,7 +118,7 @@ struct btrfs_ioctl_vol_args_v2 {
 	};
 	union {
 		char name[BTRFS_SUBVOL_NAME_MAX + 1];
-		u64 devid;
+		__u64 devid;
 	};
 };
 
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ba0073b26fa6..915bfa74458c 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -57,6 +57,8 @@ enum devlink_command {
 	DEVLINK_CMD_SB_OCC_SNAPSHOT,
 	DEVLINK_CMD_SB_OCC_MAX_CLEAR,
 
+	DEVLINK_CMD_ESWITCH_MODE_GET,
+	DEVLINK_CMD_ESWITCH_MODE_SET,
 	/* add new commands above here */
 
 	__DEVLINK_CMD_MAX,
@@ -95,6 +97,11 @@ enum devlink_sb_threshold_type {
 
 #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
 
+enum devlink_eswitch_mode {
+	DEVLINK_ESWITCH_MODE_LEGACY,
+	DEVLINK_ESWITCH_MODE_SWITCHDEV,
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -125,6 +132,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_SB_TC_INDEX,		/* u16 */
 	DEVLINK_ATTR_SB_OCC_CUR,		/* u32 */
 	DEVLINK_ATTR_SB_OCC_MAX,		/* u32 */
+	DEVLINK_ATTR_ESWITCH_MODE,		/* u16 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5974fae54e12..27e17363263a 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -105,6 +105,9 @@
  *
  *  7.24
  *  - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
+ *
+ *  7.25
+ *  - add FUSE_PARALLEL_DIROPS
  */
 
 #ifndef _LINUX_FUSE_H
@@ -140,7 +143,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 24
+#define FUSE_KERNEL_MINOR_VERSION 25
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -234,6 +237,7 @@ struct fuse_file_lock {
  * FUSE_ASYNC_DIO: asynchronous direct I/O submission
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -253,6 +257,7 @@ struct fuse_file_lock {
 #define FUSE_ASYNC_DIO		(1 << 15)
 #define FUSE_WRITEBACK_CACHE	(1 << 16)
 #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
+#define FUSE_PARALLEL_DIROPS    (1 << 18)
 
 /**
  * CUSE INIT request/reply flags
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 397d503fdedb..c186f64fffca 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -247,8 +247,37 @@ enum {
 enum {
 	BRIDGE_XSTATS_UNSPEC,
 	BRIDGE_XSTATS_VLAN,
+	BRIDGE_XSTATS_MCAST,
+	BRIDGE_XSTATS_PAD,
 	__BRIDGE_XSTATS_MAX
 };
 #define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1)
 
+enum {
+	BR_MCAST_DIR_RX,
+	BR_MCAST_DIR_TX,
+	BR_MCAST_DIR_SIZE
+};
+
+/* IGMP/MLD statistics */
+struct br_mcast_stats {
+	__u64 igmp_v1queries[BR_MCAST_DIR_SIZE];
+	__u64 igmp_v2queries[BR_MCAST_DIR_SIZE];
+	__u64 igmp_v3queries[BR_MCAST_DIR_SIZE];
+	__u64 igmp_leaves[BR_MCAST_DIR_SIZE];
+	__u64 igmp_v1reports[BR_MCAST_DIR_SIZE];
+	__u64 igmp_v2reports[BR_MCAST_DIR_SIZE];
+	__u64 igmp_v3reports[BR_MCAST_DIR_SIZE];
+	__u64 igmp_parse_errors;
+
+	__u64 mld_v1queries[BR_MCAST_DIR_SIZE];
+	__u64 mld_v2queries[BR_MCAST_DIR_SIZE];
+	__u64 mld_leaves[BR_MCAST_DIR_SIZE];
+	__u64 mld_v1reports[BR_MCAST_DIR_SIZE];
+	__u64 mld_v2reports[BR_MCAST_DIR_SIZE];
+	__u64 mld_parse_errors;
+
+	__u64 mcast_bytes[BR_MCAST_DIR_SIZE];
+	__u64 mcast_packets[BR_MCAST_DIR_SIZE];
+};
 #endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bb36bd5675a7..4285ac31e865 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -273,6 +273,7 @@ enum {
 	IFLA_BR_VLAN_DEFAULT_PVID,
 	IFLA_BR_PAD,
 	IFLA_BR_VLAN_STATS_ENABLED,
+	IFLA_BR_MCAST_STATS_ENABLED,
 	__IFLA_BR_MAX,
 };
 
@@ -822,6 +823,7 @@ enum {
 	IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
 	IFLA_STATS_LINK_64,
 	IFLA_STATS_LINK_XSTATS,
+	IFLA_STATS_LINK_XSTATS_SLAVE,
 	__IFLA_STATS_MAX,
 };
 
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 87cf351bab03..737fa32faad4 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -611,6 +611,37 @@
 #define KEY_KBDINPUTASSIST_ACCEPT		0x264
 #define KEY_KBDINPUTASSIST_CANCEL		0x265
 
+/* Diagonal movement keys */
+#define KEY_RIGHT_UP			0x266
+#define KEY_RIGHT_DOWN			0x267
+#define KEY_LEFT_UP			0x268
+#define KEY_LEFT_DOWN			0x269
+
+#define KEY_ROOT_MENU			0x26a /* Show Device's Root Menu */
+/* Show Top Menu of the Media (e.g. DVD) */
+#define KEY_MEDIA_TOP_MENU		0x26b
+#define KEY_NUMERIC_11			0x26c
+#define KEY_NUMERIC_12			0x26d
+/*
+ * Toggle Audio Description: refers to an audio service that helps blind and
+ * visually impaired consumers understand the action in a program. Note: in
+ * some countries this is referred to as "Video Description".
+ */
+#define KEY_AUDIO_DESC			0x26e
+#define KEY_3D_MODE			0x26f
+#define KEY_NEXT_FAVORITE		0x270
+#define KEY_STOP_RECORD			0x271
+#define KEY_PAUSE_RECORD		0x272
+#define KEY_VOD				0x273 /* Video on Demand */
+#define KEY_UNMUTE			0x274
+#define KEY_FASTREVERSE			0x275
+#define KEY_SLOWREVERSE			0x276
+/*
+ * Control a data application associated with the currently viewed channel,
+ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.)
+ */
+#define KEY_DATA			0x275
+
 #define BTN_TRIGGER_HAPPY		0x2c0
 #define BTN_TRIGGER_HAPPY1		0x2c0
 #define BTN_TRIGGER_HAPPY2		0x2c1
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 01113841190d..c51494119817 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -247,6 +247,7 @@ struct input_mask {
 #define BUS_ATARI		0x1B
 #define BUS_SPI			0x1C
 #define BUS_RMI			0x1D
+#define BUS_CEC			0x1E
 
 /*
  * MT_TOOL types
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 1d973d2ba417..cd26d7a0fd07 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -33,6 +33,7 @@ header-y += xt_NFLOG.h
 header-y += xt_NFQUEUE.h
 header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
+header-y += xt_SYNPROXY.h
 header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
 header-y += xt_TEE.h
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6a4dbe04f09e..01751faccaf8 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -546,6 +546,10 @@ enum nft_cmp_attributes {
 };
 #define NFTA_CMP_MAX		(__NFTA_CMP_MAX - 1)
 
+enum nft_lookup_flags {
+	NFT_LOOKUP_F_INV = (1 << 0),
+};
+
 /**
  * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes
  *
@@ -553,6 +557,7 @@ enum nft_cmp_attributes {
  * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
  * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
  * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
+ * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags)
  */
 enum nft_lookup_attributes {
 	NFTA_LOOKUP_UNSPEC,
@@ -560,6 +565,7 @@ enum nft_lookup_attributes {
 	NFTA_LOOKUP_SREG,
 	NFTA_LOOKUP_DREG,
 	NFTA_LOOKUP_SET_ID,
+	NFTA_LOOKUP_FLAGS,
 	__NFTA_LOOKUP_MAX
 };
 #define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_NFLOG.h b/include/uapi/linux/netfilter/xt_NFLOG.h
index 87b58311ce6b..f33070730fc8 100644
--- a/include/uapi/linux/netfilter/xt_NFLOG.h
+++ b/include/uapi/linux/netfilter/xt_NFLOG.h
@@ -6,9 +6,13 @@
 #define XT_NFLOG_DEFAULT_GROUP		0x1
 #define XT_NFLOG_DEFAULT_THRESHOLD	0
 
-#define XT_NFLOG_MASK			0x0
+#define XT_NFLOG_MASK			0x1
+
+/* This flag indicates that 'len' field in xt_nflog_info is set*/
+#define XT_NFLOG_F_COPY_LEN		0x1
 
 struct xt_nflog_info {
+	/* 'len' will be used iff you set XT_NFLOG_F_COPY_LEN in flags */
 	__u32	len;
 	__u16	group;
 	__u16	threshold;
diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h
index 2d59fbaa93c6..ca67e61d2a61 100644
--- a/include/uapi/linux/netfilter/xt_SYNPROXY.h
+++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h
@@ -1,6 +1,8 @@
 #ifndef _XT_SYNPROXY_H
 #define _XT_SYNPROXY_H
 
+#include <linux/types.h>
+
 #define XT_SYNPROXY_OPT_MSS		0x01
 #define XT_SYNPROXY_OPT_WSCALE		0x02
 #define XT_SYNPROXY_OPT_SACK_PERM	0x04
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 53c8278827a0..220694151434 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1829,6 +1829,44 @@ enum nl80211_commands {
  *	%NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per
  *	interface type.
  *
+ * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO
+ *	groupID for monitor mode.
+ *	The first 8 bytes are a mask that defines the membership in each
+ *	group (there are 64 groups, group 0 and 63 are reserved),
+ *	each bit represents a group and set to 1 for being a member in
+ *	that group and 0 for not being a member.
+ *	The remaining 16 bytes define the position in each group: 2 bits for
+ *	each group.
+ *	(smaller group numbers represented on most significant bits and bigger
+ *	group numbers on least significant bits.)
+ *	This attribute is used only if all interfaces are in monitor mode.
+ *	Set this attribute in order to monitor packets using the given MU-MIMO
+ *	groupID data.
+ *	to turn off that feature set all the bits of the groupID to zero.
+ * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow
+ *	when using MU-MIMO air sniffer.
+ *	to turn that feature off set an invalid mac address
+ *	(e.g. FF:FF:FF:FF:FF:FF)
+ *
+ * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually
+ *	started (u64). The time is the TSF of the BSS the interface that
+ *	requested the scan is connected to (if available, otherwise this
+ *	attribute must not be included).
+ * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which
+ *	%NL80211_ATTR_SCAN_START_TIME_TSF is set.
+ * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If
+ *	%NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the
+ *	maximum measurement duration allowed. This attribute is used with
+ *	measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN
+ *	if the scan is used for beacon report radio measurement.
+ * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates
+ *	that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is
+ *	mandatory. If this flag is not set, the duration is the maximum duration
+ *	and the actual measurement duration may be shorter.
+ *
+ * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is
+ *	used to pull the stored data for mesh peer in power save state.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2213,6 +2251,16 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_IFTYPE_EXT_CAPA,
 
+	NL80211_ATTR_MU_MIMO_GROUP_DATA,
+	NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR,
+
+	NL80211_ATTR_SCAN_START_TIME_TSF,
+	NL80211_ATTR_SCAN_START_TIME_TSF_BSSID,
+	NL80211_ATTR_MEASUREMENT_DURATION,
+	NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY,
+
+	NL80211_ATTR_MESH_PEER_AID,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3474,6 +3522,12 @@ enum nl80211_bss_scan_width {
  *	was last updated by a received frame. The value is expected to be
  *	accurate to about 10ms. (u64, nanoseconds)
  * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first
+ *	octet of the timestamp field of the last beacon/probe received for
+ *	this BSS. The time is the TSF of the BSS specified by
+ *	@NL80211_BSS_PARENT_BSSID. (u64).
+ * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF
+ *	is set.
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -3495,6 +3549,8 @@ enum nl80211_bss {
 	NL80211_BSS_PRESP_DATA,
 	NL80211_BSS_LAST_SEEN_BOOTTIME,
 	NL80211_BSS_PAD,
+	NL80211_BSS_PARENT_TSF,
+	NL80211_BSS_PARENT_BSSID,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
@@ -4479,6 +4535,22 @@ enum nl80211_feature_flags {
  *	%NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set
  *	the ASSOC_REQ_USE_RRM flag in the association request even if
  *	NL80211_FEATURE_QUIET is not advertized.
+ * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air
+ *	sniffer which means that it can be configured to hear packets from
+ *	certain groups which can be configured by the
+ *	%NL80211_ATTR_MU_MIMO_GROUP_DATA attribute,
+ *	or can be configured to follow a station by configuring the
+ *	%NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute.
+ * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual
+ *	time the scan started in scan results event. The time is the TSF of
+ *	the BSS that the interface that requested the scan is connected to
+ *	(if available).
+ * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the
+ *	time the last beacon/probe was received. The time is the TSF of the
+ *	BSS that the interface that requested the scan is connected to
+ *	(if available).
+ * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of
+ *	channel dwell time.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4486,6 +4558,10 @@ enum nl80211_feature_flags {
 enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_VHT_IBSS,
 	NL80211_EXT_FEATURE_RRM,
+	NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER,
+	NL80211_EXT_FEATURE_SCAN_START_TIME,
+	NL80211_EXT_FEATURE_BSS_PARENT_TSF,
+	NL80211_EXT_FEATURE_SET_SCAN_DWELL,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index ce70fe6b45df..d304f4c9792c 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -112,6 +112,31 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_SOCKOPT_CONNECTX	110		/* CONNECTX requests. */
 #define SCTP_SOCKOPT_CONNECTX3	111	/* CONNECTX requests (updated) */
 #define SCTP_GET_ASSOC_STATS	112	/* Read only */
+#define SCTP_PR_SUPPORTED	113
+#define SCTP_DEFAULT_PRINFO	114
+#define SCTP_PR_ASSOC_STATUS	115
+
+/* PR-SCTP policies */
+#define SCTP_PR_SCTP_NONE	0x0000
+#define SCTP_PR_SCTP_TTL	0x0010
+#define SCTP_PR_SCTP_RTX	0x0020
+#define SCTP_PR_SCTP_PRIO	0x0030
+#define SCTP_PR_SCTP_MAX	SCTP_PR_SCTP_PRIO
+#define SCTP_PR_SCTP_MASK	0x0030
+
+#define __SCTP_PR_INDEX(x)	((x >> 4) - 1)
+#define SCTP_PR_INDEX(x)	__SCTP_PR_INDEX(SCTP_PR_SCTP_ ## x)
+
+#define SCTP_PR_POLICY(x)	((x) & SCTP_PR_SCTP_MASK)
+#define SCTP_PR_SET_POLICY(flags, x)	\
+	do {				\
+		flags &= ~SCTP_PR_SCTP_MASK;	\
+		flags |= x;		\
+	} while (0)
+
+#define SCTP_PR_TTL_ENABLED(x)	(SCTP_PR_POLICY(x) == SCTP_PR_SCTP_TTL)
+#define SCTP_PR_RTX_ENABLED(x)	(SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX)
+#define SCTP_PR_PRIO_ENABLED(x)	(SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO)
 
 /* These are bit fields for msghdr->msg_flags.  See section 5.1.  */
 /* On user space Linux, these live in <bits/socket.h> as an enum.  */
@@ -902,4 +927,21 @@ struct sctp_paddrthlds {
 	__u16 spt_pathpfthld;
 };
 
+/*
+ * Socket Option for Getting the Association/Stream-Specific PR-SCTP Status
+ */
+struct sctp_prstatus {
+	sctp_assoc_t sprstat_assoc_id;
+	__u16 sprstat_sid;
+	__u16 sprstat_policy;
+	__u64 sprstat_abandoned_unsent;
+	__u64 sprstat_abandoned_sent;
+};
+
+struct sctp_default_prinfo {
+	sctp_assoc_t pr_assoc_id;
+	__u32 pr_value;
+	__u16 pr_policy;
+};
+
 #endif /* _UAPI_SCTP_H */
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index fecb5cc48c40..a4d00c608d8f 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -27,6 +27,7 @@
 #define SKBEDIT_F_PRIORITY		0x1
 #define SKBEDIT_F_QUEUE_MAPPING		0x2
 #define SKBEDIT_F_MARK			0x4
+#define SKBEDIT_F_PTYPE			0x8
 
 struct tc_skbedit {
 	tc_gen;
@@ -40,6 +41,7 @@ enum {
 	TCA_SKBEDIT_QUEUE_MAPPING,
 	TCA_SKBEDIT_MARK,
 	TCA_SKBEDIT_PAD,
+	TCA_SKBEDIT_PTYPE,
 	__TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 53e8e3fe6b1b..482898fc433a 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -115,12 +115,22 @@ enum {
 #define TCP_CC_INFO		26	/* Get Congestion Control (optional) info */
 #define TCP_SAVE_SYN		27	/* Record SYN headers for new connections */
 #define TCP_SAVED_SYN		28	/* Get SYN headers recorded for connection */
+#define TCP_REPAIR_WINDOW	29	/* Get/set window parameters */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
 	__u32	opt_val;
 };
 
+struct tcp_repair_window {
+	__u32	snd_wl1;
+	__u32	snd_wnd;
+	__u32	max_window;
+
+	__u32	rcv_wnd;
+	__u32	rcv_wup;
+};
+
 enum {
 	TCP_NO_QUEUE,
 	TCP_RECV_QUEUE,
diff --git a/init/main.c b/init/main.c
index 4c17fda5c2ff..eae02aa03c9e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void)
 }
 
 # if THREAD_SIZE >= PAGE_SIZE
-void __init __weak thread_info_cache_init(void)
+void __init __weak thread_stack_cache_init(void)
 {
 }
 #endif
@@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void)
 	/* Should be run before the first non-init thread is created */
 	init_espfix_bsp();
 #endif
-	thread_info_cache_init();
+	thread_stack_cache_init();
 	cred_init();
 	fork_init();
 	proc_caches_init();
@@ -708,11 +708,13 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn)
 {
 	struct blacklist_entry *entry;
 	char fn_name[KSYM_SYMBOL_LEN];
+	unsigned long addr;
 
 	if (list_empty(&blacklisted_initcalls))
 		return false;
 
-	sprint_symbol_no_offset(fn_name, (unsigned long)fn);
+	addr = (unsigned long) dereference_function_descriptor(fn);
+	sprint_symbol_no_offset(fn_name, addr);
 
 	list_for_each_entry(entry, &blacklisted_initcalls, next) {
 		if (!strcmp(fn_name, entry->buf)) {
diff --git a/kernel/audit.c b/kernel/audit.c
index 22bb4f24f071..8d528f9930da 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1883,6 +1883,23 @@ out_null:
 	audit_log_format(ab, " exe=(null)");
 }
 
+struct tty_struct *audit_get_tty(struct task_struct *tsk)
+{
+	struct tty_struct *tty = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tsk->sighand->siglock, flags);
+	if (tsk->signal)
+		tty = tty_kref_get(tsk->signal->tty);
+	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
+	return tty;
+}
+
+void audit_put_tty(struct tty_struct *tty)
+{
+	tty_kref_put(tty);
+}
+
 void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
 	const struct cred *cred;
diff --git a/kernel/audit.h b/kernel/audit.h
index cbbe6bb6496e..a492f4c4e710 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -23,6 +23,7 @@
 #include <linux/audit.h>
 #include <linux/skbuff.h>
 #include <uapi/linux/mqueue.h>
+#include <linux/tty.h>
 
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname().  If we get more names we will allocate
@@ -262,6 +263,9 @@ extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
 extern void audit_log_d_path_exe(struct audit_buffer *ab,
 				 struct mm_struct *mm);
 
+extern struct tty_struct *audit_get_tty(struct task_struct *tsk);
+extern void audit_put_tty(struct tty_struct *tty);
+
 /* audit watch functions */
 #ifdef CONFIG_AUDIT_WATCH
 extern void audit_put_watch(struct audit_watch *watch);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 62ab53d7619c..2672d105cffc 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -63,7 +63,6 @@
 #include <asm/unistd.h>
 #include <linux/security.h>
 #include <linux/list.h>
-#include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/highmem.h>
 #include <linux/syscalls.h>
@@ -1985,14 +1984,15 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
 	if (!audit_enabled)
 		return;
 
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+	if (!ab)
+		return;
+
 	uid = from_kuid(&init_user_ns, task_uid(current));
 	oldloginuid = from_kuid(&init_user_ns, koldloginuid);
 	loginuid = from_kuid(&init_user_ns, kloginuid),
 	tty = audit_get_tty(current);
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-	if (!ab)
-		return;
 	audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
 	audit_log_task_context(ab);
 	audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5af30732697b..db1a743e3db2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -390,9 +390,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
 
 static void prog_fd_array_put_ptr(void *ptr)
 {
-	struct bpf_prog *prog = ptr;
-
-	bpf_prog_put_rcu(prog);
+	bpf_prog_put(ptr);
 }
 
 /* decrement refcnt of all bpf_progs that are stored in this map */
@@ -539,3 +537,46 @@ static int __init register_perf_event_array_map(void)
 	return 0;
 }
 late_initcall(register_perf_event_array_map);
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
+				     struct file *map_file /* not used */,
+				     int fd)
+{
+	return cgroup_get_from_fd(fd);
+}
+
+static void cgroup_fd_array_put_ptr(void *ptr)
+{
+	/* cgroup_put free cgrp after a rcu grace period */
+	cgroup_put(ptr);
+}
+
+static void cgroup_fd_array_free(struct bpf_map *map)
+{
+	bpf_fd_array_map_clear(map);
+	fd_array_map_free(map);
+}
+
+static const struct bpf_map_ops cgroup_array_ops = {
+	.map_alloc = fd_array_map_alloc,
+	.map_free = cgroup_fd_array_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = fd_array_map_lookup_elem,
+	.map_delete_elem = fd_array_map_delete_elem,
+	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
+	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
+};
+
+static struct bpf_map_type_list cgroup_array_type __read_mostly = {
+	.ops = &cgroup_array_ops,
+	.type = BPF_MAP_TYPE_CGROUP_ARRAY,
+};
+
+static int __init register_cgroup_array_map(void)
+{
+	bpf_register_map_type(&cgroup_array_type);
+	return 0;
+}
+late_initcall(register_cgroup_array_map);
+#endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b94a36550591..d638062f66d6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -719,14 +719,13 @@ select_insn:
 
 		if (unlikely(index >= array->map.max_entries))
 			goto out;
-
 		if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
 			goto out;
 
 		tail_call_cnt++;
 
 		prog = READ_ONCE(array->ptrs[index]);
-		if (unlikely(!prog))
+		if (!prog)
 			goto out;
 
 		/* ARG1 at this point is guaranteed to point to CTX from
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index ad7a0573f71b..1ea3afba1a4f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -101,7 +101,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 
 static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
-	return raw_smp_processor_id();
+	return smp_processor_id();
 }
 
 const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 318858edb1cd..5967b870a895 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -11,7 +11,7 @@
  * version 2 as published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/magic.h>
 #include <linux/major.h>
 #include <linux/mount.h>
@@ -367,8 +367,6 @@ static struct file_system_type bpf_fs_type = {
 	.kill_sb	= kill_litter_super,
 };
 
-MODULE_ALIAS_FS("bpf");
-
 static int __init bpf_init(void)
 {
 	int ret;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c23a4e9311b3..96d938a22050 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -393,7 +393,8 @@ static int map_update_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_update(map, key, value, attr->flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
-		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
 		rcu_read_lock();
 		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
 						   attr->flags);
@@ -623,7 +624,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 	free_uid(user);
 }
 
-static void __prog_put_common(struct rcu_head *rcu)
+static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 {
 	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 
@@ -632,17 +633,10 @@ static void __prog_put_common(struct rcu_head *rcu)
 	bpf_prog_free(aux->prog);
 }
 
-/* version of bpf_prog_put() that is called after a grace period */
-void bpf_prog_put_rcu(struct bpf_prog *prog)
-{
-	if (atomic_dec_and_test(&prog->aux->refcnt))
-		call_rcu(&prog->aux->rcu, __prog_put_common);
-}
-
 void bpf_prog_put(struct bpf_prog *prog)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt))
-		__prog_put_common(&prog->aux->rcu);
+		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_put);
 
@@ -650,7 +644,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 {
 	struct bpf_prog *prog = filp->private_data;
 
-	bpf_prog_put_rcu(prog);
+	bpf_prog_put(prog);
 	return 0;
 }
 
@@ -664,7 +658,7 @@ int bpf_prog_new_fd(struct bpf_prog *prog)
 				O_RDWR | O_CLOEXEC);
 }
 
-static struct bpf_prog *__bpf_prog_get(struct fd f)
+static struct bpf_prog *____bpf_prog_get(struct fd f)
 {
 	if (!f.file)
 		return ERR_PTR(-EBADF);
@@ -685,24 +679,35 @@ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
 	return prog;
 }
 
-/* called by sockets/tracing/seccomp before attaching program to an event
- * pairs with bpf_prog_put()
- */
-struct bpf_prog *bpf_prog_get(u32 ufd)
+static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 {
 	struct fd f = fdget(ufd);
 	struct bpf_prog *prog;
 
-	prog = __bpf_prog_get(f);
+	prog = ____bpf_prog_get(f);
 	if (IS_ERR(prog))
 		return prog;
+	if (type && prog->type != *type) {
+		prog = ERR_PTR(-EINVAL);
+		goto out;
+	}
 
 	prog = bpf_prog_inc(prog);
+out:
 	fdput(f);
-
 	return prog;
 }
-EXPORT_SYMBOL_GPL(bpf_prog_get);
+
+struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+	return __bpf_prog_get(ufd, NULL);
+}
+
+struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
+{
+	return __bpf_prog_get(ufd, &type);
+}
+EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD kern_version
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 668e07903c8f..e206c2181412 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -126,31 +126,6 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
-/* types of values stored in eBPF registers */
-enum bpf_reg_type {
-	NOT_INIT = 0,		 /* nothing was written into register */
-	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
-	PTR_TO_CTX,		 /* reg points to bpf_context */
-	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
-	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
-	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
-	FRAME_PTR,		 /* reg == frame_pointer */
-	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
-	CONST_IMM,		 /* constant integer value */
-
-	/* PTR_TO_PACKET represents:
-	 * skb->data
-	 * skb->data + imm
-	 * skb->data + (u16) var
-	 * skb->data + (u16) var + imm
-	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
-	 * if (id > 0) means that some 'var' was added
-	 * if (off > 0) menas that 'imm' was added
-	 */
-	PTR_TO_PACKET,
-	PTR_TO_PACKET_END,	 /* skb->data + headlen */
-};
-
 struct reg_state {
 	enum bpf_reg_type type;
 	union {
@@ -695,10 +670,10 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
 
 /* check access to 'struct bpf_context' fields */
 static int check_ctx_access(struct verifier_env *env, int off, int size,
-			    enum bpf_access_type t)
+			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
 	if (env->prog->aux->ops->is_valid_access &&
-	    env->prog->aux->ops->is_valid_access(off, size, t)) {
+	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
 		/* remember the offset of last byte accessed in ctx */
 		if (env->prog->aux->max_ctx_offset < off + size)
 			env->prog->aux->max_ctx_offset = off + size;
@@ -798,21 +773,19 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			mark_reg_unknown_value(state->regs, value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
+		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into ctx\n", value_regno);
 			return -EACCES;
 		}
-		err = check_ctx_access(env, off, size, t);
+		err = check_ctx_access(env, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value(state->regs, value_regno);
-			if (off == offsetof(struct __sk_buff, data) &&
-			    env->allow_ptr_leaks)
+			if (env->allow_ptr_leaks)
 				/* note that reg.[id|off|range] == 0 */
-				state->regs[value_regno].type = PTR_TO_PACKET;
-			else if (off == offsetof(struct __sk_buff, data_end) &&
-				 env->allow_ptr_leaks)
-				state->regs[value_regno].type = PTR_TO_PACKET_END;
+				state->regs[value_regno].type = reg_type;
 		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -1062,6 +1035,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (func_id != BPF_FUNC_get_stackid)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_CGROUP_ARRAY:
+		if (func_id != BPF_FUNC_skb_in_cgroup)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -1081,6 +1058,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_skb_in_cgroup:
+		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86cb5c6e8932..50787cd61da2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -62,6 +62,7 @@
 #include <linux/proc_ns.h>
 #include <linux/nsproxy.h>
 #include <linux/proc_ns.h>
+#include <linux/file.h>
 #include <net/sock.h>
 
 /*
@@ -837,6 +838,8 @@ static void put_css_set_locked(struct css_set *cset)
 
 static void put_css_set(struct css_set *cset)
 {
+	unsigned long flags;
+
 	/*
 	 * Ensure that the refcount doesn't hit zero while any readers
 	 * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -845,9 +848,9 @@ static void put_css_set(struct css_set *cset)
 	if (atomic_add_unless(&cset->refcount, -1, 1))
 		return;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irqsave(&css_set_lock, flags);
 	put_css_set_locked(cset);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irqrestore(&css_set_lock, flags);
 }
 
 /*
@@ -1070,11 +1073,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	cset = find_existing_css_set(old_cset, cgrp, template);
 	if (cset)
 		get_css_set(cset);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (cset)
 		return cset;
@@ -1102,7 +1105,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	 * find_existing_css_set() */
 	memcpy(cset->subsys, template, sizeof(cset->subsys));
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	/* Add reference counts and links from the new css_set. */
 	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
 		struct cgroup *c = link->cgrp;
@@ -1128,7 +1131,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 		css_get(css);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return cset;
 }
@@ -1192,7 +1195,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	 * Release all the links from cset_links to this hierarchy's
 	 * root cgroup
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
 		list_del(&link->cset_link);
@@ -1200,7 +1203,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 		kfree(link);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (!list_empty(&root->root_list)) {
 		list_del(&root->root_list);
@@ -1600,11 +1603,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
 		ss->root = dst_root;
 		css->cgroup = dcgrp;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		hash_for_each(css_set_table, i, cset, hlist)
 			list_move_tail(&cset->e_cset_node[ss->id],
 				       &dcgrp->e_csets[ss->id]);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		/* default hierarchy doesn't enable controllers by default */
 		dst_root->subsys_mask |= 1 << ssid;
@@ -1640,10 +1643,10 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 	if (!buf)
 		return -ENOMEM;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
 	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (len >= PATH_MAX)
 		len = -ERANGE;
@@ -1897,7 +1900,7 @@ static void cgroup_enable_task_cg_lists(void)
 {
 	struct task_struct *p, *g;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	if (use_task_css_set_links)
 		goto out_unlock;
@@ -1922,8 +1925,12 @@ static void cgroup_enable_task_cg_lists(void)
 		 * entry won't be deleted though the process has exited.
 		 * Do it while holding siglock so that we don't end up
 		 * racing against cgroup_exit().
+		 *
+		 * Interrupts were already disabled while acquiring
+		 * the css_set_lock, so we do not need to disable it
+		 * again when acquiring the sighand->siglock here.
 		 */
-		spin_lock_irq(&p->sighand->siglock);
+		spin_lock(&p->sighand->siglock);
 		if (!(p->flags & PF_EXITING)) {
 			struct css_set *cset = task_css_set(p);
 
@@ -1932,11 +1939,11 @@ static void cgroup_enable_task_cg_lists(void)
 			list_add_tail(&p->cg_list, &cset->tasks);
 			get_css_set(cset);
 		}
-		spin_unlock_irq(&p->sighand->siglock);
+		spin_unlock(&p->sighand->siglock);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 out_unlock:
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -2043,13 +2050,13 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	 * Link the root cgroup in this hierarchy into all the css_set
 	 * objects.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	hash_for_each(css_set_table, i, cset, hlist) {
 		link_css_set(&tmp_links, cset, root_cgrp);
 		if (css_set_populated(cset))
 			cgroup_update_populated(root_cgrp, true);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	BUG_ON(!list_empty(&root_cgrp->self.children));
 	BUG_ON(atomic_read(&root->nr_cgrps) != 1);
@@ -2256,11 +2263,11 @@ out_mount:
 		struct cgroup *cgrp;
 
 		mutex_lock(&cgroup_mutex);
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 
 		cgrp = cset_cgroup_from_root(ns->root_cset, root);
 
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 		mutex_unlock(&cgroup_mutex);
 
 		nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
@@ -2337,11 +2344,11 @@ char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
 	char *ret;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 
 	return ret;
@@ -2369,7 +2376,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 	char *path = NULL;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
 
@@ -2382,7 +2389,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 			path = buf;
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 	return path;
 }
@@ -2557,7 +2564,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 * the new cgroup.  There are no failure cases after here, so this
 	 * is the commit point.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(cset, &tset->src_csets, mg_node) {
 		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
 			struct css_set *from_cset = task_css_set(task);
@@ -2568,7 +2575,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 			put_css_set_locked(from_cset);
 		}
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/*
 	 * Migration is committed, all target tasks are now on dst_csets.
@@ -2597,13 +2604,13 @@ out_cancel_attach:
 		}
 	} while_each_subsys_mask();
 out_release_tset:
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_splice_init(&tset->dst_csets, &tset->src_csets);
 	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
 		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
 		list_del_init(&cset->mg_node);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return ret;
 }
 
@@ -2634,7 +2641,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		cset->mg_src_cgrp = NULL;
 		cset->mg_dst_cgrp = NULL;
@@ -2642,7 +2649,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 		list_del_init(&cset->mg_preload_node);
 		put_css_set_locked(cset);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -2783,7 +2790,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 	 * already PF_EXITING could be freed from underneath us unless we
 	 * take an rcu_read_lock.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	task = leader;
 	do {
@@ -2792,7 +2799,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 			break;
 	} while_each_thread(leader, task);
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return cgroup_taskset_migrate(&tset, root);
 }
@@ -2816,7 +2823,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 		return -EBUSY;
 
 	/* look up all src csets */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	task = leader;
 	do {
@@ -2826,7 +2833,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 			break;
 	} while_each_thread(leader, task);
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* prepare dst csets and commit */
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
@@ -2859,9 +2866,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 		struct cgroup *cgrp;
 		struct inode *inode;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		while (!cgroup_is_descendant(dst_cgrp, cgrp))
 			cgrp = cgroup_parent(cgrp);
@@ -2962,9 +2969,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 		if (root == &cgrp_dfl_root)
 			continue;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		from_cgrp = task_cgroup_from_root(from, root);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		retval = cgroup_attach_task(from_cgrp, tsk, false);
 		if (retval)
@@ -3080,7 +3087,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 	percpu_down_write(&cgroup_threadgroup_rwsem);
 
 	/* look up all csses currently attached to @cgrp's subtree */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
 		struct cgrp_cset_link *link;
 
@@ -3088,14 +3095,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 			cgroup_migrate_add_src(link->cset, dsct,
 					       &preloaded_csets);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* NULL dst indicates self on default hierarchy */
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
 		goto out_finish;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
 		struct task_struct *task, *ntask;
 
@@ -3107,7 +3114,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 		list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
 			cgroup_taskset_add(task, &tset);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	ret = cgroup_taskset_migrate(&tset, cgrp->root);
 out_finish:
@@ -3908,10 +3915,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 	int count = 0;
 	struct cgrp_cset_link *link;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
 		count += atomic_read(&link->cset->refcount);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return count;
 }
 
@@ -4249,7 +4256,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	memset(it, 0, sizeof(*it));
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	it->ss = css->ss;
 
@@ -4262,7 +4269,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	css_task_iter_advance_css_set(it);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -4280,7 +4287,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 		it->cur_task = NULL;
 	}
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	if (it->task_pos) {
 		it->cur_task = list_entry(it->task_pos, struct task_struct,
@@ -4289,7 +4296,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 		css_task_iter_advance(it);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return it->cur_task;
 }
@@ -4303,10 +4310,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 void css_task_iter_end(struct css_task_iter *it)
 {
 	if (it->cur_cset) {
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		list_del(&it->iters_node);
 		put_css_set_locked(it->cur_cset);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	}
 
 	if (it->cur_task)
@@ -4338,10 +4345,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	mutex_lock(&cgroup_mutex);
 
 	/* all tasks in @from are being moved, all csets are source */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &from->cset_links, cset_link)
 		cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
@@ -5063,6 +5070,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
 	css->ss = ss;
+	css->id = -1;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
 	css->serial_nr = css_serial_nr_next++;
@@ -5150,7 +5158,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
 	if (err < 0)
-		goto err_free_percpu_ref;
+		goto err_free_css;
 	css->id = err;
 
 	/* @css is ready to be brought online now, make it visible */
@@ -5174,9 +5182,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 err_list_del:
 	list_del_rcu(&css->sibling);
-	cgroup_idr_remove(&ss->css_idr, css->id);
-err_free_percpu_ref:
-	percpu_ref_exit(&css->refcnt);
 err_free_css:
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 	return ERR_PTR(err);
@@ -5451,10 +5456,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	cgrp->self.flags &= ~CSS_ONLINE;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
 		link->cset->dead = true;
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
@@ -5725,7 +5730,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		goto out;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	for_each_root(root) {
 		struct cgroup_subsys *ss;
@@ -5778,7 +5783,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 
 	retval = 0;
 out_unlock:
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 	kfree(buf);
 out:
@@ -5923,13 +5928,13 @@ void cgroup_post_fork(struct task_struct *child)
 	if (use_task_css_set_links) {
 		struct css_set *cset;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		cset = task_css_set(current);
 		if (list_empty(&child->cg_list)) {
 			get_css_set(cset);
 			css_set_move_task(child, NULL, cset, false);
 		}
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	}
 
 	/*
@@ -5974,9 +5979,9 @@ void cgroup_exit(struct task_struct *tsk)
 	cset = task_css_set(tsk);
 
 	if (!list_empty(&tsk->cg_list)) {
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		css_set_move_task(tsk, cset, NULL, false);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	} else {
 		get_css_set(cset);
 	}
@@ -6044,9 +6049,9 @@ static void cgroup_release_agent(struct work_struct *work)
 	if (!pathbuf || !agentbuf)
 		goto out;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	if (!path)
 		goto out;
 
@@ -6205,6 +6210,40 @@ struct cgroup *cgroup_get_from_path(const char *path)
 }
 EXPORT_SYMBOL_GPL(cgroup_get_from_path);
 
+/**
+ * cgroup_get_from_fd - get a cgroup pointer from a fd
+ * @fd: fd obtained by open(cgroup2_dir)
+ *
+ * Find the cgroup from a fd which should be obtained
+ * by opening a cgroup directory.  Returns a pointer to the
+ * cgroup on success. ERR_PTR is returned if the cgroup
+ * cannot be found.
+ */
+struct cgroup *cgroup_get_from_fd(int fd)
+{
+	struct cgroup_subsys_state *css;
+	struct cgroup *cgrp;
+	struct file *f;
+
+	f = fget_raw(fd);
+	if (!f)
+		return ERR_PTR(-EBADF);
+
+	css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
+	fput(f);
+	if (IS_ERR(css))
+		return ERR_CAST(css);
+
+	cgrp = css->cgroup;
+	if (!cgroup_on_dfl(cgrp)) {
+		cgroup_put(cgrp);
+		return ERR_PTR(-EBADF);
+	}
+
+	return cgrp;
+}
+EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
+
 /*
  * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
  * definition in cgroup-defs.h.
@@ -6306,12 +6345,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 		return ERR_PTR(-EPERM);
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	cset = task_css_set(current);
 	get_css_set(cset);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 
 	new_ns = alloc_cgroup_ns();
@@ -6435,7 +6474,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 	if (!name_buf)
 		return -ENOMEM;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	cset = rcu_dereference(current->cgroups);
 	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
@@ -6446,7 +6485,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 			   c->root->hierarchy_id, name_buf);
 	}
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	kfree(name_buf);
 	return 0;
 }
@@ -6457,7 +6496,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 	struct cgroup_subsys_state *css = seq_css(seq);
 	struct cgrp_cset_link *link;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
 		struct css_set *cset = link->cset;
 		struct task_struct *task;
@@ -6480,7 +6519,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 	overflow:
 		seq_puts(seq, "  ...\n");
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return 0;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 274450efea90..9c51ec3f0f44 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
+	exclusive_event_destroy(event);
+	module_put(event->pmu->module);
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355aa97f..4a7ec0c6c88c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk)
 }
 #endif
 
-void __weak arch_release_thread_info(struct thread_info *ti)
+void __weak arch_release_thread_stack(unsigned long *stack)
 {
 }
 
-#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
 
 /*
  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
  * kmemcache based allocator.
  */
 # if THREAD_SIZE >= PAGE_SIZE
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
 	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
@@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 	return page ? page_address(page) : NULL;
 }
 
-static inline void free_thread_info(struct thread_info *ti)
+static inline void free_thread_stack(unsigned long *stack)
 {
-	struct page *page = virt_to_page(ti);
+	struct page *page = virt_to_page(stack);
 
 	memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
 				    -(1 << THREAD_SIZE_ORDER));
 	__free_kmem_pages(page, THREAD_SIZE_ORDER);
 }
 # else
-static struct kmem_cache *thread_info_cache;
+static struct kmem_cache *thread_stack_cache;
 
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
-	return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 }
 
-static void free_thread_info(struct thread_info *ti)
+static void free_thread_stack(unsigned long *stack)
 {
-	kmem_cache_free(thread_info_cache, ti);
+	kmem_cache_free(thread_stack_cache, stack);
 }
 
-void thread_info_cache_init(void)
+void thread_stack_cache_init(void)
 {
-	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+	thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
 					      THREAD_SIZE, 0, NULL);
-	BUG_ON(thread_info_cache == NULL);
+	BUG_ON(thread_stack_cache == NULL);
 }
 # endif
 #endif
@@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-static void account_kernel_stack(struct thread_info *ti, int account)
+static void account_kernel_stack(unsigned long *stack, int account)
 {
-	struct zone *zone = page_zone(virt_to_page(ti));
+	struct zone *zone = page_zone(virt_to_page(stack));
 
 	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
 }
@@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 void free_task(struct task_struct *tsk)
 {
 	account_kernel_stack(tsk->stack, -1);
-	arch_release_thread_info(tsk->stack);
-	free_thread_info(tsk->stack);
+	arch_release_thread_stack(tsk->stack);
+	free_thread_stack(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
-	struct thread_info *ti;
+	unsigned long *stack;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!tsk)
 		return NULL;
 
-	ti = alloc_thread_info_node(tsk, node);
-	if (!ti)
+	stack = alloc_thread_stack_node(tsk, node);
+	if (!stack)
 		goto free_tsk;
 
 	err = arch_dup_task_struct(tsk, orig);
 	if (err)
-		goto free_ti;
+		goto free_stack;
 
-	tsk->stack = ti;
+	tsk->stack = stack;
 #ifdef CONFIG_SECCOMP
 	/*
 	 * We must handle setting up seccomp filters once we're under
@@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
 
-	account_kernel_stack(ti, 1);
+	account_kernel_stack(stack, 1);
 
 	kcov_task_init(tsk);
 
 	return tsk;
 
-free_ti:
-	free_thread_info(ti);
+free_stack:
+	free_thread_stack(stack);
 free_tsk:
 	free_task_struct(tsk);
 	return NULL;
diff --git a/kernel/futex.c b/kernel/futex.c
index ee25f5ba4aca..33664f70e2d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *tail;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -530,7 +530,15 @@ again:
 	 * considered here and page lock forces unnecessarily serialization
 	 * From this point on, mapping will be re-verified if necessary and
 	 * page lock will be acquired only if it is unavoidable
-	 */
+	 *
+	 * Mapping checks require the head page for any compound page so the
+	 * head page and mapping is looked up now. For anonymous pages, it
+	 * does not matter if the page splits in the future as the key is
+	 * based on the address. For filesystem-backed pages, the tail is
+	 * required as the index of the page determines the key. For
+	 * base pages, there is no tail page and tail == page.
+	 */
+	tail = page;
 	page = compound_head(page);
 	mapping = READ_ONCE(page->mapping);
 
@@ -654,7 +662,7 @@ again:
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.inode = inode;
-		key->shared.pgoff = basepage_index(page);
+		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254eeb4b4e..4b353e0be121 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
+	int v, v1;
+
 	STATIC_KEY_CHECK_USE();
-	if (atomic_inc_not_zero(&key->enabled))
-		return;
+
+	/*
+	 * Careful if we get concurrent static_key_slow_inc() calls;
+	 * later calls must wait for the first one to _finish_ the
+	 * jump_label_update() process.  At the same time, however,
+	 * the jump_label_update() call below wants to see
+	 * static_key_enabled(&key) for jumps to be updated properly.
+	 *
+	 * So give a special meaning to negative key->enabled: it sends
+	 * static_key_slow_inc() down the slow path, and it is non-zero
+	 * so it counts as "enabled" in jump_label_update().  Note that
+	 * atomic_inc_unless_negative() checks >= 0, so roll our own.
+	 */
+	for (v = atomic_read(&key->enabled); v > 0; v = v1) {
+		v1 = atomic_cmpxchg(&key->enabled, v, v + 1);
+		if (likely(v1 == v))
+			return;
+	}
 
 	jump_label_lock();
-	if (atomic_inc_return(&key->enabled) == 1)
+	if (atomic_read(&key->enabled) == 0) {
+		atomic_set(&key->enabled, -1);
 		jump_label_update(key);
+		atomic_set(&key->enabled, 1);
+	} else {
+		atomic_inc(&key->enabled);
+	}
 	jump_label_unlock();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
@@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc);
 static void __static_key_slow_dec(struct static_key *key,
 		unsigned long rate_limit, struct delayed_work *work)
 {
+	/*
+	 * The negative count check is valid even when a negative
+	 * key->enabled is in use by static_key_slow_inc(); a
+	 * __static_key_slow_dec() before the first static_key_slow_inc()
+	 * returns is unbalanced, because all other static_key_slow_inc()
+	 * instances block while the update is in progress.
+	 */
 	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
 		WARN(atomic_read(&key->enabled) < 0,
 		     "jump label: negative count!\n");
diff --git a/kernel/kcov.c b/kernel/kcov.c
index a02f2dddd1d7..8d44b3fea9d0 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -264,7 +264,12 @@ static const struct file_operations kcov_fops = {
 
 static int __init kcov_init(void)
 {
-	if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+	/*
+	 * The kcov debugfs file won't ever get removed and thus,
+	 * there is no need to protect it against removal races. The
+	 * use of debugfs_create_file_unsafe() is actually safe here.
+	 */
+	if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) {
 		pr_err("failed to create kcov in debugfs\n");
 		return -ENOMEM;
 	}
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 3ef3736002d8..9c951fade415 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter)
 }
 
 void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			    struct thread_info *ti)
+			    struct task_struct *task)
 {
 	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
 
 	/* Mark the current thread as blocked on the lock: */
-	ti->task->blocked_on = waiter;
+	task->blocked_on = waiter;
 }
 
 void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			 struct thread_info *ti)
+			 struct task_struct *task)
 {
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
-	DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
-	DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
-	ti->task->blocked_on = NULL;
+	DEBUG_LOCKS_WARN_ON(waiter->task != task);
+	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+	task->blocked_on = NULL;
 
 	list_del_init(&waiter->list);
 	waiter->task = NULL;
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 0799fd3e4cfa..d06ae3bb46c5 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock,
 extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
 extern void debug_mutex_add_waiter(struct mutex *lock,
 				   struct mutex_waiter *waiter,
-				   struct thread_info *ti);
+				   struct task_struct *task);
 extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-				struct thread_info *ti);
+				struct task_struct *task);
 extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name,
 			     struct lock_class_key *key);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index e364b424b019..a70b90db3909 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	if (!hold_ctx)
 		return 0;
 
-	if (unlikely(ctx == hold_ctx))
-		return -EALREADY;
-
 	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
 	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 	int ret;
 
+	if (use_ww_ctx) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
@@ -534,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		goto skip_wait;
 
 	debug_mutex_lock_common(lock, &waiter);
-	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
+	debug_mutex_add_waiter(lock, &waiter, task);
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
 	list_add_tail(&waiter.list, &lock->wait_list);
@@ -581,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	}
 	__set_task_state(task, TASK_RUNNING);
 
-	mutex_remove_waiter(lock, &waiter, current_thread_info());
+	mutex_remove_waiter(lock, &waiter, task);
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
 		atomic_set(&lock->count, 0);
@@ -602,7 +605,7 @@ skip_wait:
 	return 0;
 
 err:
-	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
+	mutex_remove_waiter(lock, &waiter, task);
 	spin_unlock_mutex(&lock->wait_lock, flags);
 	debug_mutex_free_waiter(&waiter);
 	mutex_release(&lock->dep_map, 1, ip);
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 5cda397607f2..a68bae5e852a 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -13,7 +13,7 @@
 		do { spin_lock(lock); (void)(flags); } while (0)
 #define spin_unlock_mutex(lock, flags) \
 		do { spin_unlock(lock); (void)(flags); } while (0)
-#define mutex_remove_waiter(lock, waiter, ti) \
+#define mutex_remove_waiter(lock, waiter, task) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourseves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more intersting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
diff --git a/kernel/power/process.c b/kernel/power/process.c
index df058bed53ce..0c2ee9761d57 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -146,6 +146,18 @@ int freeze_processes(void)
 	if (!error && !oom_killer_disable())
 		error = -EBUSY;
 
+	/*
+	 * There is a hard to fix race between oom_reaper kernel thread
+	 * and oom_killer_disable. oom_reaper calls exit_oom_victim
+	 * before the victim reaches exit_mm so try to freeze all the tasks
+	 * again and catch such a left over task.
+	 */
+	if (!error) {
+		pr_info("Double checking all user space processes after OOM killer disable... ");
+		error = try_to_freeze_tasks(true);
+		pr_cont("\n");
+	}
+
 	if (error)
 		thaw_processes();
 	return error;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f2cae4620c7..51d7105f529a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1536,7 +1536,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_active(dest_cpu))
+			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
+				continue;
+			if (!cpu_online(dest_cpu))
 				continue;
 			goto out;
 		}
@@ -2253,9 +2255,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
 
-#ifdef CONFIG_SCHEDSTATS
 static void set_schedstats(bool enabled)
 {
 	if (enabled)
@@ -2278,11 +2282,16 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet.  Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
 	if (!strcmp(str, "enable")) {
-		set_schedstats(true);
+		__sched_schedstats = true;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_schedstats(false);
+		__sched_schedstats = false;
 		ret = 1;
 	}
 out:
@@ -2293,6 +2302,11 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2313,8 +2327,10 @@ int sysctl_schedstats(struct ctl_table *table, int write,
 		set_schedstats(state);
 	return err;
 }
-#endif
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
 
 /*
  * fork()/clone()-time setup:
@@ -2521,10 +2537,9 @@ void wake_up_new_task(struct task_struct *p)
 	 */
 	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
-	/* Post initialize new task's util average when its cfs_rq is set */
+	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);
 
-	rq = __task_rq_lock(p, &rf);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
@@ -3156,7 +3171,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
 static inline void schedule_debug(struct task_struct *prev)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
-	BUG_ON(task_stack_end_corrupted(prev));
+	if (task_stack_end_corrupted(prev))
+		panic("corrupted stack end detected inside scheduler\n");
 #endif
 
 	if (unlikely(in_atomic_preempt_off())) {
@@ -5133,14 +5149,16 @@ void show_state_filter(unsigned long state_filter)
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take a lot of time:
+		 * Also, reset softlockup watchdogs on all CPUs, because
+		 * another CPU might be blocked waiting for us to process
+		 * an IPI.
 		 */
 		touch_nmi_watchdog();
+		touch_all_softlockup_watchdogs();
 		if (!state_filter || (p->state & state_filter))
 			sched_show_task(p);
 	}
 
-	touch_all_softlockup_watchdogs();
-
 #ifdef CONFIG_SCHED_DEBUG
 	if (!state_filter)
 		sysrq_sched_debug_show();
@@ -7487,6 +7505,8 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
+	init_schedstats();
+
 	scheduler_running = 1;
 }
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index cf905f655ba1..0368c393a336 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(p->se.vruntime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled()) {
-		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-			SPLIT_NS(p->se.statistics.wait_sum),
-			SPLIT_NS(p->se.sum_exec_runtime),
-			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
-	}
-#else
+
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		0LL, 0L,
+		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		0LL, 0L);
-#endif
+		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e83db73..bdcbeea90c95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	}
 }
 
+/*
+ * Unsigned subtract and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define sub_positive(_ptr, _val) do {				\
+	typeof(_ptr) ptr = (_ptr);				\
+	typeof(*ptr) val = (_val);				\
+	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
+	res = var - val;					\
+	if (res > var)						\
+		res = 0;					\
+	WRITE_ONCE(*ptr, res);					\
+} while (0)
+
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
@@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-		sa->load_avg = max_t(long, sa->load_avg - r, 0);
-		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->load_avg, r);
+		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
 		removed_load = 1;
 	}
 
 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-		sa->util_avg = max_t(long, sa->util_avg - r, 0);
-		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->util_avg, r);
+		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
 		removed_util = 1;
 	}
 
@@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
 
-	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-	cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3246,7 +3263,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_iowait_enabled()  ||
 			trace_sched_stat_blocked_enabled() ||
 			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");
@@ -4185,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
 
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
@@ -4500,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
@@ -8496,8 +8532,9 @@ void free_fair_sched_group(struct task_group *tg)
 
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
-	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+	struct rq *rq;
 	int i;
 
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8512,6 +8549,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
 	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
@@ -8525,7 +8564,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+
+		raw_spin_lock_irq(&rq->lock);
 		post_init_entity_util_avg(se);
+		raw_spin_unlock_irq(&rq->lock);
 	}
 
 	return 1;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 72f1f3087b04..7cbeb92a1cb9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -437,7 +437,7 @@ struct cfs_rq {
 
 	u64 throttled_clock, throttled_clock_task;
 	u64 throttled_clock_task_time;
-	int throttled, throttle_count;
+	int throttled, throttle_count, throttle_uptodate;
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 70b3b6a20fb0..78955cbea31c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
 # define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
 # define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
+# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)
+# define schedstat_val(rq, field)	0
 #endif
 
 #ifdef CONFIG_SCHED_INFO
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 037ea6ea3cb2..094c716154ed 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -188,24 +188,33 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
-static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
+static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
 {
 	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	unsigned int cpu = smp_processor_id();
+	u64 index = flags & BPF_F_INDEX_MASK;
 	struct bpf_event_entry *ee;
 	struct perf_event *event;
 
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+		return -EINVAL;
+	if (index == BPF_F_CURRENT_CPU)
+		index = cpu;
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
 	ee = READ_ONCE(array->ptrs[index]);
-	if (unlikely(!ee))
+	if (!ee)
 		return -ENOENT;
 
 	event = ee->event;
+	if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
+		     event->attr.type != PERF_TYPE_RAW))
+		return -EINVAL;
+
 	/* make sure event is local and doesn't have pmu::count */
-	if (event->oncpu != smp_processor_id() ||
-	    event->pmu->count)
+	if (unlikely(event->oncpu != cpu || event->pmu->count))
 		return -EINVAL;
 
 	/*
@@ -229,6 +238,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 	struct pt_regs *regs = (struct pt_regs *) (long) r1;
 	struct bpf_map *map = (struct bpf_map *) (long) r2;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
 	void *data = (void *) (long) r4;
 	struct perf_sample_data sample_data;
@@ -242,12 +252,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
 	if (index == BPF_F_CURRENT_CPU)
-		index = raw_smp_processor_id();
+		index = cpu;
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
 	ee = READ_ONCE(array->ptrs[index]);
-	if (unlikely(!ee))
+	if (!ee)
 		return -ENOENT;
 
 	event = ee->event;
@@ -255,7 +265,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
 		return -EINVAL;
 
-	if (unlikely(event->oncpu != smp_processor_id()))
+	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
 	perf_sample_data_init(&sample_data, 0, 0);
@@ -302,6 +312,17 @@ const struct bpf_func_proto *bpf_get_event_output_proto(void)
 	return &bpf_event_output_proto;
 }
 
+static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	return (long) current;
+}
+
+static const struct bpf_func_proto bpf_get_current_task_proto = {
+	.func		= bpf_get_current_task,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -319,6 +340,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
 		return &bpf_get_current_pid_tgid_proto;
+	case BPF_FUNC_get_current_task:
+		return &bpf_get_current_task_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_current_comm:
@@ -347,20 +370,15 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 }
 
 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
-static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+					enum bpf_reg_type *reg_type)
 {
-	/* check bounds */
 	if (off < 0 || off >= sizeof(struct pt_regs))
 		return false;
-
-	/* only read is allowed */
 	if (type != BPF_READ)
 		return false;
-
-	/* disallow misaligned access */
 	if (off % size != 0)
 		return false;
-
 	return true;
 }
 
@@ -425,7 +443,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				    enum bpf_reg_type *reg_type)
 {
 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
 		return false;
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index f96f0383f6c6..ad1d6164e946 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -36,6 +36,10 @@ struct trace_bprintk_fmt {
 static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
 {
 	struct trace_bprintk_fmt *pos;
+
+	if (!fmt)
+		return ERR_PTR(-EINVAL);
+
 	list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
 		if (!strcmp(pos->fmt, fmt))
 			return pos;
@@ -57,7 +61,8 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
 	for (iter = start; iter < end; iter++) {
 		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
 		if (tb_fmt) {
-			*iter = tb_fmt->fmt;
+			if (!IS_ERR(tb_fmt))
+				*iter = tb_fmt->fmt;
 			continue;
 		}
 
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1356454e36de..eb8a19fee110 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -539,17 +539,39 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
 {
 	struct rb_node *parent = rb_parent(victim);
 
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+
 	/* Set the surrounding nodes to point to the replacement */
-	__rb_change_child(victim, new, parent, root);
 	if (victim->rb_left)
 		rb_set_parent(victim->rb_left, new);
 	if (victim->rb_right)
 		rb_set_parent(victim->rb_right, new);
+	__rb_change_child(victim, new, parent, root);
+}
+EXPORT_SYMBOL(rb_replace_node);
+
+void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+			 struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
 
 	/* Copy the pointers/colour from the victim to the replacement */
 	*new = *victim;
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Set the parent's pointer to the new node last after an RCU barrier
+	 * so that the pointers onwards are seen to be set correctly when doing
+	 * an RCU walk over the tree.
+	 */
+	__rb_change_child_rcu(victim, new, parent, root);
 }
-EXPORT_SYMBOL(rb_replace_node);
+EXPORT_SYMBOL(rb_replace_node_rcu);
 
 static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
 {
diff --git a/mm/compaction.c b/mm/compaction.c
index 1427366ad673..79bfe0e06907 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -441,25 +441,23 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 
 		/* Found a free page, break it into order-0 pages */
 		isolated = split_free_page(page);
+		if (!isolated)
+			break;
+
 		total_isolated += isolated;
+		cc->nr_freepages += isolated;
 		for (i = 0; i < isolated; i++) {
 			list_add(&page->lru, freelist);
 			page++;
 		}
-
-		/* If a page was split, advance to the end of it */
-		if (isolated) {
-			cc->nr_freepages += isolated;
-			if (!strict &&
-				cc->nr_migratepages <= cc->nr_freepages) {
-				blockpfn += isolated;
-				break;
-			}
-
-			blockpfn += isolated - 1;
-			cursor += isolated - 1;
-			continue;
+		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
+			blockpfn += isolated;
+			break;
 		}
+		/* Advance to the end of split page */
+		blockpfn += isolated - 1;
+		cursor += isolated - 1;
+		continue;
 
 isolate_fail:
 		if (strict)
@@ -469,6 +467,9 @@ isolate_fail:
 
 	}
 
+	if (locked)
+		spin_unlock_irqrestore(&cc->zone->lock, flags);
+
 	/*
 	 * There is a tiny chance that we have read bogus compound_order(),
 	 * so be careful to not go outside of the pageblock.
@@ -490,9 +491,6 @@ isolate_fail:
 	if (strict && blockpfn < end_pfn)
 		total_isolated = 0;
 
-	if (locked)
-		spin_unlock_irqrestore(&cc->zone->lock, flags);
-
 	/* Update the pageblock-skip if the whole pageblock was scanned */
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
@@ -1011,6 +1009,7 @@ static void isolate_freepages(struct compact_control *cc)
 				block_end_pfn = block_start_pfn,
 				block_start_pfn -= pageblock_nr_pages,
 				isolate_start_pfn = block_start_pfn) {
+		unsigned long isolated;
 
 		/*
 		 * This can iterate a massively long zone without finding any
@@ -1035,8 +1034,12 @@ static void isolate_freepages(struct compact_control *cc)
 			continue;
 
 		/* Found a block suitable for isolating free pages from. */
-		isolate_freepages_block(cc, &isolate_start_pfn,
-					block_end_pfn, freelist, false);
+		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
+						block_end_pfn, freelist, false);
+		/* If isolation failed early, do not continue needlessly */
+		if (!isolated && isolate_start_pfn < block_end_pfn &&
+		    cc->nr_migratepages > cc->nr_freepages)
+			break;
 
 		/*
 		 * If we isolated enough freepages, or aborted due to async
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878b2a38..20f3b1f33f0e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2186,7 +2186,7 @@ repeat:
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
 		addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
-		do_set_pte(vma, addr, page, pte, false, false, true);
+		do_set_pte(vma, addr, page, pte, false, false);
 		unlock_page(page);
 		goto next;
 unlock:
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 388c2bb9b55c..c1f3c0be150a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1030,6 +1030,7 @@ static void destroy_compound_gigantic_page(struct page *page,
 	int nr_pages = 1 << order;
 	struct page *p = page + 1;
 
+	atomic_set(compound_mapcount_ptr(page), 0);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		clear_compound_head(p);
 		set_page_refcounted(p);
@@ -4228,7 +4229,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		if (saddr) {
 			spte = huge_pte_offset(svma->vm_mm, saddr);
 			if (spte) {
-				mm_inc_nr_pmds(mm);
 				get_page(virt_to_page(spte));
 				break;
 			}
@@ -4243,9 +4243,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (pud_none(*pud)) {
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+		mm_inc_nr_pmds(mm);
 	} else {
 		put_page(virt_to_page(spte));
-		mm_inc_nr_pmds(mm);
 	}
 	spin_unlock(ptl);
 out:
diff --git a/mm/internal.h b/mm/internal.h
index a37e5b6f9d25..2524ec880e24 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,7 +24,8 @@
  */
 #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
-			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
+			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
+			__GFP_ATOMIC)
 
 /* The GFP flags allowed during early boot */
 #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 28439acda6ec..6845f9294696 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -508,7 +508,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
 	kasan_kmalloc(cache, object, cache->object_size, flags);
 }
 
-void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
+static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
 {
 	unsigned long size = cache->object_size;
 	unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
@@ -626,7 +626,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags)
 		kasan_kmalloc(page->slab_cache, object, size, flags);
 }
 
-void kasan_kfree(void *ptr)
+void kasan_poison_kfree(void *ptr)
 {
 	struct page *page;
 
@@ -636,7 +636,7 @@ void kasan_kfree(void *ptr)
 		kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
 				KASAN_FREE_PAGE);
 	else
-		kasan_slab_free(page->slab_cache, ptr);
+		kasan_poison_slab_free(page->slab_cache, ptr);
 }
 
 void kasan_kfree_large(const void *ptr)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e6429926e957..04320d3adbef 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -307,8 +307,10 @@ static void hex_dump_object(struct seq_file *seq,
 	len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
 
 	seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
+	kasan_disable_current();
 	seq_hex_dump(seq, "    ", DUMP_PREFIX_NONE, HEX_ROW_SIZE,
 		     HEX_GROUP_SIZE, ptr, len, HEX_ASCII);
+	kasan_enable_current();
 }
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75e74408cc8f..ac8664db3823 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4203,7 +4203,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	return &memcg->css;
 fail:
 	mem_cgroup_free(memcg);
-	return NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -5544,6 +5544,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 	struct mem_cgroup *memcg;
 	unsigned int nr_pages;
 	bool compound;
+	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
 	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
@@ -5574,10 +5575,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 
 	commit_charge(newpage, memcg, false);
 
-	local_irq_disable();
+	local_irq_save(flags);
 	mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
 	memcg_check_events(memcg, newpage);
-	local_irq_enable();
+	local_irq_restore(flags);
 }
 
 DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
diff --git a/mm/memory.c b/mm/memory.c
index 15322b73636b..cd1f29e4897e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2877,7 +2877,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
  * vm_ops->map_pages.
  */
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon, bool old)
+		struct page *page, pte_t *pte, bool write, bool anon)
 {
 	pte_t entry;
 
@@ -2885,8 +2885,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	if (old)
-		entry = pte_mkold(entry);
 	if (anon) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, address, false);
@@ -2900,16 +2898,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 	update_mmu_cache(vma, address, pte);
 }
 
-/*
- * If architecture emulates "accessed" or "young" bit without HW support,
- * there is no much gain with fault_around.
- */
 static unsigned long fault_around_bytes __read_mostly =
-#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-	PAGE_SIZE;
-#else
 	rounddown_pow_of_two(65536);
-#endif
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
@@ -3032,20 +3022,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
 		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-		if (!pte_same(*pte, orig_pte))
-			goto unlock_out;
 		do_fault_around(vma, address, pte, pgoff, flags);
-		/* Check if the fault is handled by faultaround */
-		if (!pte_same(*pte, orig_pte)) {
-			/*
-			 * Faultaround produce old pte, but the pte we've
-			 * handler fault for should be young.
-			 */
-			pte_t entry = pte_mkyoung(*pte);
-			if (ptep_set_access_flags(vma, address, pte, entry, 0))
-				update_mmu_cache(vma, address, pte);
+		if (!pte_same(*pte, orig_pte))
 			goto unlock_out;
-		}
 		pte_unmap_unlock(pte, ptl);
 	}
 
@@ -3060,7 +3039,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, false, false, false);
+	do_set_pte(vma, address, fault_page, pte, false, false);
 	unlock_page(fault_page);
 unlock_out:
 	pte_unmap_unlock(pte, ptl);
@@ -3111,7 +3090,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		goto uncharge_out;
 	}
-	do_set_pte(vma, address, new_page, pte, true, true, false);
+	do_set_pte(vma, address, new_page, pte, true, true);
 	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
@@ -3164,7 +3143,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, true, false, false);
+	do_set_pte(vma, address, fault_page, pte, true, false);
 	pte_unmap_unlock(pte, ptl);
 
 	if (set_page_dirty(fault_page))
diff --git a/mm/mempool.c b/mm/mempool.c
index 9e075f829d0d..8f65464da5de 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element)
 
 static void kasan_poison_element(mempool_t *pool, void *element)
 {
-	if (pool->alloc == mempool_alloc_slab)
-		kasan_poison_slab_free(pool->pool_data, element);
-	if (pool->alloc == mempool_kmalloc)
-		kasan_kfree(element);
+	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+		kasan_poison_kfree(element);
 	if (pool->alloc == mempool_alloc_pages)
 		kasan_free_pages(element, (unsigned long)pool->pool_data);
 }
 
 static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
 {
-	if (pool->alloc == mempool_alloc_slab)
-		kasan_slab_alloc(pool->pool_data, element, flags);
-	if (pool->alloc == mempool_kmalloc)
-		kasan_krealloc(element, (size_t)pool->pool_data, flags);
+	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+		kasan_unpoison_slab(element);
 	if (pool->alloc == mempool_alloc_pages)
 		kasan_alloc_pages(element, (unsigned long)pool->pool_data);
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 9baf41c877ff..bd3fdc202e8b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -431,6 +431,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
 	return MIGRATEPAGE_SUCCESS;
 }
+EXPORT_SYMBOL(migrate_page_move_mapping);
 
 /*
  * The expected number of remaining references is the same as that
@@ -586,6 +587,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mem_cgroup_migrate(page, newpage);
 }
+EXPORT_SYMBOL(migrate_page_copy);
 
 /************************************************************
  *                    Migration functions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index acbc432d1a52..ddf74487f848 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -474,13 +474,8 @@ static bool __oom_reap_task(struct task_struct *tsk)
 	p = find_lock_task_mm(tsk);
 	if (!p)
 		goto unlock_oom;
-
 	mm = p->mm;
-	if (!atomic_inc_not_zero(&mm->mm_users)) {
-		task_unlock(p);
-		goto unlock_oom;
-	}
-
+	atomic_inc(&mm->mm_users);
 	task_unlock(p);
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b9956fdee8f5..e2481949494c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -373,8 +373,9 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
 	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
 	unsigned long bytes = vm_dirty_bytes;
 	unsigned long bg_bytes = dirty_background_bytes;
-	unsigned long ratio = vm_dirty_ratio;
-	unsigned long bg_ratio = dirty_background_ratio;
+	/* convert ratios to per-PAGE_SIZE for higher precision */
+	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
+	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
 	unsigned long thresh;
 	unsigned long bg_thresh;
 	struct task_struct *tsk;
@@ -386,26 +387,28 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
 		/*
 		 * The byte settings can't be applied directly to memcg
 		 * domains.  Convert them to ratios by scaling against
-		 * globally available memory.
+		 * globally available memory.  As the ratios are in
+		 * per-PAGE_SIZE, they can be obtained by dividing bytes by
+		 * number of pages.
 		 */
 		if (bytes)
-			ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 /
-				    global_avail, 100UL);
+			ratio = min(DIV_ROUND_UP(bytes, global_avail),
+				    PAGE_SIZE);
 		if (bg_bytes)
-			bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 /
-				       global_avail, 100UL);
+			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
+				       PAGE_SIZE);
 		bytes = bg_bytes = 0;
 	}
 
 	if (bytes)
 		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
 	else
-		thresh = (ratio * available_memory) / 100;
+		thresh = (ratio * available_memory) / PAGE_SIZE;
 
 	if (bg_bytes)
 		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
 	else
-		bg_thresh = (bg_ratio * available_memory) / 100;
+		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
 
 	if (bg_thresh >= thresh)
 		bg_thresh = thresh / 2;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index c6cda3e36212..fedeba88c9cb 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -207,13 +207,15 @@ void __dump_page_owner(struct page *page)
 		.nr_entries = page_ext->nr_entries,
 		.entries = &page_ext->trace_entries[0],
 	};
-	gfp_t gfp_mask = page_ext->gfp_mask;
-	int mt = gfpflags_to_migratetype(gfp_mask);
+	gfp_t gfp_mask;
+	int mt;
 
 	if (unlikely(!page_ext)) {
 		pr_alert("There is not page extension available.\n");
 		return;
 	}
+	gfp_mask = page_ext->gfp_mask;
+	mt = gfpflags_to_migratetype(gfp_mask);
 
 	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
 		pr_alert("page_owner info is not active (free page?)\n");
diff --git a/mm/percpu.c b/mm/percpu.c
index 0c59684f1ff2..9903830aaebb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -112,7 +112,7 @@ struct pcpu_chunk {
 	int			map_used;	/* # of map entries used before the sentry */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
-	struct work_struct	map_extend_work;/* async ->map[] extension */
+	struct list_head	map_extend_list;/* on pcpu_map_extend_chunks */
 
 	void			*data;		/* chunk data */
 	int			first_free;	/* no free below this */
@@ -162,10 +162,13 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
 static int pcpu_reserved_chunk_limit;
 
 static DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
-static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop */
+static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop, map ext */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
 
+/* chunks which need their map areas extended, protected by pcpu_lock */
+static LIST_HEAD(pcpu_map_extend_chunks);
+
 /*
  * The number of empty populated pages, protected by pcpu_lock.  The
  * reserved chunk doesn't contribute to the count.
@@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
 {
 	int margin, new_alloc;
 
+	lockdep_assert_held(&pcpu_lock);
+
 	if (is_atomic) {
 		margin = 3;
 
 		if (chunk->map_alloc <
-		    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
-		    pcpu_async_enabled)
-			schedule_work(&chunk->map_extend_work);
+		    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
+			if (list_empty(&chunk->map_extend_list)) {
+				list_add_tail(&chunk->map_extend_list,
+					      &pcpu_map_extend_chunks);
+				pcpu_schedule_balance_work();
+			}
+		}
 	} else {
 		margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
 	}
@@ -435,6 +444,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
 	unsigned long flags;
 
+	lockdep_assert_held(&pcpu_alloc_mutex);
+
 	new = pcpu_mem_zalloc(new_size);
 	if (!new)
 		return -ENOMEM;
@@ -467,20 +478,6 @@ out_unlock:
 	return 0;
 }
 
-static void pcpu_map_extend_workfn(struct work_struct *work)
-{
-	struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
-						map_extend_work);
-	int new_alloc;
-
-	spin_lock_irq(&pcpu_lock);
-	new_alloc = pcpu_need_to_extend(chunk, false);
-	spin_unlock_irq(&pcpu_lock);
-
-	if (new_alloc)
-		pcpu_extend_area_map(chunk, new_alloc);
-}
-
 /**
  * pcpu_fit_in_area - try to fit the requested allocation in a candidate area
  * @chunk: chunk the candidate area belongs to
@@ -740,7 +737,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->map_used = 1;
 
 	INIT_LIST_HEAD(&chunk->list);
-	INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn);
+	INIT_LIST_HEAD(&chunk->map_extend_list);
 	chunk->free_size = pcpu_unit_size;
 	chunk->contig_hint = pcpu_unit_size;
 
@@ -895,6 +892,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 		return NULL;
 	}
 
+	if (!is_atomic)
+		mutex_lock(&pcpu_alloc_mutex);
+
 	spin_lock_irqsave(&pcpu_lock, flags);
 
 	/* serve reserved allocations from the reserved chunk if available */
@@ -967,12 +967,9 @@ restart:
 	if (is_atomic)
 		goto fail;
 
-	mutex_lock(&pcpu_alloc_mutex);
-
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
 		chunk = pcpu_create_chunk();
 		if (!chunk) {
-			mutex_unlock(&pcpu_alloc_mutex);
 			err = "failed to allocate new chunk";
 			goto fail;
 		}
@@ -983,7 +980,6 @@ restart:
 		spin_lock_irqsave(&pcpu_lock, flags);
 	}
 
-	mutex_unlock(&pcpu_alloc_mutex);
 	goto restart;
 
 area_found:
@@ -993,8 +989,6 @@ area_found:
 	if (!is_atomic) {
 		int page_start, page_end, rs, re;
 
-		mutex_lock(&pcpu_alloc_mutex);
-
 		page_start = PFN_DOWN(off);
 		page_end = PFN_UP(off + size);
 
@@ -1005,7 +999,6 @@ area_found:
 
 			spin_lock_irqsave(&pcpu_lock, flags);
 			if (ret) {
-				mutex_unlock(&pcpu_alloc_mutex);
 				pcpu_free_area(chunk, off, &occ_pages);
 				err = "failed to populate";
 				goto fail_unlock;
@@ -1045,6 +1038,8 @@ fail:
 		/* see the flag handling in pcpu_blance_workfn() */
 		pcpu_atomic_alloc_failed = true;
 		pcpu_schedule_balance_work();
+	} else {
+		mutex_unlock(&pcpu_alloc_mutex);
 	}
 	return NULL;
 }
@@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 		if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
 			continue;
 
+		list_del_init(&chunk->map_extend_list);
 		list_move(&chunk->list, &to_free);
 	}
 
@@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
 		pcpu_destroy_chunk(chunk);
 	}
 
+	/* service chunks which requested async area map extension */
+	do {
+		int new_alloc = 0;
+
+		spin_lock_irq(&pcpu_lock);
+
+		chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
+					struct pcpu_chunk, map_extend_list);
+		if (chunk) {
+			list_del_init(&chunk->map_extend_list);
+			new_alloc = pcpu_need_to_extend(chunk, false);
+		}
+
+		spin_unlock_irq(&pcpu_lock);
+
+		if (new_alloc)
+			pcpu_extend_area_map(chunk, new_alloc);
+	} while (chunk);
+
 	/*
 	 * Ensure there are certain number of free populated pages for
 	 * atomic allocs.  Fill up from the most packed so that atomic
@@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	 */
 	schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 	INIT_LIST_HEAD(&schunk->list);
-	INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn);
+	INIT_LIST_HEAD(&schunk->map_extend_list);
 	schunk->base_addr = base_addr;
 	schunk->map = smap;
 	schunk->map_alloc = ARRAY_SIZE(smap);
@@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	if (dyn_size) {
 		dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 		INIT_LIST_HEAD(&dchunk->list);
-		INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn);
+		INIT_LIST_HEAD(&dchunk->map_extend_list);
 		dchunk->base_addr = base_addr;
 		dchunk->map = dmap;
 		dchunk->map_alloc = ARRAY_SIZE(dmap);
diff --git a/mm/shmem.c b/mm/shmem.c
index a36144909b28..24463b67b6ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2227,7 +2227,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 			/* Remove the !PageUptodate pages we added */
 			shmem_undo_range(inode,
 				(loff_t)start << PAGE_SHIFT,
-				(loff_t)index << PAGE_SHIFT, true);
+				((loff_t)index << PAGE_SHIFT) - 1, true);
 			goto undone;
 		}
 
diff --git a/mm/swap.c b/mm/swap.c
index 59f5fafa6e1f..90530ff8ed16 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -242,7 +242,7 @@ void rotate_reclaimable_page(struct page *page)
 		get_page(page);
 		local_irq_save(flags);
 		pvec = this_cpu_ptr(&lru_rotate_pvecs);
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
@@ -296,7 +296,7 @@ void activate_page(struct page *page)
 		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
 
 		get_page(page);
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
 		put_cpu_var(activate_page_pvecs);
 	}
@@ -391,9 +391,8 @@ static void __lru_cache_add(struct page *page)
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
 
 	get_page(page);
-	if (!pagevec_space(pvec))
+	if (!pagevec_add(pvec, page) || PageCompound(page))
 		__pagevec_lru_add(pvec);
-	pagevec_add(pvec, page);
 	put_cpu_var(lru_add_pvec);
 }
 
@@ -628,7 +627,7 @@ void deactivate_file_page(struct page *page)
 	if (likely(get_page_unless_zero(page))) {
 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
 
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 		put_cpu_var(lru_deactivate_file_pvecs);
 	}
@@ -648,7 +647,7 @@ void deactivate_page(struct page *page)
 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
 
 		get_page(page);
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
 		put_cpu_var(lru_deactivate_pvecs);
 	}
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index ae1d4199aa4c..86450b7e2899 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -47,6 +47,9 @@ static int lowpan_ndisc_parse_options(const struct net_device *dev,
 				      struct nd_opt_hdr *nd_opt,
 				      struct ndisc_options *ndopts)
 {
+	if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+		return 0;
+
 	switch (nd_opt->nd_opt_type) {
 	case ND_OPT_SOURCE_LL_ADDR:
 	case ND_OPT_TARGET_LL_ADDR:
@@ -94,10 +97,13 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
 	}
 
 	write_lock_bh(&n->lock);
-	if (lladdr_short)
+	if (lladdr_short) {
 		ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short);
-	else
+		if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr))
+			neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+	} else {
 		neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+	}
 	write_unlock_bh(&n->lock);
 }
 
@@ -135,8 +141,9 @@ static int lowpan_ndisc_opt_addr_space(const struct net_device *dev,
 			read_unlock_bh(&neigh->lock);
 			addr_space += __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
 			*ha = ha_buf;
+		} else {
+			read_unlock_bh(&neigh->lock);
 		}
-		read_unlock_bh(&neigh->lock);
 		break;
 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
 	case NDISC_NEIGHBOUR_SOLICITATION:
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86ae75b77390..c8f422c90856 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_netpoll_cleanup	= vlan_dev_netpoll_cleanup,
 #endif
 	.ndo_fix_features	= vlan_dev_fix_features,
+	.ndo_neigh_construct	= netdev_default_l2upper_neigh_construct,
+	.ndo_neigh_destroy	= netdev_default_l2upper_neigh_destroy,
 	.ndo_fdb_add		= switchdev_port_fdb_add,
 	.ndo_fdb_del		= switchdev_port_fdb_del,
 	.ndo_fdb_dump		= switchdev_port_fdb_dump,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index e07f551a863c..53b4ac09e7b7 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = {
 	.connected_output =	neigh_direct_output,
 };
 
-static int clip_constructor(struct neighbour *neigh)
+static int clip_constructor(struct net_device *dev, struct neighbour *neigh)
 {
 	struct atmarp_entry *entry = neighbour_priv(neigh);
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index fbd0acf80b13..2fdebabbfacd 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock)
 			release_sock(sk);
 			ax25_disconnect(ax25, 0);
 			lock_sock(sk);
-			ax25_destroy_socket(ax25);
+			if (!sock_flag(ax25->sk, SOCK_DESTROY))
+				ax25_destroy_socket(ax25);
 			break;
 
 		case AX25_STATE_3:
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 951cd57bb07d..5237dff6941d 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 	switch (ax25->state) {
 
 	case AX25_STATE_0:
+	case AX25_STATE_2:
 		/* Magic here: If we listen() and a new link dies before it
 		   is accepted() it isn't 'dead' so doesn't get removed. */
 		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
@@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
 				bh_unlock_sock(sk);
+				/* Ungrab socket and destroy it */
 				sock_put(sk);
 			} else
 				ax25_destroy_socket(ax25);
@@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25)
 	case AX25_STATE_2:
 		if (ax25->n2count == ax25->n2) {
 			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			ax25_disconnect(ax25, ETIMEDOUT);
+			if (!sock_flag(ax25->sk, SOCK_DESTROY))
+				ax25_disconnect(ax25, ETIMEDOUT);
 			return;
 		} else {
 			ax25->n2count++;
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 004467c9e6e1..2c0d6ef66f9d 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 
 	switch (ax25->state) {
 	case AX25_STATE_0:
+	case AX25_STATE_2:
 		/* Magic here: If we listen() and a new link dies before it
 		   is accepted() it isn't 'dead' so doesn't get removed. */
 		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
@@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
 				bh_unlock_sock(sk);
+				/* Ungrab socket and destroy it */
 				sock_put(sk);
 			} else
 				ax25_destroy_socket(ax25);
@@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25)
 	case AX25_STATE_2:
 		if (ax25->n2count == ax25->n2) {
 			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			ax25_disconnect(ax25, ETIMEDOUT);
+			if (!sock_flag(ax25->sk, SOCK_DESTROY))
+				ax25_disconnect(ax25, ETIMEDOUT);
 			return;
 		} else {
 			ax25->n2count++;
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 3b78e8473a01..655a7d4c96e1 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
 {
 	ax25_clear_queues(ax25);
 
-	ax25_stop_heartbeat(ax25);
+	if (!sock_flag(ax25->sk, SOCK_DESTROY))
+		ax25_stop_heartbeat(ax25);
 	ax25_stop_t1timer(ax25);
 	ax25_stop_t2timer(ax25);
 	ax25_stop_t3timer(ax25);
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index f66930ee3c0b..833bb145ba3c 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -66,7 +66,7 @@ config BATMAN_ADV_NC
 
 config BATMAN_ADV_MCAST
 	bool "Multicast optimisation"
-	depends on BATMAN_ADV
+	depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
 	default n
 	help
 	  This option enables the multicast optimisation which aims to
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 797cf2fc88c1..a83fc6c58d19 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -17,6 +17,7 @@
 #
 
 obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
+batman-adv-y += bat_algo.o
 batman-adv-y += bat_iv_ogm.o
 batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o
 batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
@@ -31,12 +32,16 @@ batman-adv-y += gateway_common.o
 batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
 batman-adv-y += icmp_socket.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
 batman-adv-y += main.o
 batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-y += netlink.o
 batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
 batman-adv-y += routing.o
 batman-adv-y += send.o
 batman-adv-y += soft-interface.o
 batman-adv-y += sysfs.o
+batman-adv-y += tp_meter.o
 batman-adv-y += translation-table.o
+batman-adv-y += tvlv.o
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
new file mode 100644
index 000000000000..81dbbf569bd4
--- /dev/null
+++ b/net/batman-adv/bat_algo.c
@@ -0,0 +1,140 @@
+/* Copyright (C) 2007-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+#include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+
+#include "bat_algo.h"
+
+char batadv_routing_algo[20] = "BATMAN_IV";
+static struct hlist_head batadv_algo_list;
+
+/**
+ * batadv_algo_init - Initialize batman-adv algorithm management data structures
+ */
+void batadv_algo_init(void)
+{
+	INIT_HLIST_HEAD(&batadv_algo_list);
+}
+
+static struct batadv_algo_ops *batadv_algo_get(char *name)
+{
+	struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
+
+	hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
+		if (strcmp(bat_algo_ops_tmp->name, name) != 0)
+			continue;
+
+		bat_algo_ops = bat_algo_ops_tmp;
+		break;
+	}
+
+	return bat_algo_ops;
+}
+
+int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
+{
+	struct batadv_algo_ops *bat_algo_ops_tmp;
+
+	bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
+	if (bat_algo_ops_tmp) {
+		pr_info("Trying to register already registered routing algorithm: %s\n",
+			bat_algo_ops->name);
+		return -EEXIST;
+	}
+
+	/* all algorithms must implement all ops (for now) */
+	if (!bat_algo_ops->iface.enable ||
+	    !bat_algo_ops->iface.disable ||
+	    !bat_algo_ops->iface.update_mac ||
+	    !bat_algo_ops->iface.primary_set ||
+	    !bat_algo_ops->neigh.cmp ||
+	    !bat_algo_ops->neigh.is_similar_or_better) {
+		pr_info("Routing algo '%s' does not implement required ops\n",
+			bat_algo_ops->name);
+		return -EINVAL;
+	}
+
+	INIT_HLIST_NODE(&bat_algo_ops->list);
+	hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
+
+	return 0;
+}
+
+int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
+{
+	struct batadv_algo_ops *bat_algo_ops;
+
+	bat_algo_ops = batadv_algo_get(name);
+	if (!bat_algo_ops)
+		return -EINVAL;
+
+	bat_priv->algo_ops = bat_algo_ops;
+
+	return 0;
+}
+
+int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct batadv_algo_ops *bat_algo_ops;
+
+	seq_puts(seq, "Available routing algorithms:\n");
+
+	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+		seq_printf(seq, " * %s\n", bat_algo_ops->name);
+	}
+
+	return 0;
+}
+
+static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
+{
+	struct batadv_algo_ops *bat_algo_ops;
+	char *algo_name = (char *)val;
+	size_t name_len = strlen(algo_name);
+
+	if (name_len > 0 && algo_name[name_len - 1] == '\n')
+		algo_name[name_len - 1] = '\0';
+
+	bat_algo_ops = batadv_algo_get(algo_name);
+	if (!bat_algo_ops) {
+		pr_err("Routing algorithm '%s' is not supported\n", algo_name);
+		return -EINVAL;
+	}
+
+	return param_set_copystring(algo_name, kp);
+}
+
+static const struct kernel_param_ops batadv_param_ops_ra = {
+	.set = batadv_param_set_ra,
+	.get = param_get_string,
+};
+
+static struct kparam_string batadv_param_string_ra = {
+	.maxlen = sizeof(batadv_routing_algo),
+	.string = batadv_routing_algo,
+};
+
+module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
+		0644);
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 03dafd33d23b..860d773dd8fa 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -18,32 +18,18 @@
 #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
 #define _NET_BATMAN_ADV_BAT_ALGO_H_
 
-struct batadv_priv;
+#include "main.h"
 
-int batadv_iv_init(void);
+#include <linux/types.h>
 
-#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+struct seq_file;
 
-int batadv_v_init(void);
-int batadv_v_mesh_init(struct batadv_priv *bat_priv);
-void batadv_v_mesh_free(struct batadv_priv *bat_priv);
+extern char batadv_routing_algo[];
+extern struct list_head batadv_hardif_list;
 
-#else
-
-static inline int batadv_v_init(void)
-{
-	return 0;
-}
-
-static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
-{
-	return 0;
-}
-
-static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
-{
-}
-
-#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+void batadv_algo_init(void);
+int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
+int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
+int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
 
 #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index ce2f203048d3..19b0abd6c640 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -15,7 +15,7 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "bat_algo.h"
+#include "bat_iv_ogm.h"
 #include "main.h"
 
 #include <linux/atomic.h>
@@ -30,8 +30,9 @@
 #include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
-#include <linux/list.h>
+#include <linux/kernel.h>
 #include <linux/kref.h>
+#include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/pkt_sched.h>
@@ -48,15 +49,20 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bat_algo.h"
 #include "bitarray.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
 #include "send.h"
 #include "translation-table.h"
+#include "tvlv.h"
+
+static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work);
 
 /**
  * enum batadv_dup_status - duplicate status
@@ -336,7 +342,8 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
 {
 	struct batadv_neigh_node *neigh_node;
 
-	neigh_node = batadv_neigh_node_new(orig_node, hard_iface, neigh_addr);
+	neigh_node = batadv_neigh_node_get_or_create(orig_node,
+						     hard_iface, neigh_addr);
 	if (!neigh_node)
 		goto out;
 
@@ -730,7 +737,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
 
 	/* start timer for this packet */
 	INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
-			  batadv_send_outstanding_bat_ogm_packet);
+			  batadv_iv_send_outstanding_bat_ogm_packet);
 	queue_delayed_work(batadv_event_workqueue,
 			   &forw_packet_aggr->delayed_work,
 			   send_time - jiffies);
@@ -937,6 +944,19 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
 	u16 tvlv_len = 0;
 	unsigned long send_time;
 
+	if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
+	    (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+		return;
+
+	/* the interface gets activated here to avoid race conditions between
+	 * the moment of activating the interface in
+	 * hardif_activate_interface() where the originator mac is set and
+	 * outdated packets (especially uninitialized mac addresses) in the
+	 * packet queue
+	 */
+	if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
+		hard_iface->if_status = BATADV_IF_ACTIVE;
+
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 
 	if (hard_iface == primary_if) {
@@ -1778,6 +1798,45 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
 	batadv_orig_node_put(orig_node);
 }
 
+static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
+{
+	struct delayed_work *delayed_work;
+	struct batadv_forw_packet *forw_packet;
+	struct batadv_priv *bat_priv;
+
+	delayed_work = to_delayed_work(work);
+	forw_packet = container_of(delayed_work, struct batadv_forw_packet,
+				   delayed_work);
+	bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	hlist_del(&forw_packet->list);
+	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
+		goto out;
+
+	batadv_iv_ogm_emit(forw_packet);
+
+	/* we have to have at least one packet in the queue to determine the
+	 * queues wake up time unless we are shutting down.
+	 *
+	 * only re-schedule if this is the "original" copy, e.g. the OGM of the
+	 * primary interface should only be rescheduled once per period, but
+	 * this function will be called for the forw_packet instances of the
+	 * other secondary interfaces as well.
+	 */
+	if (forw_packet->own &&
+	    forw_packet->if_incoming == forw_packet->if_outgoing)
+		batadv_iv_ogm_schedule(forw_packet->if_incoming);
+
+out:
+	/* don't count own packet */
+	if (!forw_packet->own)
+		atomic_inc(&bat_priv->batman_queue_left);
+
+	batadv_forw_packet_free(forw_packet);
+}
+
 static int batadv_iv_ogm_receive(struct sk_buff *skb,
 				 struct batadv_hard_iface *if_incoming)
 {
@@ -1794,7 +1853,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
 	/* did we receive a B.A.T.M.A.N. IV OGM packet on an interface
 	 * that does not have B.A.T.M.A.N. IV enabled ?
 	 */
-	if (bat_priv->bat_algo_ops->bat_ogm_emit != batadv_iv_ogm_emit)
+	if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable)
 		return NET_RX_DROP;
 
 	batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
@@ -2052,21 +2111,32 @@ out:
 	return ret;
 }
 
+static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+	/* begin scheduling originator messages on that interface */
+	batadv_iv_ogm_schedule(hard_iface);
+}
+
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
-	.bat_iface_enable = batadv_iv_ogm_iface_enable,
-	.bat_iface_disable = batadv_iv_ogm_iface_disable,
-	.bat_iface_update_mac = batadv_iv_ogm_iface_update_mac,
-	.bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
-	.bat_ogm_schedule = batadv_iv_ogm_schedule,
-	.bat_ogm_emit = batadv_iv_ogm_emit,
-	.bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
-	.bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
-	.bat_neigh_print = batadv_iv_neigh_print,
-	.bat_orig_print = batadv_iv_ogm_orig_print,
-	.bat_orig_free = batadv_iv_ogm_orig_free,
-	.bat_orig_add_if = batadv_iv_ogm_orig_add_if,
-	.bat_orig_del_if = batadv_iv_ogm_orig_del_if,
+	.iface = {
+		.activate = batadv_iv_iface_activate,
+		.enable = batadv_iv_ogm_iface_enable,
+		.disable = batadv_iv_ogm_iface_disable,
+		.update_mac = batadv_iv_ogm_iface_update_mac,
+		.primary_set = batadv_iv_ogm_primary_iface_set,
+	},
+	.neigh = {
+		.cmp = batadv_iv_ogm_neigh_cmp,
+		.is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+		.print = batadv_iv_neigh_print,
+	},
+	.orig = {
+		.print = batadv_iv_ogm_orig_print,
+		.free = batadv_iv_ogm_orig_free,
+		.add_if = batadv_iv_ogm_orig_add_if,
+		.del_if = batadv_iv_ogm_orig_del_if,
+	},
 };
 
 int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
new file mode 100644
index 000000000000..b9f3550faaf7
--- /dev/null
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -0,0 +1,25 @@
+/* Copyright (C) 2007-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_
+#define _BATMAN_ADV_BATADV_IV_OGM_H_
+
+#include "main.h"
+
+int batadv_iv_init(void);
+
+#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 0a12e5cdd65d..0366cbf5e444 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -15,7 +15,7 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "bat_algo.h"
+#include "bat_v.h"
 #include "main.h"
 
 #include <linux/atomic.h>
@@ -31,6 +31,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bat_algo.h"
 #include "bat_v_elp.h"
 #include "bat_v_ogm.h"
 #include "hard-interface.h"
@@ -70,11 +71,6 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
 	if (ret < 0)
 		batadv_v_elp_iface_disable(hard_iface);
 
-	/* enable link throughput auto-detection by setting the throughput
-	 * override to zero
-	 */
-	atomic_set(&hard_iface->bat_v.throughput_override, 0);
-
 	return ret;
 }
 
@@ -119,14 +115,6 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
 		  batadv_v_elp_throughput_metric_update);
 }
 
-static void batadv_v_ogm_schedule(struct batadv_hard_iface *hard_iface)
-{
-}
-
-static void batadv_v_ogm_emit(struct batadv_forw_packet *forw_packet)
-{
-}
-
 /**
  * batadv_v_orig_print_neigh - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
@@ -334,21 +322,39 @@ err_ifinfo1:
 
 static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 	.name = "BATMAN_V",
-	.bat_iface_activate = batadv_v_iface_activate,
-	.bat_iface_enable = batadv_v_iface_enable,
-	.bat_iface_disable = batadv_v_iface_disable,
-	.bat_iface_update_mac = batadv_v_iface_update_mac,
-	.bat_primary_iface_set = batadv_v_primary_iface_set,
-	.bat_hardif_neigh_init = batadv_v_hardif_neigh_init,
-	.bat_ogm_emit = batadv_v_ogm_emit,
-	.bat_ogm_schedule = batadv_v_ogm_schedule,
-	.bat_orig_print = batadv_v_orig_print,
-	.bat_neigh_cmp = batadv_v_neigh_cmp,
-	.bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob,
-	.bat_neigh_print = batadv_v_neigh_print,
+	.iface = {
+		.activate = batadv_v_iface_activate,
+		.enable = batadv_v_iface_enable,
+		.disable = batadv_v_iface_disable,
+		.update_mac = batadv_v_iface_update_mac,
+		.primary_set = batadv_v_primary_iface_set,
+	},
+	.neigh = {
+		.hardif_init = batadv_v_hardif_neigh_init,
+		.cmp = batadv_v_neigh_cmp,
+		.is_similar_or_better = batadv_v_neigh_is_sob,
+		.print = batadv_v_neigh_print,
+	},
+	.orig = {
+		.print = batadv_v_orig_print,
+	},
 };
 
 /**
+ * batadv_v_hardif_init - initialize the algorithm specific fields in the
+ *  hard-interface object
+ * @hard_iface: the hard-interface to initialize
+ */
+void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
+{
+	/* enable link throughput auto-detection by setting the throughput
+	 * override to zero
+	 */
+	atomic_set(&hard_iface->bat_v.throughput_override, 0);
+	atomic_set(&hard_iface->bat_v.elp_interval, 500);
+}
+
+/**
  * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a
  *  mesh
  * @bat_priv: the object representing the mesh interface to initialise
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
new file mode 100644
index 000000000000..83b77639729e
--- /dev/null
+++ b/net/batman-adv/bat_v.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2011-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Linus Lüssing
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_BAT_V_H_
+#define _NET_BATMAN_ADV_BAT_V_H_
+
+#include "main.h"
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+
+int batadv_v_init(void);
+void batadv_v_hardif_init(struct batadv_hard_iface *hardif);
+int batadv_v_mesh_init(struct batadv_priv *bat_priv);
+void batadv_v_mesh_free(struct batadv_priv *bat_priv);
+
+#else
+
+static inline int batadv_v_init(void)
+{
+	return 0;
+}
+
+static inline void batadv_v_hardif_init(struct batadv_hard_iface *hardif)
+{
+}
+
+static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
+{
+	return 0;
+}
+
+static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
+{
+}
+
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+#endif /* _NET_BATMAN_ADV_BAT_V_H_ */
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index df42eb1365a0..7d170010beb9 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -43,6 +43,7 @@
 #include "bat_algo.h"
 #include "bat_v_ogm.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
@@ -344,7 +345,6 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
 	/* randomize initial seqno to avoid collision */
 	get_random_bytes(&random_seqno, sizeof(random_seqno));
 	atomic_set(&hard_iface->bat_v.elp_seqno, random_seqno);
-	atomic_set(&hard_iface->bat_v.elp_interval, 500);
 
 	/* assume full-duplex by default */
 	hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
@@ -443,7 +443,8 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
 	if (!orig_neigh)
 		return;
 
-	neigh = batadv_neigh_node_new(orig_neigh, if_incoming, neigh_addr);
+	neigh = batadv_neigh_node_get_or_create(orig_neigh,
+						if_incoming, neigh_addr);
 	if (!neigh)
 		goto orig_free;
 
@@ -503,7 +504,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
 	/* did we receive a B.A.T.M.A.N. V ELP packet on an interface
 	 * that does not have B.A.T.M.A.N. V ELP enabled ?
 	 */
-	if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
+	if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
 		return NET_RX_DROP;
 
 	elp_packet = (struct batadv_elp_packet *)skb->data;
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index cc130b2d05e5..be17c0b1369e 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -15,11 +15,11 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "main.h"
-
 #ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_
 #define _NET_BATMAN_ADV_BAT_V_ELP_H_
 
+#include "main.h"
+
 struct sk_buff;
 struct work_struct;
 
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 473ebb9a0e73..6fbba4eb0617 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -39,13 +39,16 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bat_algo.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
 #include "send.h"
 #include "translation-table.h"
+#include "tvlv.h"
 
 /**
  * batadv_v_ogm_orig_get - retrieve and possibly create an originator node
@@ -683,8 +686,8 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 	if (!orig_node)
 		return;
 
-	neigh_node = batadv_neigh_node_new(orig_node, if_incoming,
-					   ethhdr->h_source);
+	neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming,
+						     ethhdr->h_source);
 	if (!neigh_node)
 		goto out;
 
@@ -751,7 +754,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
 	/* did we receive a OGM2 packet on an interface that does not have
 	 * B.A.T.M.A.N. V enabled ?
 	 */
-	if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
+	if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
 		return NET_RX_DROP;
 
 	if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index d849c75ada0e..4c4d45caa422 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -18,10 +18,10 @@
 #ifndef _BATMAN_ADV_BATADV_V_OGM_H_
 #define _BATMAN_ADV_BATADV_V_OGM_H_
 
+#include "main.h"
+
 #include <linux/types.h>
 
-struct batadv_hard_iface;
-struct batadv_priv;
 struct sk_buff;
 
 int batadv_v_ogm_init(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index a0c7913837a5..032271421a20 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -20,6 +20,8 @@
 
 #include <linux/bitmap.h>
 
+#include "log.h"
+
 /* shift the packet array by n places. */
 static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
 {
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 748a9ead7ce5..e4f7494fb974 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -48,6 +48,7 @@
 
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "sysfs.h"
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 952900466d88..1d68b6e63b96 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -18,245 +18,33 @@
 #include "debugfs.h"
 #include "main.h"
 
-#include <linux/compiler.h>
 #include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/fcntl.h>
 #include <linux/fs.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/netdevice.h>
-#include <linux/poll.h>
 #include <linux/printk.h>
 #include <linux/sched.h> /* for linux/wait.h */
 #include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
 #include <linux/stat.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <linux/sysfs.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <linux/wait.h>
-#include <stdarg.h>
 
+#include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
 #include "icmp_socket.h"
+#include "log.h"
+#include "multicast.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "translation-table.h"
 
 static struct dentry *batadv_debugfs;
 
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
-
-static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
-
-static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
-				  size_t idx)
-{
-	return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
-}
-
-static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
-				 char c)
-{
-	char *char_addr;
-
-	char_addr = batadv_log_char_addr(debug_log, debug_log->log_end);
-	*char_addr = c;
-	debug_log->log_end++;
-
-	if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len)
-		debug_log->log_start = debug_log->log_end - batadv_log_buff_len;
-}
-
-__printf(2, 3)
-static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
-			     const char *fmt, ...)
-{
-	va_list args;
-	static char debug_log_buf[256];
-	char *p;
-
-	if (!debug_log)
-		return 0;
-
-	spin_lock_bh(&debug_log->lock);
-	va_start(args, fmt);
-	vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
-	va_end(args);
-
-	for (p = debug_log_buf; *p != 0; p++)
-		batadv_emit_log_char(debug_log, *p);
-
-	spin_unlock_bh(&debug_log->lock);
-
-	wake_up(&debug_log->queue_wait);
-
-	return 0;
-}
-
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-{
-	va_list args;
-	char tmp_log_buf[256];
-
-	va_start(args, fmt);
-	vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
-	batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
-			  jiffies_to_msecs(jiffies), tmp_log_buf);
-	va_end(args);
-
-	return 0;
-}
-
-static int batadv_log_open(struct inode *inode, struct file *file)
-{
-	if (!try_module_get(THIS_MODULE))
-		return -EBUSY;
-
-	nonseekable_open(inode, file);
-	file->private_data = inode->i_private;
-	return 0;
-}
-
-static int batadv_log_release(struct inode *inode, struct file *file)
-{
-	module_put(THIS_MODULE);
-	return 0;
-}
-
-static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
-{
-	return !(debug_log->log_start - debug_log->log_end);
-}
-
-static ssize_t batadv_log_read(struct file *file, char __user *buf,
-			       size_t count, loff_t *ppos)
-{
-	struct batadv_priv *bat_priv = file->private_data;
-	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
-	int error, i = 0;
-	char *char_addr;
-	char c;
-
-	if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log))
-		return -EAGAIN;
-
-	if (!buf)
-		return -EINVAL;
-
-	if (count == 0)
-		return 0;
-
-	if (!access_ok(VERIFY_WRITE, buf, count))
-		return -EFAULT;
-
-	error = wait_event_interruptible(debug_log->queue_wait,
-					 (!batadv_log_empty(debug_log)));
-
-	if (error)
-		return error;
-
-	spin_lock_bh(&debug_log->lock);
-
-	while ((!error) && (i < count) &&
-	       (debug_log->log_start != debug_log->log_end)) {
-		char_addr = batadv_log_char_addr(debug_log,
-						 debug_log->log_start);
-		c = *char_addr;
-
-		debug_log->log_start++;
-
-		spin_unlock_bh(&debug_log->lock);
-
-		error = __put_user(c, buf);
-
-		spin_lock_bh(&debug_log->lock);
-
-		buf++;
-		i++;
-	}
-
-	spin_unlock_bh(&debug_log->lock);
-
-	if (!error)
-		return i;
-
-	return error;
-}
-
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
-{
-	struct batadv_priv *bat_priv = file->private_data;
-	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
-
-	poll_wait(file, &debug_log->queue_wait, wait);
-
-	if (!batadv_log_empty(debug_log))
-		return POLLIN | POLLRDNORM;
-
-	return 0;
-}
-
-static const struct file_operations batadv_log_fops = {
-	.open           = batadv_log_open,
-	.release        = batadv_log_release,
-	.read           = batadv_log_read,
-	.poll           = batadv_log_poll,
-	.llseek         = no_llseek,
-};
-
-static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
-{
-	struct dentry *d;
-
-	if (!bat_priv->debug_dir)
-		goto err;
-
-	bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
-	if (!bat_priv->debug_log)
-		goto err;
-
-	spin_lock_init(&bat_priv->debug_log->lock);
-	init_waitqueue_head(&bat_priv->debug_log->queue_wait);
-
-	d = debugfs_create_file("log", S_IFREG | S_IRUSR,
-				bat_priv->debug_dir, bat_priv,
-				&batadv_log_fops);
-	if (!d)
-		goto err;
-
-	return 0;
-
-err:
-	return -ENOMEM;
-}
-
-static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
-{
-	kfree(bat_priv->debug_log);
-	bat_priv->debug_log = NULL;
-}
-#else /* CONFIG_BATMAN_ADV_DEBUG */
-static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
-{
-	return 0;
-}
-
-static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
-{
-}
-#endif
-
 static int batadv_algorithms_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, batadv_algo_seq_print_text, NULL);
@@ -363,6 +151,22 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
 }
 #endif
 
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags
+ * @inode: inode which was opened
+ * @file: file handle to be initialized
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_mcast_flags_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+
+	return single_open(file, batadv_mcast_flags_seq_print_text, net_dev);
+}
+#endif
+
 #define BATADV_DEBUGINFO(_name, _mode, _open)		\
 struct batadv_debuginfo batadv_debuginfo_##_name = {	\
 	.attr = {					\
@@ -407,6 +211,9 @@ static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
 #ifdef CONFIG_BATMAN_ADV_NC
 static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
 #endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open);
+#endif
 
 static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
 	&batadv_debuginfo_neighbors,
@@ -424,6 +231,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
 #ifdef CONFIG_BATMAN_ADV_NC
 	&batadv_debuginfo_nc_nodes,
 #endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	&batadv_debuginfo_mcast_flags,
+#endif
 	NULL,
 };
 
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 278800a99c69..fa7646532a13 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -45,9 +45,11 @@
 
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "originator.h"
 #include "send.h"
 #include "translation-table.h"
+#include "tvlv.h"
 
 static void batadv_dat_purge(struct work_struct *work);
 
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 65536db1bff7..0934730fb7ff 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -27,7 +27,6 @@
 #include <linux/kernel.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
-#include <linux/pkt_sched.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -414,7 +413,7 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
 	if (!skb_fragment)
 		goto err;
 
-	skb->priority = TC_PRIO_CONTROL;
+	skb_fragment->priority = skb->priority;
 
 	/* Eat the last mtu-bytes of the skb */
 	skb_reserve(skb_fragment, header_size + ETH_HLEN);
@@ -434,11 +433,12 @@ err:
  * @orig_node: final destination of the created fragments
  * @neigh_node: next-hop of the created fragments
  *
- * Return: true on success, false otherwise.
+ * Return: the netdev tx status or -1 in case of error.
+ * When -1 is returned the skb is not consumed.
  */
-bool batadv_frag_send_packet(struct sk_buff *skb,
-			     struct batadv_orig_node *orig_node,
-			     struct batadv_neigh_node *neigh_node)
+int batadv_frag_send_packet(struct sk_buff *skb,
+			    struct batadv_orig_node *orig_node,
+			    struct batadv_neigh_node *neigh_node)
 {
 	struct batadv_priv *bat_priv;
 	struct batadv_hard_iface *primary_if = NULL;
@@ -447,7 +447,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
 	unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
 	unsigned int header_size = sizeof(frag_header);
 	unsigned int max_fragment_size, max_packet_size;
-	bool ret = false;
+	int ret = -1;
 
 	/* To avoid merge and refragmentation at next-hops we never send
 	 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -458,12 +458,12 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
 
 	/* Don't even try to fragment, if we need more than 16 fragments */
 	if (skb->len > max_packet_size)
-		goto out_err;
+		goto out;
 
 	bat_priv = orig_node->bat_priv;
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (!primary_if)
-		goto out_err;
+		goto out;
 
 	/* Create one header to be copied to all fragments */
 	frag_header.packet_type = BATADV_UNICAST_FRAG;
@@ -473,6 +473,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
 	frag_header.reserved = 0;
 	frag_header.no = 0;
 	frag_header.total_size = htons(skb->len);
+
+	/* skb->priority values from 256->263 are magic values to
+	 * directly indicate a specific 802.1d priority.  This is used
+	 * to allow 802.1d priority to be passed directly in from VLAN
+	 * tags, etc.
+	 */
+	if (skb->priority >= 256 && skb->priority <= 263)
+		frag_header.priority = skb->priority - 256;
+
 	ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
 	ether_addr_copy(frag_header.dest, orig_node->orig);
 
@@ -480,23 +489,33 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
 	while (skb->len > max_fragment_size) {
 		skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
 		if (!skb_fragment)
-			goto out_err;
+			goto out;
 
 		batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
 		batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
 				   skb_fragment->len + ETH_HLEN);
-		batadv_send_unicast_skb(skb_fragment, neigh_node);
+		ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
+		if (ret != NET_XMIT_SUCCESS) {
+			/* return -1 so that the caller can free the original
+			 * skb
+			 */
+			ret = -1;
+			goto out;
+		}
+
 		frag_header.no++;
 
 		/* The initial check in this function should cover this case */
-		if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
-			goto out_err;
+		if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
+			ret = -1;
+			goto out;
+		}
 	}
 
 	/* Make room for the fragment header. */
 	if (batadv_skb_head_push(skb, header_size) < 0 ||
 	    pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
-		goto out_err;
+		goto out;
 
 	memcpy(skb->data, &frag_header, header_size);
 
@@ -504,11 +523,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
 	batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
 	batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
 			   skb->len + ETH_HLEN);
-	batadv_send_unicast_skb(skb, neigh_node);
-
-	ret = true;
+	ret = batadv_send_unicast_skb(skb, neigh_node);
 
-out_err:
+out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 9ff77c7ef7c7..3202fe329e63 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -34,9 +34,9 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
 			 struct batadv_orig_node *orig_node_src);
 bool batadv_frag_skb_buffer(struct sk_buff **skb,
 			    struct batadv_orig_node *orig_node);
-bool batadv_frag_send_packet(struct sk_buff *skb,
-			     struct batadv_orig_node *orig_node,
-			     struct batadv_neigh_node *neigh_node);
+int batadv_frag_send_packet(struct sk_buff *skb,
+			    struct batadv_orig_node *orig_node,
+			    struct batadv_neigh_node *neigh_node);
 
 /**
  * batadv_frag_check_entry - check if a list of fragments has timed out
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 5839c569f769..63a805d3f96e 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -42,6 +42,7 @@
 
 #include "gateway_common.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
@@ -192,7 +193,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 
 		tq_avg = router_ifinfo->bat_iv.tq_avg;
 
-		switch (atomic_read(&bat_priv->gw_sel_class)) {
+		switch (atomic_read(&bat_priv->gw.sel_class)) {
 		case 1: /* fast connection */
 			tmp_gw_factor = tq_avg * tq_avg;
 			tmp_gw_factor *= gw_node->bandwidth_down;
@@ -255,7 +256,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
 {
 	struct batadv_gw_node *curr_gw;
 
-	if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
+	if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
 		return;
 
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
@@ -283,7 +284,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
 	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
 	char gw_addr[18] = { '\0' };
 
-	if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
+	if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
 		goto out;
 
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
@@ -402,8 +403,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
 	/* if the routing class is greater than 3 the value tells us how much
 	 * greater the TQ value of the new gateway must be
 	 */
-	if ((atomic_read(&bat_priv->gw_sel_class) > 3) &&
-	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class)))
+	if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
+	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
 		goto out;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -638,8 +639,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 		goto out;
 
 	seq_printf(seq,
-		   "      %-12s (%s/%i) %17s [%10s]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
-		   "Gateway", "#", BATADV_TQ_MAX_VALUE, "Nexthop", "outgoingIF",
+		   "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
 		   BATADV_SOURCE_VERSION, primary_if->net_dev->name,
 		   primary_if->net_dev->dev_addr, net_dev->name);
 
@@ -821,7 +821,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
 	if (!gw_node)
 		goto out;
 
-	switch (atomic_read(&bat_priv->gw_mode)) {
+	switch (atomic_read(&bat_priv->gw.mode)) {
 	case BATADV_GW_MODE_SERVER:
 		/* If we are a GW then we are our best GW. We can artificially
 		 * set the tq towards ourself as the maximum value
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 4423047889e1..d7bc6a87bcc9 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -19,8 +19,8 @@
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/errno.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>
 #include <linux/netdevice.h>
@@ -28,7 +28,9 @@
 #include <linux/string.h>
 
 #include "gateway_client.h"
+#include "log.h"
 #include "packet.h"
+#include "tvlv.h"
 
 /**
  * batadv_parse_throughput - parse supplied string buffer to extract throughput
@@ -144,7 +146,7 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
 	u32 down, up;
 	char gw_mode;
 
-	gw_mode = atomic_read(&bat_priv->gw_mode);
+	gw_mode = atomic_read(&bat_priv->gw.mode);
 
 	switch (gw_mode) {
 	case BATADV_GW_MODE_OFF:
@@ -241,8 +243,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 
 	/* restart gateway selection if fast or late switching was enabled */
 	if ((gateway.bandwidth_down != 0) &&
-	    (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) &&
-	    (atomic_read(&bat_priv->gw_sel_class) > 2))
+	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) &&
+	    (atomic_read(&bat_priv->gw.sel_class) > 2))
 		batadv_gw_check_election(bat_priv, orig);
 }
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 8c2f39962fa5..1f9080840566 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -23,9 +23,9 @@
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/if.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
-#include <linux/if.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
@@ -37,10 +37,12 @@
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 
+#include "bat_v.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "send.h"
@@ -245,7 +247,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
 	if (!new_hard_iface)
 		goto out;
 
-	bat_priv->bat_algo_ops->bat_primary_iface_set(new_hard_iface);
+	bat_priv->algo_ops->iface.primary_set(new_hard_iface);
 	batadv_primary_if_update_addr(bat_priv, curr_hard_iface);
 
 out:
@@ -392,7 +394,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
 
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 
-	bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface);
+	bat_priv->algo_ops->iface.update_mac(hard_iface);
 	hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED;
 
 	/* the first active interface becomes our primary interface or
@@ -407,8 +409,8 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
 
 	batadv_update_min_mtu(hard_iface->soft_iface);
 
-	if (bat_priv->bat_algo_ops->bat_iface_activate)
-		bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
+	if (bat_priv->algo_ops->iface.activate)
+		bat_priv->algo_ops->iface.activate(hard_iface);
 
 out:
 	if (primary_if)
@@ -506,7 +508,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	if (ret)
 		goto err_dev;
 
-	ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
+	ret = bat_priv->algo_ops->iface.enable(hard_iface);
 	if (ret < 0)
 		goto err_upper;
 
@@ -515,7 +517,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	hard_iface->if_status = BATADV_IF_INACTIVE;
 	ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
 	if (ret < 0) {
-		bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+		bat_priv->algo_ops->iface.disable(hard_iface);
 		bat_priv->num_ifaces--;
 		hard_iface->if_status = BATADV_IF_NOT_IN_USE;
 		goto err_upper;
@@ -553,9 +555,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 
 	batadv_hardif_recalc_extra_skbroom(soft_iface);
 
-	/* begin scheduling originator messages on that interface */
-	batadv_schedule_bat_ogm(hard_iface);
-
 out:
 	return 0;
 
@@ -599,7 +598,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
 			batadv_hardif_put(new_if);
 	}
 
-	bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+	bat_priv->algo_ops->iface.disable(hard_iface);
 	hard_iface->if_status = BATADV_IF_NOT_IN_USE;
 
 	/* delete all references to this hard_iface */
@@ -686,6 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
 	if (batadv_is_wifi_netdev(net_dev))
 		hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
 
+	batadv_v_hardif_init(hard_iface);
+
 	/* extra reference for return */
 	kref_init(&hard_iface->refcount);
 	kref_get(&hard_iface->refcount);
@@ -782,7 +783,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
 		batadv_check_known_mac_addr(hard_iface->net_dev);
 
 		bat_priv = netdev_priv(hard_iface->soft_iface);
-		bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface);
+		bat_priv->algo_ops->iface.update_mac(hard_iface);
 
 		primary_if = batadv_primary_if_get_selected(bat_priv);
 		if (!primary_if)
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 777aea10cd8f..378cc1119d66 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -45,6 +45,7 @@
 #include <linux/wait.h>
 
 #include "hard-interface.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "send.h"
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
new file mode 100644
index 000000000000..56dc532f7a2c
--- /dev/null
+++ b/net/batman-adv/log.c
@@ -0,0 +1,231 @@
+/* Copyright (C) 2010-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.h"
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <stdarg.h>
+
+#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
+
+static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
+
+static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
+				  size_t idx)
+{
+	return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
+}
+
+static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
+				 char c)
+{
+	char *char_addr;
+
+	char_addr = batadv_log_char_addr(debug_log, debug_log->log_end);
+	*char_addr = c;
+	debug_log->log_end++;
+
+	if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len)
+		debug_log->log_start = debug_log->log_end - batadv_log_buff_len;
+}
+
+__printf(2, 3)
+static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
+			     const char *fmt, ...)
+{
+	va_list args;
+	static char debug_log_buf[256];
+	char *p;
+
+	if (!debug_log)
+		return 0;
+
+	spin_lock_bh(&debug_log->lock);
+	va_start(args, fmt);
+	vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
+	va_end(args);
+
+	for (p = debug_log_buf; *p != 0; p++)
+		batadv_emit_log_char(debug_log, *p);
+
+	spin_unlock_bh(&debug_log->lock);
+
+	wake_up(&debug_log->queue_wait);
+
+	return 0;
+}
+
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+{
+	va_list args;
+	char tmp_log_buf[256];
+
+	va_start(args, fmt);
+	vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
+	batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
+			  jiffies_to_msecs(jiffies), tmp_log_buf);
+	va_end(args);
+
+	return 0;
+}
+
+static int batadv_log_open(struct inode *inode, struct file *file)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+
+	nonseekable_open(inode, file);
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static int batadv_log_release(struct inode *inode, struct file *file)
+{
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
+{
+	return !(debug_log->log_start - debug_log->log_end);
+}
+
+static ssize_t batadv_log_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct batadv_priv *bat_priv = file->private_data;
+	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
+	int error, i = 0;
+	char *char_addr;
+	char c;
+
+	if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log))
+		return -EAGAIN;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	error = wait_event_interruptible(debug_log->queue_wait,
+					 (!batadv_log_empty(debug_log)));
+
+	if (error)
+		return error;
+
+	spin_lock_bh(&debug_log->lock);
+
+	while ((!error) && (i < count) &&
+	       (debug_log->log_start != debug_log->log_end)) {
+		char_addr = batadv_log_char_addr(debug_log,
+						 debug_log->log_start);
+		c = *char_addr;
+
+		debug_log->log_start++;
+
+		spin_unlock_bh(&debug_log->lock);
+
+		error = __put_user(c, buf);
+
+		spin_lock_bh(&debug_log->lock);
+
+		buf++;
+		i++;
+	}
+
+	spin_unlock_bh(&debug_log->lock);
+
+	if (!error)
+		return i;
+
+	return error;
+}
+
+static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+{
+	struct batadv_priv *bat_priv = file->private_data;
+	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
+
+	poll_wait(file, &debug_log->queue_wait, wait);
+
+	if (!batadv_log_empty(debug_log))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static const struct file_operations batadv_log_fops = {
+	.open           = batadv_log_open,
+	.release        = batadv_log_release,
+	.read           = batadv_log_read,
+	.poll           = batadv_log_poll,
+	.llseek         = no_llseek,
+};
+
+int batadv_debug_log_setup(struct batadv_priv *bat_priv)
+{
+	struct dentry *d;
+
+	if (!bat_priv->debug_dir)
+		goto err;
+
+	bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
+	if (!bat_priv->debug_log)
+		goto err;
+
+	spin_lock_init(&bat_priv->debug_log->lock);
+	init_waitqueue_head(&bat_priv->debug_log->queue_wait);
+
+	d = debugfs_create_file("log", S_IFREG | S_IRUSR,
+				bat_priv->debug_dir, bat_priv,
+				&batadv_log_fops);
+	if (!d)
+		goto err;
+
+	return 0;
+
+err:
+	return -ENOMEM;
+}
+
+void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
+{
+	kfree(bat_priv->debug_log);
+	bat_priv->debug_log = NULL;
+}
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
new file mode 100644
index 000000000000..e0e1a88c3e58
--- /dev/null
+++ b/net/batman-adv/log.h
@@ -0,0 +1,111 @@
+/* Copyright (C) 2007-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_LOG_H_
+#define _NET_BATMAN_ADV_LOG_H_
+
+#include "main.h"
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/printk.h>
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+
+int batadv_debug_log_setup(struct batadv_priv *bat_priv);
+void batadv_debug_log_cleanup(struct batadv_priv *bat_priv);
+
+#else
+
+static inline int batadv_debug_log_setup(struct batadv_priv *bat_priv)
+{
+	return 0;
+}
+
+static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
+{
+}
+
+#endif
+
+/**
+ * enum batadv_dbg_level - available log levels
+ * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
+ * @BATADV_DBG_ROUTES: route added / changed / deleted
+ * @BATADV_DBG_TT: translation table messages
+ * @BATADV_DBG_BLA: bridge loop avoidance messages
+ * @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_NC: network coding related messages
+ * @BATADV_DBG_MCAST: multicast related messages
+ * @BATADV_DBG_TP_METER: throughput meter messages
+ * @BATADV_DBG_ALL: the union of all the above log levels
+ */
+enum batadv_dbg_level {
+	BATADV_DBG_BATMAN	= BIT(0),
+	BATADV_DBG_ROUTES	= BIT(1),
+	BATADV_DBG_TT		= BIT(2),
+	BATADV_DBG_BLA		= BIT(3),
+	BATADV_DBG_DAT		= BIT(4),
+	BATADV_DBG_NC		= BIT(5),
+	BATADV_DBG_MCAST	= BIT(6),
+	BATADV_DBG_TP_METER	= BIT(7),
+	BATADV_DBG_ALL		= 127,
+};
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+__printf(2, 3);
+
+/* possibly ratelimited debug output */
+#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...)	\
+	do {							\
+		if (atomic_read(&bat_priv->log_level) & type && \
+		    (!ratelimited || net_ratelimit()))		\
+			batadv_debug_log(bat_priv, fmt, ## arg);\
+	}							\
+	while (0)
+#else /* !CONFIG_BATMAN_ADV_DEBUG */
+__printf(4, 5)
+static inline void _batadv_dbg(int type __always_unused,
+			       struct batadv_priv *bat_priv __always_unused,
+			       int ratelimited __always_unused,
+			       const char *fmt __always_unused, ...)
+{
+}
+#endif
+
+#define batadv_dbg(type, bat_priv, arg...) \
+	_batadv_dbg(type, bat_priv, 0, ## arg)
+#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
+	_batadv_dbg(type, bat_priv, 1, ## arg)
+
+#define batadv_info(net_dev, fmt, arg...)				\
+	do {								\
+		struct net_device *_netdev = (net_dev);                 \
+		struct batadv_priv *_batpriv = netdev_priv(_netdev);    \
+		batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg);	\
+		pr_info("%s: " fmt, _netdev->name, ## arg);		\
+	} while (0)
+#define batadv_err(net_dev, fmt, arg...)				\
+	do {								\
+		struct net_device *_netdev = (net_dev);                 \
+		struct batadv_priv *_batpriv = netdev_priv(_netdev);    \
+		batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg);	\
+		pr_err("%s: " fmt, _netdev->name, ## arg);		\
+	} while (0)
+
+#endif /* _NET_BATMAN_ADV_LOG_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5f2974bd1227..fe4c5e29f96b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -31,16 +31,13 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
-#include <linux/lockdep.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/netdevice.h>
-#include <linux/pkt_sched.h>
+#include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/skbuff.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
@@ -49,6 +46,8 @@
 #include <net/rtnetlink.h>
 
 #include "bat_algo.h"
+#include "bat_iv_ogm.h"
+#include "bat_v.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
@@ -56,13 +55,16 @@
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "icmp_socket.h"
+#include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
 #include "send.h"
 #include "soft-interface.h"
+#include "tp_meter.h"
 #include "translation-table.h"
 
 /* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -71,8 +73,6 @@
 struct list_head batadv_hardif_list;
 static int (*batadv_rx_handler[256])(struct sk_buff *,
 				     struct batadv_hard_iface *);
-char batadv_routing_algo[20] = "BATMAN_IV";
-static struct hlist_head batadv_algo_list;
 
 unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -83,13 +83,14 @@ static void batadv_recv_handler_init(void);
 static int __init batadv_init(void)
 {
 	INIT_LIST_HEAD(&batadv_hardif_list);
-	INIT_HLIST_HEAD(&batadv_algo_list);
+	batadv_algo_init();
 
 	batadv_recv_handler_init();
 
 	batadv_v_init();
 	batadv_iv_init();
 	batadv_nc_init();
+	batadv_tp_meter_init();
 
 	batadv_event_workqueue = create_singlethread_workqueue("bat_events");
 
@@ -101,6 +102,7 @@ static int __init batadv_init(void)
 
 	register_netdevice_notifier(&batadv_hard_if_notifier);
 	rtnl_link_register(&batadv_link_ops);
+	batadv_netlink_register();
 
 	pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
 		BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -111,6 +113,7 @@ static int __init batadv_init(void)
 static void __exit batadv_exit(void)
 {
 	batadv_debugfs_destroy();
+	batadv_netlink_unregister();
 	rtnl_link_unregister(&batadv_link_ops);
 	unregister_netdevice_notifier(&batadv_hard_if_notifier);
 	batadv_hardif_remove_interfaces();
@@ -141,6 +144,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
 	spin_lock_init(&bat_priv->tvlv.container_list_lock);
 	spin_lock_init(&bat_priv->tvlv.handler_list_lock);
 	spin_lock_init(&bat_priv->softif_vlan_list_lock);
+	spin_lock_init(&bat_priv->tp_list_lock);
 
 	INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
 	INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
@@ -159,6 +163,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
 	INIT_HLIST_HEAD(&bat_priv->tvlv.container_list);
 	INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
 	INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
+	INIT_HLIST_HEAD(&bat_priv->tp_list);
 
 	ret = batadv_v_mesh_init(bat_priv);
 	if (ret < 0)
@@ -538,78 +543,6 @@ void batadv_recv_handler_unregister(u8 packet_type)
 	batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet;
 }
 
-static struct batadv_algo_ops *batadv_algo_get(char *name)
-{
-	struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
-
-	hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
-		if (strcmp(bat_algo_ops_tmp->name, name) != 0)
-			continue;
-
-		bat_algo_ops = bat_algo_ops_tmp;
-		break;
-	}
-
-	return bat_algo_ops;
-}
-
-int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
-{
-	struct batadv_algo_ops *bat_algo_ops_tmp;
-
-	bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
-	if (bat_algo_ops_tmp) {
-		pr_info("Trying to register already registered routing algorithm: %s\n",
-			bat_algo_ops->name);
-		return -EEXIST;
-	}
-
-	/* all algorithms must implement all ops (for now) */
-	if (!bat_algo_ops->bat_iface_enable ||
-	    !bat_algo_ops->bat_iface_disable ||
-	    !bat_algo_ops->bat_iface_update_mac ||
-	    !bat_algo_ops->bat_primary_iface_set ||
-	    !bat_algo_ops->bat_ogm_schedule ||
-	    !bat_algo_ops->bat_ogm_emit ||
-	    !bat_algo_ops->bat_neigh_cmp ||
-	    !bat_algo_ops->bat_neigh_is_similar_or_better) {
-		pr_info("Routing algo '%s' does not implement required ops\n",
-			bat_algo_ops->name);
-		return -EINVAL;
-	}
-
-	INIT_HLIST_NODE(&bat_algo_ops->list);
-	hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
-
-	return 0;
-}
-
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
-{
-	struct batadv_algo_ops *bat_algo_ops;
-
-	bat_algo_ops = batadv_algo_get(name);
-	if (!bat_algo_ops)
-		return -EINVAL;
-
-	bat_priv->bat_algo_ops = bat_algo_ops;
-
-	return 0;
-}
-
-int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
-{
-	struct batadv_algo_ops *bat_algo_ops;
-
-	seq_puts(seq, "Available routing algorithms:\n");
-
-	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
-		seq_printf(seq, " * %s\n", bat_algo_ops->name);
-	}
-
-	return 0;
-}
-
 /**
  * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in
  *  the header
@@ -644,594 +577,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
 }
 
 /**
- * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
- *  free after rcu grace period
- * @ref: kref pointer of the tvlv
- */
-static void batadv_tvlv_handler_release(struct kref *ref)
-{
-	struct batadv_tvlv_handler *tvlv_handler;
-
-	tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
-	kfree_rcu(tvlv_handler, rcu);
-}
-
-/**
- * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
- *  possibly release it
- * @tvlv_handler: the tvlv handler to free
- */
-static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
-{
-	kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
-}
-
-/**
- * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
- *  based on the provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv handler type to look for
- * @version: tvlv handler version to look for
- *
- * Return: tvlv handler if found or NULL otherwise.
- */
-static struct batadv_tvlv_handler *
-batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
-{
-	struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(tvlv_handler_tmp,
-				 &bat_priv->tvlv.handler_list, list) {
-		if (tvlv_handler_tmp->type != type)
-			continue;
-
-		if (tvlv_handler_tmp->version != version)
-			continue;
-
-		if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
-			continue;
-
-		tvlv_handler = tvlv_handler_tmp;
-		break;
-	}
-	rcu_read_unlock();
-
-	return tvlv_handler;
-}
-
-/**
- * batadv_tvlv_container_release - release tvlv from lists and free
- * @ref: kref pointer of the tvlv
- */
-static void batadv_tvlv_container_release(struct kref *ref)
-{
-	struct batadv_tvlv_container *tvlv;
-
-	tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
-	kfree(tvlv);
-}
-
-/**
- * batadv_tvlv_container_put - decrement the tvlv container refcounter and
- *  possibly release it
- * @tvlv: the tvlv container to free
- */
-static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
-{
-	kref_put(&tvlv->refcount, batadv_tvlv_container_release);
-}
-
-/**
- * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
- *  list based on the provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type to look for
- * @version: tvlv container version to look for
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- *
- * Return: tvlv container if found or NULL otherwise.
- */
-static struct batadv_tvlv_container *
-batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
-{
-	struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
-
-	lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
-	hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
-		if (tvlv_tmp->tvlv_hdr.type != type)
-			continue;
-
-		if (tvlv_tmp->tvlv_hdr.version != version)
-			continue;
-
-		kref_get(&tvlv_tmp->refcount);
-		tvlv = tvlv_tmp;
-		break;
-	}
-
-	return tvlv;
-}
-
-/**
- * batadv_tvlv_container_list_size - calculate the size of the tvlv container
- *  list entries
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- *
- * Return: size of all currently registered tvlv containers in bytes.
- */
-static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
-{
-	struct batadv_tvlv_container *tvlv;
-	u16 tvlv_len = 0;
-
-	lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
-	hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
-		tvlv_len += sizeof(struct batadv_tvlv_hdr);
-		tvlv_len += ntohs(tvlv->tvlv_hdr.len);
-	}
-
-	return tvlv_len;
-}
-
-/**
- * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
- *  list
- * @bat_priv: the bat priv with all the soft interface information
- * @tvlv: the to be removed tvlv container
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- */
-static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
-					 struct batadv_tvlv_container *tvlv)
-{
-	lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
-	if (!tvlv)
-		return;
-
-	hlist_del(&tvlv->list);
-
-	/* first call to decrement the counter, second call to free */
-	batadv_tvlv_container_put(tvlv);
-	batadv_tvlv_container_put(tvlv);
-}
-
-/**
- * batadv_tvlv_container_unregister - unregister tvlv container based on the
- *  provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type to unregister
- * @version: tvlv container type to unregister
- */
-void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
-				      u8 type, u8 version)
-{
-	struct batadv_tvlv_container *tvlv;
-
-	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
-	tvlv = batadv_tvlv_container_get(bat_priv, type, version);
-	batadv_tvlv_container_remove(bat_priv, tvlv);
-	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-}
-
-/**
- * batadv_tvlv_container_register - register tvlv type, version and content
- *  to be propagated with each (primary interface) OGM
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type
- * @version: tvlv container version
- * @tvlv_value: tvlv container content
- * @tvlv_value_len: tvlv container content length
- *
- * If a container of the same type and version was already registered the new
- * content is going to replace the old one.
- */
-void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
-				    u8 type, u8 version,
-				    void *tvlv_value, u16 tvlv_value_len)
-{
-	struct batadv_tvlv_container *tvlv_old, *tvlv_new;
-
-	if (!tvlv_value)
-		tvlv_value_len = 0;
-
-	tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
-	if (!tvlv_new)
-		return;
-
-	tvlv_new->tvlv_hdr.version = version;
-	tvlv_new->tvlv_hdr.type = type;
-	tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
-
-	memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
-	INIT_HLIST_NODE(&tvlv_new->list);
-	kref_init(&tvlv_new->refcount);
-
-	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
-	tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
-	batadv_tvlv_container_remove(bat_priv, tvlv_old);
-	hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
-	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-}
-
-/**
- * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
- *  requested packet size
- * @packet_buff: packet buffer
- * @packet_buff_len: packet buffer size
- * @min_packet_len: requested packet minimum size
- * @additional_packet_len: requested additional packet size on top of minimum
- *  size
- *
- * Return: true of the packet buffer could be changed to the requested size,
- * false otherwise.
- */
-static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
-					    int *packet_buff_len,
-					    int min_packet_len,
-					    int additional_packet_len)
-{
-	unsigned char *new_buff;
-
-	new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
-
-	/* keep old buffer if kmalloc should fail */
-	if (!new_buff)
-		return false;
-
-	memcpy(new_buff, *packet_buff, min_packet_len);
-	kfree(*packet_buff);
-	*packet_buff = new_buff;
-	*packet_buff_len = min_packet_len + additional_packet_len;
-
-	return true;
-}
-
-/**
- * batadv_tvlv_container_ogm_append - append tvlv container content to given
- *  OGM packet buffer
- * @bat_priv: the bat priv with all the soft interface information
- * @packet_buff: ogm packet buffer
- * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
- *  content
- * @packet_min_len: ogm header size to be preserved for the OGM itself
- *
- * The ogm packet might be enlarged or shrunk depending on the current size
- * and the size of the to-be-appended tvlv containers.
- *
- * Return: size of all appended tvlv containers in bytes.
- */
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
-				     unsigned char **packet_buff,
-				     int *packet_buff_len, int packet_min_len)
-{
-	struct batadv_tvlv_container *tvlv;
-	struct batadv_tvlv_hdr *tvlv_hdr;
-	u16 tvlv_value_len;
-	void *tvlv_value;
-	bool ret;
-
-	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
-	tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
-
-	ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
-					      packet_min_len, tvlv_value_len);
-
-	if (!ret)
-		goto end;
-
-	if (!tvlv_value_len)
-		goto end;
-
-	tvlv_value = (*packet_buff) + packet_min_len;
-
-	hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
-		tvlv_hdr = tvlv_value;
-		tvlv_hdr->type = tvlv->tvlv_hdr.type;
-		tvlv_hdr->version = tvlv->tvlv_hdr.version;
-		tvlv_hdr->len = tvlv->tvlv_hdr.len;
-		tvlv_value = tvlv_hdr + 1;
-		memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
-		tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
-	}
-
-end:
-	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-	return tvlv_value_len;
-}
-
-/**
- * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
- *  appropriate handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @tvlv_handler: tvlv callback function handling the tvlv content
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
- * @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- *
- * Return: success if handler was not found or the return value of the handler
- * callback.
- */
-static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
-				    struct batadv_tvlv_handler *tvlv_handler,
-				    bool ogm_source,
-				    struct batadv_orig_node *orig_node,
-				    u8 *src, u8 *dst,
-				    void *tvlv_value, u16 tvlv_value_len)
-{
-	if (!tvlv_handler)
-		return NET_RX_SUCCESS;
-
-	if (ogm_source) {
-		if (!tvlv_handler->ogm_handler)
-			return NET_RX_SUCCESS;
-
-		if (!orig_node)
-			return NET_RX_SUCCESS;
-
-		tvlv_handler->ogm_handler(bat_priv, orig_node,
-					  BATADV_NO_FLAGS,
-					  tvlv_value, tvlv_value_len);
-		tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
-	} else {
-		if (!src)
-			return NET_RX_SUCCESS;
-
-		if (!dst)
-			return NET_RX_SUCCESS;
-
-		if (!tvlv_handler->unicast_handler)
-			return NET_RX_SUCCESS;
-
-		return tvlv_handler->unicast_handler(bat_priv, src,
-						     dst, tvlv_value,
-						     tvlv_value_len);
-	}
-
-	return NET_RX_SUCCESS;
-}
-
-/**
- * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
- *  appropriate handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
- * @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- *
- * Return: success when processing an OGM or the return value of all called
- * handler callbacks.
- */
-int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
-				   bool ogm_source,
-				   struct batadv_orig_node *orig_node,
-				   u8 *src, u8 *dst,
-				   void *tvlv_value, u16 tvlv_value_len)
-{
-	struct batadv_tvlv_handler *tvlv_handler;
-	struct batadv_tvlv_hdr *tvlv_hdr;
-	u16 tvlv_value_cont_len;
-	u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
-	int ret = NET_RX_SUCCESS;
-
-	while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
-		tvlv_hdr = tvlv_value;
-		tvlv_value_cont_len = ntohs(tvlv_hdr->len);
-		tvlv_value = tvlv_hdr + 1;
-		tvlv_value_len -= sizeof(*tvlv_hdr);
-
-		if (tvlv_value_cont_len > tvlv_value_len)
-			break;
-
-		tvlv_handler = batadv_tvlv_handler_get(bat_priv,
-						       tvlv_hdr->type,
-						       tvlv_hdr->version);
-
-		ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
-						ogm_source, orig_node,
-						src, dst, tvlv_value,
-						tvlv_value_cont_len);
-		if (tvlv_handler)
-			batadv_tvlv_handler_put(tvlv_handler);
-		tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
-		tvlv_value_len -= tvlv_value_cont_len;
-	}
-
-	if (!ogm_source)
-		return ret;
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(tvlv_handler,
-				 &bat_priv->tvlv.handler_list, list) {
-		if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
-		    !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
-			tvlv_handler->ogm_handler(bat_priv, orig_node,
-						  cifnotfound, NULL, 0);
-
-		tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
-	}
-	rcu_read_unlock();
-
-	return NET_RX_SUCCESS;
-}
-
-/**
- * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
- *  handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @batadv_ogm_packet: ogm packet containing the tvlv containers
- * @orig_node: orig node emitting the ogm packet
- */
-void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
-			     struct batadv_ogm_packet *batadv_ogm_packet,
-			     struct batadv_orig_node *orig_node)
-{
-	void *tvlv_value;
-	u16 tvlv_value_len;
-
-	if (!batadv_ogm_packet)
-		return;
-
-	tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
-	if (!tvlv_value_len)
-		return;
-
-	tvlv_value = batadv_ogm_packet + 1;
-
-	batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
-				       tvlv_value, tvlv_value_len);
-}
-
-/**
- * batadv_tvlv_handler_register - register tvlv handler based on the provided
- *  type and version (both need to match) for ogm tvlv payload and/or unicast
- *  payload
- * @bat_priv: the bat priv with all the soft interface information
- * @optr: ogm tvlv handler callback function. This function receives the orig
- *  node, flags and the tvlv content as argument to process.
- * @uptr: unicast tvlv handler callback function. This function receives the
- *  source & destination of the unicast packet as well as the tvlv content
- *  to process.
- * @type: tvlv handler type to be registered
- * @version: tvlv handler version to be registered
- * @flags: flags to enable or disable TVLV API behavior
- */
-void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
-				  void (*optr)(struct batadv_priv *bat_priv,
-					       struct batadv_orig_node *orig,
-					       u8 flags,
-					       void *tvlv_value,
-					       u16 tvlv_value_len),
-				  int (*uptr)(struct batadv_priv *bat_priv,
-					      u8 *src, u8 *dst,
-					      void *tvlv_value,
-					      u16 tvlv_value_len),
-				  u8 type, u8 version, u8 flags)
-{
-	struct batadv_tvlv_handler *tvlv_handler;
-
-	tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
-	if (tvlv_handler) {
-		batadv_tvlv_handler_put(tvlv_handler);
-		return;
-	}
-
-	tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
-	if (!tvlv_handler)
-		return;
-
-	tvlv_handler->ogm_handler = optr;
-	tvlv_handler->unicast_handler = uptr;
-	tvlv_handler->type = type;
-	tvlv_handler->version = version;
-	tvlv_handler->flags = flags;
-	kref_init(&tvlv_handler->refcount);
-	INIT_HLIST_NODE(&tvlv_handler->list);
-
-	spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
-	hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
-	spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
-}
-
-/**
- * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
- *  provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv handler type to be unregistered
- * @version: tvlv handler version to be unregistered
- */
-void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
-				    u8 type, u8 version)
-{
-	struct batadv_tvlv_handler *tvlv_handler;
-
-	tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
-	if (!tvlv_handler)
-		return;
-
-	batadv_tvlv_handler_put(tvlv_handler);
-	spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
-	hlist_del_rcu(&tvlv_handler->list);
-	spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
-	batadv_tvlv_handler_put(tvlv_handler);
-}
-
-/**
- * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
- *  specified host
- * @bat_priv: the bat priv with all the soft interface information
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @type: tvlv type
- * @version: tvlv version
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- */
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
-			      u8 *dst, u8 type, u8 version,
-			      void *tvlv_value, u16 tvlv_value_len)
-{
-	struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
-	struct batadv_tvlv_hdr *tvlv_hdr;
-	struct batadv_orig_node *orig_node;
-	struct sk_buff *skb;
-	unsigned char *tvlv_buff;
-	unsigned int tvlv_len;
-	ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
-
-	orig_node = batadv_orig_hash_find(bat_priv, dst);
-	if (!orig_node)
-		return;
-
-	tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
-
-	skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
-	if (!skb)
-		goto out;
-
-	skb->priority = TC_PRIO_CONTROL;
-	skb_reserve(skb, ETH_HLEN);
-	tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
-	unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
-	unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV;
-	unicast_tvlv_packet->version = BATADV_COMPAT_VERSION;
-	unicast_tvlv_packet->ttl = BATADV_TTL;
-	unicast_tvlv_packet->reserved = 0;
-	unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
-	unicast_tvlv_packet->align = 0;
-	ether_addr_copy(unicast_tvlv_packet->src, src);
-	ether_addr_copy(unicast_tvlv_packet->dst, dst);
-
-	tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
-	tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
-	tvlv_hdr->version = version;
-	tvlv_hdr->type = type;
-	tvlv_hdr->len = htons(tvlv_value_len);
-	tvlv_buff += sizeof(*tvlv_hdr);
-	memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
-
-	if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
-		kfree_skb(skb);
-out:
-	batadv_orig_node_put(orig_node);
-}
-
-/**
  * batadv_get_vid - extract the VLAN identifier from skb if any
  * @skb: the buffer containing the packet
  * @header_len: length of the batman header preceding the ethernet header
@@ -1284,36 +629,6 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
 	return ap_isolation_enabled;
 }
 
-static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
-{
-	struct batadv_algo_ops *bat_algo_ops;
-	char *algo_name = (char *)val;
-	size_t name_len = strlen(algo_name);
-
-	if (name_len > 0 && algo_name[name_len - 1] == '\n')
-		algo_name[name_len - 1] = '\0';
-
-	bat_algo_ops = batadv_algo_get(algo_name);
-	if (!bat_algo_ops) {
-		pr_err("Routing algorithm '%s' is not supported\n", algo_name);
-		return -EINVAL;
-	}
-
-	return param_set_copystring(algo_name, kp);
-}
-
-static const struct kernel_param_ops batadv_param_ops_ra = {
-	.set = batadv_param_set_ra,
-	.get = param_get_string,
-};
-
-static struct kparam_string batadv_param_string_ra = {
-	.maxlen = sizeof(batadv_routing_algo),
-	.string = batadv_routing_algo,
-};
-
-module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
-		0644);
 module_init(batadv_init);
 module_exit(batadv_exit);
 
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 76925266deed..06a860845434 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.2"
+#define BATADV_SOURCE_VERSION "2016.3"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -100,6 +100,9 @@
 #define BATADV_NUM_BCASTS_WIRELESS 3
 #define BATADV_NUM_BCASTS_MAX 3
 
+/* length of the single packet used by the TP meter */
+#define BATADV_TP_PACKET_LEN ETH_DATA_LEN
+
 /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
 #define ARP_REQ_DELAY 250
 /* numbers of originator to contact for any PUT/GET DHT operation */
@@ -131,6 +134,11 @@
 
 #define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
 
+/**
+ * BATADV_TP_MAX_NUM - maximum number of simultaneously active tp sessions
+ */
+#define BATADV_TP_MAX_NUM 5
+
 enum batadv_mesh_state {
 	BATADV_MESH_INACTIVE,
 	BATADV_MESH_ACTIVE,
@@ -175,29 +183,26 @@ enum batadv_uev_type {
 
 /* Kernel headers */
 
-#include <linux/atomic.h>
 #include <linux/bitops.h> /* for packet.h */
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h> /* for packet.h */
-#include <linux/netdevice.h>
-#include <linux/printk.h>
-#include <linux/types.h>
-#include <linux/percpu.h>
-#include <linux/jiffies.h>
 #include <linux/if_vlan.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
 
 #include "types.h"
 
-struct batadv_ogm_packet;
+struct net_device;
+struct packet_type;
 struct seq_file;
 struct sk_buff;
 
 #define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
 			       (int)(vid & VLAN_VID_MASK) : -1)
 
-extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
 
 extern unsigned char batadv_broadcast_addr[];
@@ -218,74 +223,9 @@ batadv_recv_handler_register(u8 packet_type,
 			     int (*recv_handler)(struct sk_buff *,
 						 struct batadv_hard_iface *));
 void batadv_recv_handler_unregister(u8 packet_type);
-int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
-int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
 __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
 
 /**
- * enum batadv_dbg_level - available log levels
- * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
- * @BATADV_DBG_ROUTES: route added / changed / deleted
- * @BATADV_DBG_TT: translation table messages
- * @BATADV_DBG_BLA: bridge loop avoidance messages
- * @BATADV_DBG_DAT: ARP snooping and DAT related messages
- * @BATADV_DBG_NC: network coding related messages
- * @BATADV_DBG_ALL: the union of all the above log levels
- */
-enum batadv_dbg_level {
-	BATADV_DBG_BATMAN = BIT(0),
-	BATADV_DBG_ROUTES = BIT(1),
-	BATADV_DBG_TT	  = BIT(2),
-	BATADV_DBG_BLA    = BIT(3),
-	BATADV_DBG_DAT    = BIT(4),
-	BATADV_DBG_NC	  = BIT(5),
-	BATADV_DBG_ALL    = 63,
-};
-
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-__printf(2, 3);
-
-/* possibly ratelimited debug output */
-#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...)	\
-	do {							\
-		if (atomic_read(&bat_priv->log_level) & type && \
-		    (!ratelimited || net_ratelimit()))		\
-			batadv_debug_log(bat_priv, fmt, ## arg);\
-	}							\
-	while (0)
-#else /* !CONFIG_BATMAN_ADV_DEBUG */
-__printf(4, 5)
-static inline void _batadv_dbg(int type __always_unused,
-			       struct batadv_priv *bat_priv __always_unused,
-			       int ratelimited __always_unused,
-			       const char *fmt __always_unused, ...)
-{
-}
-#endif
-
-#define batadv_dbg(type, bat_priv, arg...) \
-	_batadv_dbg(type, bat_priv, 0, ## arg)
-#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
-	_batadv_dbg(type, bat_priv, 1, ## arg)
-
-#define batadv_info(net_dev, fmt, arg...)				\
-	do {								\
-		struct net_device *_netdev = (net_dev);                 \
-		struct batadv_priv *_batpriv = netdev_priv(_netdev);    \
-		batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg);	\
-		pr_info("%s: " fmt, _netdev->name, ## arg);		\
-	} while (0)
-#define batadv_err(net_dev, fmt, arg...)				\
-	do {								\
-		struct net_device *_netdev = (net_dev);                 \
-		struct batadv_priv *_batpriv = netdev_priv(_netdev);    \
-		batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg);	\
-		pr_err("%s: " fmt, _netdev->name, ## arg);		\
-	} while (0)
-
-/**
  * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
  * @data1: Pointer to a six-byte array containing the Ethernet address
  * @data2: Pointer other six-byte array containing the Ethernet address
@@ -370,39 +310,6 @@ static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv,  size_t idx)
  */
 #define BATADV_SKB_CB(__skb)       ((struct batadv_skb_cb *)&((__skb)->cb[0]))
 
-void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
-				    u8 type, u8 version,
-				    void *tvlv_value, u16 tvlv_value_len);
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
-				     unsigned char **packet_buff,
-				     int *packet_buff_len, int packet_min_len);
-void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
-			     struct batadv_ogm_packet *batadv_ogm_packet,
-			     struct batadv_orig_node *orig_node);
-void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
-				      u8 type, u8 version);
-
-void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
-				  void (*optr)(struct batadv_priv *bat_priv,
-					       struct batadv_orig_node *orig,
-					       u8 flags,
-					       void *tvlv_value,
-					       u16 tvlv_value_len),
-				  int (*uptr)(struct batadv_priv *bat_priv,
-					      u8 *src, u8 *dst,
-					      void *tvlv_value,
-					      u16 tvlv_value_len),
-				  u8 type, u8 version, u8 flags);
-void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
-				    u8 type, u8 version);
-int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
-				   bool ogm_source,
-				   struct batadv_orig_node *orig_node,
-				   u8 *src, u8 *dst,
-				   void *tvlv_buff, u16 tvlv_buff_len);
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
-			      u8 *dst, u8 type, u8 version,
-			      void *tvlv_value, u16 tvlv_value_len);
 unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
 bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid);
 
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index c32f24fafe67..cc915073a753 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -25,17 +25,23 @@
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
+#include <linux/icmpv6.h>
+#include <linux/if_bridge.h>
 #include <linux/if_ether.h>
-#include <linux/in6.h>
+#include <linux/igmp.h>
 #include <linux/in.h>
+#include <linux/in6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
+#include <linux/seq_file.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -43,18 +49,57 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <net/addrconf.h>
+#include <net/if_inet6.h>
+#include <net/ip.h>
 #include <net/ipv6.h>
 
+#include "hard-interface.h"
+#include "hash.h"
+#include "log.h"
 #include "packet.h"
 #include "translation-table.h"
+#include "tvlv.h"
+
+/**
+ * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
+ * @soft_iface: netdev struct of the mesh interface
+ *
+ * If the given soft interface has a bridge on top then the refcount
+ * of the according net device is increased.
+ *
+ * Return: NULL if no such bridge exists. Otherwise the net device of the
+ * bridge.
+ */
+static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
+{
+	struct net_device *upper = soft_iface;
+
+	rcu_read_lock();
+	do {
+		upper = netdev_master_upper_dev_get_rcu(upper);
+	} while (upper && !(upper->priv_flags & IFF_EBRIDGE));
+
+	if (upper)
+		dev_hold(upper);
+	rcu_read_unlock();
+
+	return upper;
+}
 
 /**
  * batadv_mcast_mla_softif_get - get softif multicast listeners
  * @dev: the device to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
- * Collect multicast addresses of the local multicast listeners
- * on the given soft interface, dev, in the given mcast_list.
+ * Collects multicast addresses of multicast listeners residing
+ * on this kernel on the given soft interface, dev, in
+ * the given mcast_list. In general, multicast listeners provided by
+ * your multicast receiving applications run directly on this node.
+ *
+ * If there is a bridge interface on top of dev, collects from that one
+ * instead. Just like with IP addresses and routes, multicast listeners
+ * will(/should) register to the bridge interface instead of an
+ * enslaved bat0.
  *
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
@@ -62,12 +107,13 @@
 static int batadv_mcast_mla_softif_get(struct net_device *dev,
 				       struct hlist_head *mcast_list)
 {
+	struct net_device *bridge = batadv_mcast_get_bridge(dev);
 	struct netdev_hw_addr *mc_list_entry;
 	struct batadv_hw_addr *new;
 	int ret = 0;
 
-	netif_addr_lock_bh(dev);
-	netdev_for_each_mc_addr(mc_list_entry, dev) {
+	netif_addr_lock_bh(bridge ? bridge : dev);
+	netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
 		new = kmalloc(sizeof(*new), GFP_ATOMIC);
 		if (!new) {
 			ret = -ENOMEM;
@@ -78,7 +124,10 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
 		hlist_add_head(&new->list, mcast_list);
 		ret++;
 	}
-	netif_addr_unlock_bh(dev);
+	netif_addr_unlock_bh(bridge ? bridge : dev);
+
+	if (bridge)
+		dev_put(bridge);
 
 	return ret;
 }
@@ -104,6 +153,83 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
 }
 
 /**
+ * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address
+ * @dst: destination to write to - a multicast MAC address
+ * @src: source to read from - a multicast IP address
+ *
+ * Converts a given multicast IPv4/IPv6 address from a bridge
+ * to its matching multicast MAC address and copies it into the given
+ * destination buffer.
+ *
+ * Caller needs to make sure the destination buffer can hold
+ * at least ETH_ALEN bytes.
+ */
+static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
+{
+	if (src->proto == htons(ETH_P_IP))
+		ip_eth_mc_map(src->u.ip4, dst);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (src->proto == htons(ETH_P_IPV6))
+		ipv6_eth_mc_map(&src->u.ip6, dst);
+#endif
+	else
+		eth_zero_addr(dst);
+}
+
+/**
+ * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners
+ * @dev: a bridge slave whose bridge to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ *
+ * Collects multicast addresses of multicast listeners residing
+ * on foreign, non-mesh devices which we gave access to our mesh via
+ * a bridge on top of the given soft interface, dev, in the given
+ * mcast_list.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+				       struct hlist_head *mcast_list)
+{
+	struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+	struct br_ip_list *br_ip_entry, *tmp;
+	struct batadv_hw_addr *new;
+	u8 mcast_addr[ETH_ALEN];
+	int ret;
+
+	/* we don't need to detect these devices/listeners, the IGMP/MLD
+	 * snooping code of the Linux bridge already does that for us
+	 */
+	ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
+	if (ret < 0)
+		goto out;
+
+	list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+		batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
+		if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
+			continue;
+
+		new = kmalloc(sizeof(*new), GFP_ATOMIC);
+		if (!new) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ether_addr_copy(new->addr, mcast_addr);
+		hlist_add_head(&new->list, mcast_list);
+	}
+
+out:
+	list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
+		list_del(&br_ip_entry->list);
+		kfree(br_ip_entry);
+	}
+
+	return ret;
+}
+
+/**
  * batadv_mcast_mla_list_free - free a list of multicast addresses
  * @bat_priv: the bat priv with all the soft interface information
  * @mcast_list: the list to free
@@ -214,44 +340,195 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
 }
 
 /**
+ * batadv_mcast_querier_log - debug output regarding the querier status on link
+ * @bat_priv: the bat priv with all the soft interface information
+ * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD")
+ * @old_state: the previous querier state on our link
+ * @new_state: the new querier state on our link
+ *
+ * Outputs debug messages to the logging facility with log level 'mcast'
+ * regarding changes to the querier status on the link which are relevant
+ * to our multicast optimizations.
+ *
+ * Usually this is about whether a querier appeared or vanished in
+ * our mesh or whether the querier is in the suboptimal position of being
+ * behind our local bridge segment: Snooping switches will directly
+ * forward listener reports to the querier, therefore batman-adv and
+ * the bridge will potentially not see these listeners - the querier is
+ * potentially shadowing listeners from us then.
+ *
+ * This is only interesting for nodes with a bridge on top of their
+ * soft interface.
+ */
+static void
+batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
+			 struct batadv_mcast_querier_state *old_state,
+			 struct batadv_mcast_querier_state *new_state)
+{
+	if (!old_state->exists && new_state->exists)
+		batadv_info(bat_priv->soft_iface, "%s Querier appeared\n",
+			    str_proto);
+	else if (old_state->exists && !new_state->exists)
+		batadv_info(bat_priv->soft_iface,
+			    "%s Querier disappeared - multicast optimizations disabled\n",
+			    str_proto);
+	else if (!bat_priv->mcast.bridged && !new_state->exists)
+		batadv_info(bat_priv->soft_iface,
+			    "No %s Querier present - multicast optimizations disabled\n",
+			    str_proto);
+
+	if (new_state->exists) {
+		if ((!old_state->shadowing && new_state->shadowing) ||
+		    (!old_state->exists && new_state->shadowing))
+			batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+				   "%s Querier is behind our bridged segment: Might shadow listeners\n",
+				   str_proto);
+		else if (old_state->shadowing && !new_state->shadowing)
+			batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+				   "%s Querier is not behind our bridged segment\n",
+				   str_proto);
+	}
+}
+
+/**
+ * batadv_mcast_bridge_log - debug output for topology changes in bridged setups
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridged: a flag about whether the soft interface is currently bridged or not
+ * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier
+ * @querier_ipv6: (maybe) new status of a potential, selected MLD querier
+ *
+ * If no bridges are ever used on this node, then this function does nothing.
+ *
+ * Otherwise this function outputs debug information to the 'mcast' log level
+ * which might be relevant to our multicast optimizations.
+ *
+ * More precisely, it outputs information when a bridge interface is added or
+ * removed from a soft interface. And when a bridge is present, it further
+ * outputs information about the querier state which is relevant for the
+ * multicast flags this node is going to set.
+ */
+static void
+batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged,
+			struct batadv_mcast_querier_state *querier_ipv4,
+			struct batadv_mcast_querier_state *querier_ipv6)
+{
+	if (!bat_priv->mcast.bridged && bridged)
+		batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+			   "Bridge added: Setting Unsnoopables(U)-flag\n");
+	else if (bat_priv->mcast.bridged && !bridged)
+		batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+			   "Bridge removed: Unsetting Unsnoopables(U)-flag\n");
+
+	if (bridged) {
+		batadv_mcast_querier_log(bat_priv, "IGMP",
+					 &bat_priv->mcast.querier_ipv4,
+					 querier_ipv4);
+		batadv_mcast_querier_log(bat_priv, "MLD",
+					 &bat_priv->mcast.querier_ipv6,
+					 querier_ipv6);
+	}
+}
+
+/**
+ * batadv_mcast_flags_logs - output debug information about mcast flag changes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @flags: flags indicating the new multicast state
+ *
+ * Whenever the multicast flags this nodes announces changes (@mcast_flags vs.
+ * bat_priv->mcast.flags), this notifies userspace via the 'mcast' log level.
+ */
+static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
+{
+	u8 old_flags = bat_priv->mcast.flags;
+	char str_old_flags[] = "[...]";
+
+	sprintf(str_old_flags, "[%c%c%c]",
+		(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
+		(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
+		(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+
+	batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+		   "Changing multicast flags from '%s' to '[%c%c%c]'\n",
+		   bat_priv->mcast.enabled ? str_old_flags : "<undefined>",
+		   (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
+		   (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
+		   (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+}
+
+/**
  * batadv_mcast_mla_tvlv_update - update multicast tvlv
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Updates the own multicast tvlv with our current multicast related settings,
  * capabilities and inabilities.
  *
- * Return: true if the tvlv container is registered afterwards. Otherwise
- * returns false.
+ * Return: false if we want all IPv4 && IPv6 multicast traffic and true
+ * otherwise.
  */
 static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
 {
 	struct batadv_tvlv_mcast_data mcast_data;
+	struct batadv_mcast_querier_state querier4 = {false, false};
+	struct batadv_mcast_querier_state querier6 = {false, false};
+	struct net_device *dev = bat_priv->soft_iface;
+	bool bridged;
 
 	mcast_data.flags = BATADV_NO_FLAGS;
 	memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved));
 
-	/* Avoid attaching MLAs, if there is a bridge on top of our soft
-	 * interface, we don't support that yet (TODO)
+	bridged = batadv_mcast_has_bridge(bat_priv);
+	if (!bridged)
+		goto update;
+
+#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
+	pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
+#endif
+
+	querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
+	querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
+
+	querier6.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6);
+	querier6.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6);
+
+	mcast_data.flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
+
+	/* 1) If no querier exists at all, then multicast listeners on
+	 *    our local TT clients behind the bridge will keep silent.
+	 * 2) If the selected querier is on one of our local TT clients,
+	 *    behind the bridge, then this querier might shadow multicast
+	 *    listeners on our local TT clients, behind this bridge.
+	 *
+	 * In both cases, we will signalize other batman nodes that
+	 * we need all multicast traffic of the according protocol.
 	 */
-	if (batadv_mcast_has_bridge(bat_priv)) {
-		if (bat_priv->mcast.enabled) {
-			batadv_tvlv_container_unregister(bat_priv,
-							 BATADV_TVLV_MCAST, 1);
-			bat_priv->mcast.enabled = false;
-		}
+	if (!querier4.exists || querier4.shadowing)
+		mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV4;
 
-		return false;
-	}
+	if (!querier6.exists || querier6.shadowing)
+		mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV6;
+
+update:
+	batadv_mcast_bridge_log(bat_priv, bridged, &querier4, &querier6);
+
+	bat_priv->mcast.querier_ipv4.exists = querier4.exists;
+	bat_priv->mcast.querier_ipv4.shadowing = querier4.shadowing;
+
+	bat_priv->mcast.querier_ipv6.exists = querier6.exists;
+	bat_priv->mcast.querier_ipv6.shadowing = querier6.shadowing;
+
+	bat_priv->mcast.bridged = bridged;
 
 	if (!bat_priv->mcast.enabled ||
 	    mcast_data.flags != bat_priv->mcast.flags) {
-		batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 1,
+		batadv_mcast_flags_log(bat_priv, mcast_data.flags);
+		batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2,
 					       &mcast_data, sizeof(mcast_data));
 		bat_priv->mcast.flags = mcast_data.flags;
 		bat_priv->mcast.enabled = true;
 	}
 
-	return true;
+	return !(mcast_data.flags &
+		 (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6));
 }
 
 /**
@@ -274,6 +551,10 @@ void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
 	if (ret < 0)
 		goto out;
 
+	ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+	if (ret < 0)
+		goto out;
+
 update:
 	batadv_mcast_mla_tt_retract(bat_priv, &mcast_list);
 	batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
@@ -283,6 +564,31 @@ out:
 }
 
 /**
+ * batadv_mcast_is_report_ipv4 - check for IGMP reports
+ * @skb: the ethernet frame destined for the mesh
+ *
+ * This call might reallocate skb data.
+ *
+ * Checks whether the given frame is a valid IGMP report.
+ *
+ * Return: If so then true, otherwise false.
+ */
+static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
+{
+	if (ip_mc_check_igmp(skb, NULL) < 0)
+		return false;
+
+	switch (igmp_hdr(skb)->type) {
+	case IGMP_HOST_MEMBERSHIP_REPORT:
+	case IGMPV2_HOST_MEMBERSHIP_REPORT:
+	case IGMPV3_HOST_MEMBERSHIP_REPORT:
+		return true;
+	}
+
+	return false;
+}
+
+/**
  * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the IPv4 packet to check
@@ -304,6 +610,9 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
 	if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr)))
 		return -ENOMEM;
 
+	if (batadv_mcast_is_report_ipv4(skb))
+		return -EINVAL;
+
 	iphdr = ip_hdr(skb);
 
 	/* TODO: Implement Multicast Router Discovery (RFC4286),
@@ -320,6 +629,31 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * batadv_mcast_is_report_ipv6 - check for MLD reports
+ * @skb: the ethernet frame destined for the mesh
+ *
+ * This call might reallocate skb data.
+ *
+ * Checks whether the given frame is a valid MLD report.
+ *
+ * Return: If so then true, otherwise false.
+ */
+static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
+{
+	if (ipv6_mc_check_mld(skb, NULL) < 0)
+		return false;
+
+	switch (icmp6_hdr(skb)->icmp6_type) {
+	case ICMPV6_MGM_REPORT:
+	case ICMPV6_MLD2_REPORT:
+		return true;
+	}
+
+	return false;
+}
+
 /**
  * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential
  * @bat_priv: the bat priv with all the soft interface information
@@ -341,6 +675,9 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
 	if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr)))
 		return -ENOMEM;
 
+	if (batadv_mcast_is_report_ipv6(skb))
+		return -EINVAL;
+
 	ip6hdr = ipv6_hdr(skb);
 
 	/* TODO: Implement Multicast Router Discovery (RFC4286),
@@ -357,6 +694,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
 
 	return 0;
 }
+#endif
 
 /**
  * batadv_mcast_forw_mode_check - check for optimized forwarding potential
@@ -385,9 +723,11 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
 	case ETH_P_IP:
 		return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
 							 is_unsnoopable);
+#if IS_ENABLED(CONFIG_IPV6)
 	case ETH_P_IPV6:
 		return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
 							 is_unsnoopable);
+#endif
 	default:
 		return -EINVAL;
 	}
@@ -728,18 +1068,18 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_mcast_tvlv_ogm_handler_v1 - process incoming multicast tvlv container
+ * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node of the ogm
  * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
  * @tvlv_value: tvlv buffer containing the multicast data
  * @tvlv_value_len: tvlv buffer length
  */
-static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
-					     struct batadv_orig_node *orig,
-					     u8 flags,
-					     void *tvlv_value,
-					     u16 tvlv_value_len)
+static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig,
+					  u8 flags,
+					  void *tvlv_value,
+					  u16 tvlv_value_len)
 {
 	bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
 	u8 mcast_flags = BATADV_NO_FLAGS;
@@ -789,19 +1129,120 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
  */
 void batadv_mcast_init(struct batadv_priv *bat_priv)
 {
-	batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler_v1,
-				     NULL, BATADV_TVLV_MCAST, 1,
+	batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
+				     NULL, BATADV_TVLV_MCAST, 2,
 				     BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
 }
 
 /**
+ * batadv_mcast_flags_print_header - print own mcast flags to debugfs table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: debugfs table seq_file struct
+ *
+ * Prints our own multicast flags including a more specific reason why
+ * they are set, that is prints the bridge and querier state too, to
+ * the debugfs table specified via @seq.
+ */
+static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
+					    struct seq_file *seq)
+{
+	u8 flags = bat_priv->mcast.flags;
+	char querier4, querier6, shadowing4, shadowing6;
+	bool bridged = bat_priv->mcast.bridged;
+
+	if (bridged) {
+		querier4 = bat_priv->mcast.querier_ipv4.exists ? '.' : '4';
+		querier6 = bat_priv->mcast.querier_ipv6.exists ? '.' : '6';
+		shadowing4 = bat_priv->mcast.querier_ipv4.shadowing ? '4' : '.';
+		shadowing6 = bat_priv->mcast.querier_ipv6.shadowing ? '6' : '.';
+	} else {
+		querier4 = '?';
+		querier6 = '?';
+		shadowing4 = '?';
+		shadowing6 = '?';
+	}
+
+	seq_printf(seq, "Multicast flags (own flags: [%c%c%c])\n",
+		   (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
+		   (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
+		   (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+	seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.');
+	seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n",
+		   querier4, querier6);
+	seq_printf(seq, "* Shadowing IGMP/MLD Querier [4/6]:\t%c/%c\n",
+		   shadowing4, shadowing6);
+	seq_puts(seq, "-------------------------------------------\n");
+	seq_printf(seq, "       %-10s %s\n", "Originator", "Flags");
+}
+
+/**
+ * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * This prints a table of (primary) originators and their according
+ * multicast flags, including (in the header) our own.
+ *
+ * Return: always 0
+ */
+int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct batadv_priv *bat_priv = netdev_priv(net_dev);
+	struct batadv_hard_iface *primary_if;
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct batadv_orig_node *orig_node;
+	struct hlist_head *head;
+	u8 flags;
+	u32 i;
+
+	primary_if = batadv_seq_print_text_primary_if_get(seq);
+	if (!primary_if)
+		return 0;
+
+	batadv_mcast_flags_print_header(bat_priv, seq);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+			if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+				      &orig_node->capa_initialized))
+				continue;
+
+			if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+				      &orig_node->capabilities)) {
+				seq_printf(seq, "%pM -\n", orig_node->orig);
+				continue;
+			}
+
+			flags = orig_node->mcast_flags;
+
+			seq_printf(seq, "%pM [%c%c%c]\n", orig_node->orig,
+				   (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)
+				   ? 'U' : '.',
+				   (flags & BATADV_MCAST_WANT_ALL_IPV4)
+				   ? '4' : '.',
+				   (flags & BATADV_MCAST_WANT_ALL_IPV6)
+				   ? '6' : '.');
+		}
+		rcu_read_unlock();
+	}
+
+	batadv_hardif_put(primary_if);
+
+	return 0;
+}
+
+/**
  * batadv_mcast_free - free the multicast optimizations structures
  * @bat_priv: the bat priv with all the soft interface information
  */
 void batadv_mcast_free(struct batadv_priv *bat_priv)
 {
-	batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
-	batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
+	batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+	batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
 
 	spin_lock_bh(&bat_priv->tt.commit_lock);
 	batadv_mcast_mla_tt_retract(bat_priv, NULL);
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 80bceec55592..1fb00ba84907 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -20,6 +20,7 @@
 
 #include "main.h"
 
+struct seq_file;
 struct sk_buff;
 
 /**
@@ -46,6 +47,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 
 void batadv_mcast_init(struct batadv_priv *bat_priv);
 
+int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+
 void batadv_mcast_free(struct batadv_priv *bat_priv);
 
 void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
new file mode 100644
index 000000000000..231f8eaf075b
--- /dev/null
+++ b/net/batman-adv/netlink.c
@@ -0,0 +1,424 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "netlink.h"
+#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genetlink.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/printk.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
+
+#include "hard-interface.h"
+#include "soft-interface.h"
+#include "tp_meter.h"
+
+struct sk_buff;
+
+static struct genl_family batadv_netlink_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = BATADV_NL_NAME,
+	.version = 1,
+	.maxattr = BATADV_ATTR_MAX,
+};
+
+/* multicast groups */
+enum batadv_netlink_multicast_groups {
+	BATADV_NL_MCGRP_TPMETER,
+};
+
+static struct genl_multicast_group batadv_netlink_mcgrps[] = {
+	[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
+};
+
+static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
+	[BATADV_ATTR_VERSION]		= { .type = NLA_STRING },
+	[BATADV_ATTR_ALGO_NAME]		= { .type = NLA_STRING },
+	[BATADV_ATTR_MESH_IFINDEX]	= { .type = NLA_U32 },
+	[BATADV_ATTR_MESH_IFNAME]	= { .type = NLA_STRING },
+	[BATADV_ATTR_MESH_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_HARD_IFINDEX]	= { .type = NLA_U32 },
+	[BATADV_ATTR_HARD_IFNAME]	= { .type = NLA_STRING },
+	[BATADV_ATTR_HARD_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_ORIG_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TPMETER_RESULT]	= { .type = NLA_U8 },
+	[BATADV_ATTR_TPMETER_TEST_TIME]	= { .type = NLA_U32 },
+	[BATADV_ATTR_TPMETER_BYTES]	= { .type = NLA_U64 },
+	[BATADV_ATTR_TPMETER_COOKIE]	= { .type = NLA_U32 },
+};
+
+/**
+ * batadv_netlink_mesh_info_put - fill in generic information about mesh
+ *  interface
+ * @msg: netlink message to be sent back
+ * @soft_iface: interface for which the data should be taken
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct batadv_hard_iface *primary_if = NULL;
+	struct net_device *hard_iface;
+	int ret = -ENOBUFS;
+
+	if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
+	    nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
+			   bat_priv->algo_ops->name) ||
+	    nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
+	    nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
+	    nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
+		    soft_iface->dev_addr))
+		goto out;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
+		hard_iface = primary_if->net_dev;
+
+		if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+				hard_iface->ifindex) ||
+		    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+				   hard_iface->name) ||
+		    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+			    hard_iface->dev_addr))
+			goto out;
+	}
+
+	ret = 0;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO
+ *  netlink request
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct net_device *soft_iface;
+	struct sk_buff *msg = NULL;
+	void *msg_head;
+	int ifindex;
+	int ret;
+
+	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+		return -EINVAL;
+
+	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			       &batadv_netlink_family, 0,
+			       BATADV_CMD_GET_MESH_INFO);
+	if (!msg_head) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+
+ out:
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	if (ret) {
+		if (msg)
+			nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_end(msg, msg_head);
+	return genlmsg_reply(msg, info);
+}
+
+/**
+ * batadv_netlink_tp_meter_put - Fill information of started tp_meter session
+ * @msg: netlink message to be sent back
+ * @cookie: tp meter session cookie
+ *
+ *  Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
+{
+	if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
+		return -ENOBUFS;
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: destination of tp_meter session
+ * @result: reason for tp meter session stop
+ * @test_time: total time ot the tp_meter session
+ * @total_bytes: bytes acked to the receiver
+ * @cookie: cookie of tp_meter session
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
+				  u8 result, u32 test_time, u64 total_bytes,
+				  u32 cookie)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0,
+			  BATADV_CMD_TP_METER);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto err_genlmsg;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes,
+			      BATADV_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result))
+		goto nla_put_failure;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_TPMETER, GFP_KERNEL);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	ret = -EMSGSIZE;
+
+err_genlmsg:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/**
+ * batadv_netlink_tp_meter_start - Start a new tp_meter session
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct sk_buff *msg = NULL;
+	u32 test_length;
+	void *msg_head;
+	int ifindex;
+	u32 cookie;
+	u8 *dst;
+	int ret;
+
+	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
+		return -EINVAL;
+
+	if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
+		return -EINVAL;
+
+	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+	if (!ifindex)
+		return -EINVAL;
+
+	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
+
+	test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			       &batadv_netlink_family, 0,
+			       BATADV_CMD_TP_METER);
+	if (!msg_head) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	batadv_tp_start(bat_priv, dst, test_length, &cookie);
+
+	ret = batadv_netlink_tp_meter_put(msg, cookie);
+
+ out:
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	if (ret) {
+		if (msg)
+			nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_end(msg, msg_head);
+	return genlmsg_reply(msg, info);
+}
+
+/**
+ * batadv_netlink_tp_meter_start - Cancel a running tp_meter session
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	int ifindex;
+	u8 *dst;
+	int ret = 0;
+
+	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
+		return -EINVAL;
+
+	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+	if (!ifindex)
+		return -EINVAL;
+
+	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
+
+out:
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
+static struct genl_ops batadv_netlink_ops[] = {
+	{
+		.cmd = BATADV_CMD_GET_MESH_INFO,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_get_mesh_info,
+	},
+	{
+		.cmd = BATADV_CMD_TP_METER,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_tp_meter_start,
+	},
+	{
+		.cmd = BATADV_CMD_TP_METER_CANCEL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_tp_meter_cancel,
+	},
+};
+
+/**
+ * batadv_netlink_register - register batadv genl netlink family
+ */
+void __init batadv_netlink_register(void)
+{
+	int ret;
+
+	ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
+						   batadv_netlink_ops,
+						   batadv_netlink_mcgrps);
+	if (ret)
+		pr_warn("unable to register netlink family");
+}
+
+/**
+ * batadv_netlink_unregister - unregister batadv genl netlink family
+ */
+void batadv_netlink_unregister(void)
+{
+	genl_unregister_family(&batadv_netlink_family);
+}
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
new file mode 100644
index 000000000000..945653ab58c6
--- /dev/null
+++ b/net/batman-adv/netlink.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_NETLINK_H_
+#define _NET_BATMAN_ADV_NETLINK_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+void batadv_netlink_register(void);
+void batadv_netlink_unregister(void);
+
+int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
+				  u8 result, u32 test_time, u64 total_bytes,
+				  u32 cookie);
+
+#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 678f06865312..293ef4ffd4e1 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -51,10 +51,12 @@
 
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
 #include "send.h"
+#include "tvlv.h"
 
 static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
 static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7f51bc2c06eb..7d1e5421f6bc 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -34,11 +34,13 @@
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 
+#include "bat_algo.h"
 #include "distributed-arp-table.h"
 #include "fragmentation.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "multicast.h"
 #include "network-coding.h"
 #include "routing.h"
@@ -251,10 +253,8 @@ static void batadv_neigh_node_release(struct kref *ref)
 	struct hlist_node *node_tmp;
 	struct batadv_neigh_node *neigh_node;
 	struct batadv_neigh_ifinfo *neigh_ifinfo;
-	struct batadv_algo_ops *bao;
 
 	neigh_node = container_of(ref, struct batadv_neigh_node, refcount);
-	bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
 
 	hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
 				  &neigh_node->ifinfo_list, list) {
@@ -263,9 +263,6 @@ static void batadv_neigh_node_release(struct kref *ref)
 
 	batadv_hardif_neigh_put(neigh_node->hardif_neigh);
 
-	if (bao->bat_neigh_free)
-		bao->bat_neigh_free(neigh_node);
-
 	batadv_hardif_put(neigh_node->if_incoming);
 
 	kfree_rcu(neigh_node, rcu);
@@ -537,8 +534,8 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
 
 	kref_init(&hardif_neigh->refcount);
 
-	if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
-		bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
+	if (bat_priv->algo_ops->neigh.hardif_init)
+		bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
 
 	hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
 
@@ -602,19 +599,19 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
 }
 
 /**
- * batadv_neigh_node_new - create and init a new neigh_node object
+ * batadv_neigh_node_create - create a neigh node object
  * @orig_node: originator object representing the neighbour
  * @hard_iface: the interface where the neighbour is connected to
  * @neigh_addr: the mac address of the neighbour interface
  *
  * Allocates a new neigh_node object and initialises all the generic fields.
  *
- * Return: neighbor when found. Othwerwise NULL
+ * Return: the neighbour node if found or created or NULL otherwise.
  */
-struct batadv_neigh_node *
-batadv_neigh_node_new(struct batadv_orig_node *orig_node,
-		      struct batadv_hard_iface *hard_iface,
-		      const u8 *neigh_addr)
+static struct batadv_neigh_node *
+batadv_neigh_node_create(struct batadv_orig_node *orig_node,
+			 struct batadv_hard_iface *hard_iface,
+			 const u8 *neigh_addr)
 {
 	struct batadv_neigh_node *neigh_node;
 	struct batadv_hardif_neigh_node *hardif_neigh = NULL;
@@ -667,6 +664,29 @@ out:
 }
 
 /**
+ * batadv_neigh_node_get_or_create - retrieve or create a neigh node object
+ * @orig_node: originator object representing the neighbour
+ * @hard_iface: the interface where the neighbour is connected to
+ * @neigh_addr: the mac address of the neighbour interface
+ *
+ * Return: the neighbour node if found or created or NULL otherwise.
+ */
+struct batadv_neigh_node *
+batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
+				struct batadv_hard_iface *hard_iface,
+				const u8 *neigh_addr)
+{
+	struct batadv_neigh_node *neigh_node = NULL;
+
+	/* first check without locking to avoid the overhead */
+	neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
+	if (neigh_node)
+		return neigh_node;
+
+	return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr);
+}
+
+/**
  * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
  * @seq: neighbour table seq_file struct
  * @offset: not used
@@ -686,17 +706,17 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
 	seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
 		   BATADV_SOURCE_VERSION, primary_if->net_dev->name,
 		   primary_if->net_dev->dev_addr, net_dev->name,
-		   bat_priv->bat_algo_ops->name);
+		   bat_priv->algo_ops->name);
 
 	batadv_hardif_put(primary_if);
 
-	if (!bat_priv->bat_algo_ops->bat_neigh_print) {
+	if (!bat_priv->algo_ops->neigh.print) {
 		seq_puts(seq,
 			 "No printing function for this routing protocol\n");
 		return 0;
 	}
 
-	bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq);
+	bat_priv->algo_ops->neigh.print(bat_priv, seq);
 	return 0;
 }
 
@@ -747,8 +767,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 
 	batadv_frag_purge_orig(orig_node, NULL);
 
-	if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
-		orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+	if (orig_node->bat_priv->algo_ops->orig.free)
+		orig_node->bat_priv->algo_ops->orig.free(orig_node);
 
 	kfree(orig_node->tt_buff);
 	kfree(orig_node);
@@ -1077,12 +1097,12 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
 			  struct batadv_hard_iface *if_outgoing)
 {
 	struct batadv_neigh_node *best = NULL, *neigh;
-	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+	struct batadv_algo_ops *bao = bat_priv->algo_ops;
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) {
-		if (best && (bao->bat_neigh_cmp(neigh, if_outgoing,
-						best, if_outgoing) <= 0))
+		if (best && (bao->neigh.cmp(neigh, if_outgoing, best,
+					    if_outgoing) <= 0))
 			continue;
 
 		if (!kref_get_unless_zero(&neigh->refcount))
@@ -1234,18 +1254,17 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 	seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
 		   BATADV_SOURCE_VERSION, primary_if->net_dev->name,
 		   primary_if->net_dev->dev_addr, net_dev->name,
-		   bat_priv->bat_algo_ops->name);
+		   bat_priv->algo_ops->name);
 
 	batadv_hardif_put(primary_if);
 
-	if (!bat_priv->bat_algo_ops->bat_orig_print) {
+	if (!bat_priv->algo_ops->orig.print) {
 		seq_puts(seq,
 			 "No printing function for this routing protocol\n");
 		return 0;
 	}
 
-	bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq,
-					       BATADV_IF_DEFAULT);
+	bat_priv->algo_ops->orig.print(bat_priv, seq, BATADV_IF_DEFAULT);
 
 	return 0;
 }
@@ -1272,7 +1291,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
 	}
 
 	bat_priv = netdev_priv(hard_iface->soft_iface);
-	if (!bat_priv->bat_algo_ops->bat_orig_print) {
+	if (!bat_priv->algo_ops->orig.print) {
 		seq_puts(seq,
 			 "No printing function for this routing protocol\n");
 		goto out;
@@ -1286,9 +1305,9 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
 	seq_printf(seq, "[B.A.T.M.A.N. adv %s, IF/MAC: %s/%pM (%s %s)]\n",
 		   BATADV_SOURCE_VERSION, hard_iface->net_dev->name,
 		   hard_iface->net_dev->dev_addr,
-		   hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name);
+		   hard_iface->soft_iface->name, bat_priv->algo_ops->name);
 
-	bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface);
+	bat_priv->algo_ops->orig.print(bat_priv, seq, hard_iface);
 
 out:
 	if (hard_iface)
@@ -1300,7 +1319,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+	struct batadv_algo_ops *bao = bat_priv->algo_ops;
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
@@ -1316,9 +1335,8 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			ret = 0;
-			if (bao->bat_orig_add_if)
-				ret = bao->bat_orig_add_if(orig_node,
-							   max_if_num);
+			if (bao->orig.add_if)
+				ret = bao->orig.add_if(orig_node, max_if_num);
 			if (ret == -ENOMEM)
 				goto err;
 		}
@@ -1340,7 +1358,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 	struct hlist_head *head;
 	struct batadv_hard_iface *hard_iface_tmp;
 	struct batadv_orig_node *orig_node;
-	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+	struct batadv_algo_ops *bao = bat_priv->algo_ops;
 	u32 i;
 	int ret;
 
@@ -1353,10 +1371,9 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			ret = 0;
-			if (bao->bat_orig_del_if)
-				ret = bao->bat_orig_del_if(orig_node,
-							   max_if_num,
-							   hard_iface->if_num);
+			if (bao->orig.del_if)
+				ret = bao->orig.del_if(orig_node, max_if_num,
+						       hard_iface->if_num);
 			if (ret == -ENOMEM)
 				goto err;
 		}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 64a8951e5844..566306bf05dc 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -46,9 +46,9 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
 void
 batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
 struct batadv_neigh_node *
-batadv_neigh_node_new(struct batadv_orig_node *orig_node,
-		      struct batadv_hard_iface *hard_iface,
-		      const u8 *neigh_addr);
+batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
+				struct batadv_hard_iface *hard_iface,
+				const u8 *neigh_addr);
 void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
 struct batadv_neigh_node *
 batadv_orig_router_get(struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 372128ddb474..6b011ff64dd8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -21,6 +21,8 @@
 #include <asm/byteorder.h>
 #include <linux/types.h>
 
+#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0)
+
 /**
  * enum batadv_packettype - types for batman-adv encapsulated packets
  * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
@@ -93,6 +95,7 @@ enum batadv_icmp_packettype {
 	BATADV_ECHO_REQUEST	       = 8,
 	BATADV_TTL_EXCEEDED	       = 11,
 	BATADV_PARAMETER_PROBLEM       = 12,
+	BATADV_TP		       = 15,
 };
 
 /**
@@ -285,6 +288,16 @@ struct batadv_elp_packet {
 #define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
 
 /**
+ * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes
+ * @BATADV_TP_START: start a throughput meter run
+ * @BATADV_TP_STOP: stop a throughput meter run
+ */
+enum batadv_icmp_user_cmd_type {
+	BATADV_TP_START		= 0,
+	BATADV_TP_STOP		= 2,
+};
+
+/**
  * struct batadv_icmp_header - common members among all the ICMP packets
  * @packet_type: batman-adv packet type, part of the general header
  * @version: batman-adv protocol version, part of the genereal header
@@ -334,6 +347,47 @@ struct batadv_icmp_packet {
 	__be16 seqno;
 };
 
+/**
+ * struct batadv_icmp_tp_packet - ICMP TP Meter packet
+ * @packet_type: batman-adv packet type, part of the general header
+ * @version: batman-adv protocol version, part of the genereal header
+ * @ttl: time to live for this packet, part of the genereal header
+ * @msg_type: ICMP packet type
+ * @dst: address of the destination node
+ * @orig: address of the source node
+ * @uid: local ICMP socket identifier
+ * @subtype: TP packet subtype (see batadv_icmp_tp_subtype)
+ * @session: TP session identifier
+ * @seqno: the TP sequence number
+ * @timestamp: time when the packet has been sent. This value is filled in a
+ *  TP_MSG and echoed back in the next TP_ACK so that the sender can compute the
+ *  RTT. Since it is read only by the host which wrote it, there is no need to
+ *  store it using network order
+ */
+struct batadv_icmp_tp_packet {
+	u8  packet_type;
+	u8  version;
+	u8  ttl;
+	u8  msg_type; /* see ICMP message types above */
+	u8  dst[ETH_ALEN];
+	u8  orig[ETH_ALEN];
+	u8  uid;
+	u8  subtype;
+	u8  session[2];
+	__be32 seqno;
+	__be32 timestamp;
+};
+
+/**
+ * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes
+ * @BATADV_TP_MSG: Msg from sender to receiver
+ * @BATADV_TP_ACK: acknowledgment from receiver to sender
+ */
+enum batadv_icmp_tp_subtype {
+	BATADV_TP_MSG	= 0,
+	BATADV_TP_ACK,
+};
+
 #define BATADV_RR_LEN 16
 
 /**
@@ -420,6 +474,7 @@ struct batadv_unicast_4addr_packet {
  * @dest: final destination used when routing fragments
  * @orig: originator of the fragment used when merging the packet
  * @no: fragment number within this sequence
+ * @priority: priority of frame, from ToS IP precedence or 802.1p
  * @reserved: reserved byte for alignment
  * @seqno: sequence identification
  * @total_size: size of the merged packet
@@ -430,9 +485,11 @@ struct batadv_frag_packet {
 	u8     ttl;
 #if defined(__BIG_ENDIAN_BITFIELD)
 	u8     no:4;
-	u8     reserved:4;
+	u8     priority:3;
+	u8     reserved:1;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-	u8     reserved:4;
+	u8     reserved:1;
+	u8     priority:3;
 	u8     no:4;
 #else
 #error "unknown bitfield endianness"
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f75091c983ee..af8e11933928 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -40,12 +40,15 @@
 #include "fragmentation.h"
 #include "hard-interface.h"
 #include "icmp_socket.h"
+#include "log.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
 #include "send.h"
 #include "soft-interface.h"
+#include "tp_meter.h"
 #include "translation-table.h"
+#include "tvlv.h"
 
 static int batadv_route_unicast_packet(struct sk_buff *skb,
 				       struct batadv_hard_iface *recv_if);
@@ -268,10 +271,19 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
 		icmph->ttl = BATADV_TTL;
 
 		res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-		if (res != NET_XMIT_DROP)
-			ret = NET_RX_SUCCESS;
+		if (res == -1)
+			goto out;
+
+		ret = NET_RX_SUCCESS;
 
 		break;
+	case BATADV_TP:
+		if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet)))
+			goto out;
+
+		batadv_tp_meter_recv(bat_priv, skb);
+		ret = NET_RX_SUCCESS;
+		goto out;
 	default:
 		/* drop unknown type */
 		goto out;
@@ -290,7 +302,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
 	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_orig_node *orig_node = NULL;
 	struct batadv_icmp_packet *icmp_packet;
-	int ret = NET_RX_DROP;
+	int res, ret = NET_RX_DROP;
 
 	icmp_packet = (struct batadv_icmp_packet *)skb->data;
 
@@ -321,7 +333,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
 	icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
 	icmp_packet->ttl = BATADV_TTL;
 
-	if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+	res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+	if (res != -1)
 		ret = NET_RX_SUCCESS;
 
 out:
@@ -341,7 +354,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 	struct ethhdr *ethhdr;
 	struct batadv_orig_node *orig_node = NULL;
 	int hdr_size = sizeof(struct batadv_icmp_header);
-	int ret = NET_RX_DROP;
+	int res, ret = NET_RX_DROP;
 
 	/* drop packet if it has not necessary minimum size */
 	if (unlikely(!pskb_may_pull(skb, hdr_size)))
@@ -374,6 +387,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 		if (skb_cow(skb, ETH_HLEN) < 0)
 			goto out;
 
+		ethhdr = eth_hdr(skb);
 		icmph = (struct batadv_icmp_header *)skb->data;
 		icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
 		if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
@@ -407,7 +421,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 	icmph->ttl--;
 
 	/* route it */
-	if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
+	res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
+	if (res != -1)
 		ret = NET_RX_SUCCESS;
 
 out:
@@ -468,7 +483,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
 		   struct batadv_orig_node *orig_node,
 		   struct batadv_hard_iface *recv_if)
 {
-	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+	struct batadv_algo_ops *bao = bat_priv->algo_ops;
 	struct batadv_neigh_node *first_candidate_router = NULL;
 	struct batadv_neigh_node *next_candidate_router = NULL;
 	struct batadv_neigh_node *router, *cand_router = NULL;
@@ -522,9 +537,9 @@ batadv_find_router(struct batadv_priv *bat_priv,
 		/* alternative candidate should be good enough to be
 		 * considered
 		 */
-		if (!bao->bat_neigh_is_similar_or_better(cand_router,
-							 cand->if_outgoing,
-							 router, recv_if))
+		if (!bao->neigh.is_similar_or_better(cand_router,
+						     cand->if_outgoing, router,
+						     recv_if))
 			goto next;
 
 		/* don't use the same router twice */
@@ -644,6 +659,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 
 	len = skb->len;
 	res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
+	if (res == -1)
+		goto out;
 
 	/* translate transmit result into receive result */
 	if (res == NET_XMIT_SUCCESS) {
@@ -651,13 +668,10 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 		batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
 		batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
 				   len + ETH_HLEN);
-
-		ret = NET_RX_SUCCESS;
-	} else if (res == -EINPROGRESS) {
-		/* skb was buffered and consumed */
-		ret = NET_RX_SUCCESS;
 	}
 
+	ret = NET_RX_SUCCESS;
+
 out:
 	if (orig_node)
 		batadv_orig_node_put(orig_node);
@@ -1006,6 +1020,8 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
 	if (!orig_node_src)
 		goto out;
 
+	skb->priority = frag_packet->priority + 256;
+
 	/* Route the fragment if it is not for us and too big to be merged. */
 	if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
 	    batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index b1a4e8a811c8..3a10d87b4b76 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -20,10 +20,11 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
-#include <linux/if_ether.h>
 #include <linux/if.h>
+#include <linux/if_ether.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
@@ -42,6 +43,7 @@
 #include "fragmentation.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "routing.h"
@@ -71,6 +73,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 {
 	struct batadv_priv *bat_priv;
 	struct ethhdr *ethhdr;
+	int ret;
 
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 
@@ -108,8 +111,15 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 	/* dev_queue_xmit() returns a negative result on error.	 However on
 	 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
 	 * (which is > 0). This will not be treated as an error.
+	 *
+	 * a negative value cannot be returned because it could be interepreted
+	 * as not consumed skb by callers of batadv_send_skb_to_orig.
 	 */
-	return dev_queue_xmit(skb);
+	ret = dev_queue_xmit(skb);
+	if (ret < 0)
+		ret = NET_XMIT_DROP;
+
+	return ret;
 send_skb_err:
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
@@ -155,8 +165,11 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
  * host, NULL can be passed as recv_if and no interface alternating is
  * attempted.
  *
- * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
- * -EINPROGRESS if the skb is buffered for later transmit.
+ * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the
+ * skb is buffered for later transmit or the NET_XMIT status returned by the
+ * lower routine if the packet has been passed down.
+ *
+ * If the returning value is not -1 the skb has been consumed.
  */
 int batadv_send_skb_to_orig(struct sk_buff *skb,
 			    struct batadv_orig_node *orig_node,
@@ -164,7 +177,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
 {
 	struct batadv_priv *bat_priv = orig_node->bat_priv;
 	struct batadv_neigh_node *neigh_node;
-	int ret = NET_XMIT_DROP;
+	int ret = -1;
 
 	/* batadv_find_router() increases neigh_nodes refcount if found. */
 	neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
@@ -177,8 +190,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
 	if (atomic_read(&bat_priv->fragmentation) &&
 	    skb->len > neigh_node->if_incoming->net_dev->mtu) {
 		/* Fragment and send packet. */
-		if (batadv_frag_send_packet(skb, orig_node, neigh_node))
-			ret = NET_XMIT_SUCCESS;
+		ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
 
 		goto out;
 	}
@@ -187,12 +199,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
 	 * (i.e. being forwarded). If the packet originates from this node or if
 	 * network coding fails, then send the packet as usual.
 	 */
-	if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
+	if (recv_if && batadv_nc_skb_forward(skb, neigh_node))
 		ret = -EINPROGRESS;
-	} else {
-		batadv_send_unicast_skb(skb, neigh_node);
-		ret = NET_XMIT_SUCCESS;
-	}
+	else
+		ret = batadv_send_unicast_skb(skb, neigh_node);
 
 out:
 	if (neigh_node)
@@ -318,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 {
 	struct batadv_unicast_packet *unicast_packet;
 	struct ethhdr *ethhdr;
-	int ret = NET_XMIT_DROP;
+	int res, ret = NET_XMIT_DROP;
 
 	if (!orig_node)
 		goto out;
@@ -355,7 +365,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 	if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
 		unicast_packet->ttvn = unicast_packet->ttvn - 1;
 
-	if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+	res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+	if (res != -1)
 		ret = NET_XMIT_SUCCESS;
 
 out:
@@ -428,27 +439,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
 				       orig_node, vid);
 }
 
-void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
-{
-	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-
-	if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-	    (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
-		return;
-
-	/* the interface gets activated here to avoid race conditions between
-	 * the moment of activating the interface in
-	 * hardif_activate_interface() where the originator mac is set and
-	 * outdated packets (especially uninitialized mac addresses) in the
-	 * packet queue
-	 */
-	if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
-		hard_iface->if_status = BATADV_IF_ACTIVE;
-
-	bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface);
-}
-
-static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
 {
 	kfree_skb(forw_packet->skb);
 	if (forw_packet->if_incoming)
@@ -604,45 +595,6 @@ out:
 	atomic_inc(&bat_priv->bcast_queue_left);
 }
 
-void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work)
-{
-	struct delayed_work *delayed_work;
-	struct batadv_forw_packet *forw_packet;
-	struct batadv_priv *bat_priv;
-
-	delayed_work = to_delayed_work(work);
-	forw_packet = container_of(delayed_work, struct batadv_forw_packet,
-				   delayed_work);
-	bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
-	spin_lock_bh(&bat_priv->forw_bat_list_lock);
-	hlist_del(&forw_packet->list);
-	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
-	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
-		goto out;
-
-	bat_priv->bat_algo_ops->bat_ogm_emit(forw_packet);
-
-	/* we have to have at least one packet in the queue to determine the
-	 * queues wake up time unless we are shutting down.
-	 *
-	 * only re-schedule if this is the "original" copy, e.g. the OGM of the
-	 * primary interface should only be rescheduled once per period, but
-	 * this function will be called for the forw_packet instances of the
-	 * other secondary interfaces as well.
-	 */
-	if (forw_packet->own &&
-	    forw_packet->if_incoming == forw_packet->if_outgoing)
-		batadv_schedule_bat_ogm(forw_packet->if_incoming);
-
-out:
-	/* don't count own packet */
-	if (!forw_packet->own)
-		atomic_inc(&bat_priv->batman_queue_left);
-
-	batadv_forw_packet_free(forw_packet);
-}
-
 void
 batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 				 const struct batadv_hard_iface *hard_iface)
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 6fd7270d8ce6..7cecb7563b45 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -26,8 +26,8 @@
 #include "packet.h"
 
 struct sk_buff;
-struct work_struct;
 
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
 int batadv_send_skb_to_orig(struct sk_buff *skb,
 			    struct batadv_orig_node *orig_node,
 			    struct batadv_hard_iface *recv_if);
@@ -38,11 +38,9 @@ int batadv_send_broadcast_skb(struct sk_buff *skb,
 			      struct batadv_hard_iface *hard_iface);
 int batadv_send_unicast_skb(struct sk_buff *skb,
 			    struct batadv_neigh_node *neigh_node);
-void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
 int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
 				    const struct sk_buff *skb,
 				    unsigned long delay);
-void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work);
 void
 batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 				 const struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 343d2c904399..7527c0652dd5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -48,6 +48,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
@@ -255,7 +256,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
 	if (batadv_compare_eth(ethhdr->h_dest, ectp_addr))
 		goto dropped;
 
-	gw_mode = atomic_read(&bat_priv->gw_mode);
+	gw_mode = atomic_read(&bat_priv->gw.mode);
 	if (is_multicast_ether_addr(ethhdr->h_dest)) {
 		/* if gw mode is off, broadcast every packet */
 		if (gw_mode == BATADV_GW_MODE_OFF) {
@@ -808,6 +809,10 @@ static int batadv_softif_init_late(struct net_device *dev)
 	atomic_set(&bat_priv->distributed_arp_table, 1);
 #endif
 #ifdef CONFIG_BATMAN_ADV_MCAST
+	bat_priv->mcast.querier_ipv4.exists = false;
+	bat_priv->mcast.querier_ipv4.shadowing = false;
+	bat_priv->mcast.querier_ipv6.exists = false;
+	bat_priv->mcast.querier_ipv6.shadowing = false;
 	bat_priv->mcast.flags = BATADV_NO_FLAGS;
 	atomic_set(&bat_priv->multicast_mode, 1);
 	atomic_set(&bat_priv->mcast.num_disabled, 0);
@@ -815,8 +820,8 @@ static int batadv_softif_init_late(struct net_device *dev)
 	atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
 	atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
 #endif
-	atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
-	atomic_set(&bat_priv->gw_sel_class, 20);
+	atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
+	atomic_set(&bat_priv->gw.sel_class, 20);
 	atomic_set(&bat_priv->gw.bandwidth_down, 100);
 	atomic_set(&bat_priv->gw.bandwidth_up, 20);
 	atomic_set(&bat_priv->orig_interval, 1000);
@@ -837,6 +842,8 @@ static int batadv_softif_init_late(struct net_device *dev)
 #ifdef CONFIG_BATMAN_ADV_BLA
 	atomic_set(&bat_priv->bla.num_requests, 0);
 #endif
+	atomic_set(&bat_priv->tp_num, 0);
+
 	bat_priv->tt.last_changeset = NULL;
 	bat_priv->tt.last_changeset_len = 0;
 	bat_priv->isolation_mark = 0;
@@ -1033,7 +1040,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
 					  struct list_head *head)
 {
+	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_softif_vlan *vlan;
 
 	list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
 		if (hard_iface->soft_iface == soft_iface)
@@ -1041,6 +1050,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
 							BATADV_IF_CLEANUP_KEEP);
 	}
 
+	/* destroy the "untagged" VLAN */
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (vlan) {
+		batadv_softif_destroy_vlan(bat_priv, vlan);
+		batadv_softif_vlan_put(vlan);
+	}
+
 	batadv_sysfs_del_meshif(soft_iface);
 	unregister_netdevice_queue(soft_iface, head);
 }
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 414b2074165f..fe9ca94ddee2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -25,8 +25,8 @@
 #include <linux/fs.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
-#include <linux/kref.h>
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
@@ -38,11 +38,12 @@
 #include <linux/string.h>
 #include <linux/stringify.h>
 
+#include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
 #include "gateway_common.h"
-#include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "network-coding.h"
 #include "packet.h"
 #include "soft-interface.h"
@@ -389,12 +390,12 @@ static int batadv_store_uint_attr(const char *buff, size_t count,
 	return count;
 }
 
-static inline ssize_t
-__batadv_store_uint_attr(const char *buff, size_t count,
-			 int min, int max,
-			 void (*post_func)(struct net_device *),
-			 const struct attribute *attr,
-			 atomic_t *attr_store, struct net_device *net_dev)
+static ssize_t __batadv_store_uint_attr(const char *buff, size_t count,
+					int min, int max,
+					void (*post_func)(struct net_device *),
+					const struct attribute *attr,
+					atomic_t *attr_store,
+					struct net_device *net_dev)
 {
 	int ret;
 
@@ -411,7 +412,7 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj,
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 
-	return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name);
+	return sprintf(buff, "%s\n", bat_priv->algo_ops->name);
 }
 
 static void batadv_post_gw_reselect(struct net_device *net_dev)
@@ -427,7 +428,7 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr,
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	int bytes_written;
 
-	switch (atomic_read(&bat_priv->gw_mode)) {
+	switch (atomic_read(&bat_priv->gw.mode)) {
 	case BATADV_GW_MODE_CLIENT:
 		bytes_written = sprintf(buff, "%s\n",
 					BATADV_GW_MODE_CLIENT_NAME);
@@ -476,10 +477,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
 		return -EINVAL;
 	}
 
-	if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp)
+	if (atomic_read(&bat_priv->gw.mode) == gw_mode_tmp)
 		return count;
 
-	switch (atomic_read(&bat_priv->gw_mode)) {
+	switch (atomic_read(&bat_priv->gw.mode)) {
 	case BATADV_GW_MODE_CLIENT:
 		curr_gw_mode_str = BATADV_GW_MODE_CLIENT_NAME;
 		break;
@@ -508,7 +509,7 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
 	 * state
 	 */
 	batadv_gw_check_client_stop(bat_priv);
-	atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
+	atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp);
 	batadv_gw_tvlv_container_update(bat_priv);
 	return count;
 }
@@ -624,7 +625,7 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
 		     2 * BATADV_JITTER, INT_MAX, NULL);
 BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
 		     BATADV_TQ_MAX_VALUE, NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, gw_sel_class, S_IRUGO | S_IWUSR, 1,
+BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1,
 		     BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
 static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
 		   batadv_store_gw_bwidth);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
new file mode 100644
index 000000000000..2333777f919d
--- /dev/null
+++ b/net/batman-adv/tp_meter.c
@@ -0,0 +1,1507 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tp_meter.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/param.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/batman_adv.h>
+
+#include "hard-interface.h"
+#include "log.h"
+#include "netlink.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+
+/**
+ * BATADV_TP_DEF_TEST_LENGTH - Default test length if not specified by the user
+ *  in milliseconds
+ */
+#define BATADV_TP_DEF_TEST_LENGTH 10000
+
+/**
+ * BATADV_TP_AWND - Advertised window by the receiver (in bytes)
+ */
+#define BATADV_TP_AWND 0x20000000
+
+/**
+ * BATADV_TP_RECV_TIMEOUT - Receiver activity timeout. If the receiver does not
+ *  get anything for such amount of milliseconds, the connection is killed
+ */
+#define BATADV_TP_RECV_TIMEOUT 1000
+
+/**
+ * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond
+ * such amound of milliseconds, the receiver is considered unreachable and the
+ * connection is killed
+ */
+#define BATADV_TP_MAX_RTO 30000
+
+/**
+ * BATADV_TP_FIRST_SEQ - First seqno of each session. The number is rather high
+ *  in order to immediately trigger a wrap around (test purposes)
+ */
+#define BATADV_TP_FIRST_SEQ ((u32)-1 - 2000)
+
+/**
+ * BATADV_TP_PLEN - length of the payload (data after the batadv_unicast header)
+ *  to simulate
+ */
+#define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \
+			sizeof(struct batadv_unicast_packet))
+
+static u8 batadv_tp_prerandom[4096] __read_mostly;
+
+/**
+ * batadv_tp_session_cookie - generate session cookie based on session ids
+ * @session: TP session identifier
+ * @icmp_uid: icmp pseudo uid of the tp session
+ *
+ * Return: 32 bit tp_meter session cookie
+ */
+static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
+{
+	u32 cookie;
+
+	cookie = icmp_uid << 16;
+	cookie |= session[0] << 8;
+	cookie |= session[1];
+
+	return cookie;
+}
+
+/**
+ * batadv_tp_cwnd - compute the new cwnd size
+ * @base: base cwnd size value
+ * @increment: the value to add to base to get the new size
+ * @min: minumim cwnd value (usually MSS)
+ *
+ * Return the new cwnd size and ensures it does not exceed the Advertised
+ * Receiver Window size. It is wrap around safe.
+ * For details refer to Section 3.1 of RFC5681
+ *
+ * Return: new congestion window size in bytes
+ */
+static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min)
+{
+	u32 new_size = base + increment;
+
+	/* check for wrap-around */
+	if (new_size < base)
+		new_size = (u32)ULONG_MAX;
+
+	new_size = min_t(u32, new_size, BATADV_TP_AWND);
+
+	return max_t(u32, new_size, min);
+}
+
+/**
+ * batadv_tp_updated_cwnd - update the Congestion Windows
+ * @tp_vars: the private data of the current TP meter session
+ * @mss: maximum segment size of transmission
+ *
+ * 1) if the session is in Slow Start, the CWND has to be increased by 1
+ * MSS every unique received ACK
+ * 2) if the session is in Congestion Avoidance, the CWND has to be
+ * increased by MSS * MSS / CWND for every unique received ACK
+ */
+static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss)
+{
+	spin_lock_bh(&tp_vars->cwnd_lock);
+
+	/* slow start... */
+	if (tp_vars->cwnd <= tp_vars->ss_threshold) {
+		tp_vars->dec_cwnd = 0;
+		tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss);
+		spin_unlock_bh(&tp_vars->cwnd_lock);
+		return;
+	}
+
+	/* increment CWND at least of 1 (section 3.1 of RFC5681) */
+	tp_vars->dec_cwnd += max_t(u32, 1U << 3,
+				   ((mss * mss) << 6) / (tp_vars->cwnd << 3));
+	if (tp_vars->dec_cwnd < (mss << 3)) {
+		spin_unlock_bh(&tp_vars->cwnd_lock);
+		return;
+	}
+
+	tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss);
+	tp_vars->dec_cwnd = 0;
+
+	spin_unlock_bh(&tp_vars->cwnd_lock);
+}
+
+/**
+ * batadv_tp_update_rto - calculate new retransmission timeout
+ * @tp_vars: the private data of the current TP meter session
+ * @new_rtt: new roundtrip time in msec
+ */
+static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars,
+				 u32 new_rtt)
+{
+	long m = new_rtt;
+
+	/* RTT update
+	 * Details in Section 2.2 and 2.3 of RFC6298
+	 *
+	 * It's tricky to understand. Don't lose hair please.
+	 * Inspired by tcp_rtt_estimator() tcp_input.c
+	 */
+	if (tp_vars->srtt != 0) {
+		m -= (tp_vars->srtt >> 3); /* m is now error in rtt est */
+		tp_vars->srtt += m; /* rtt = 7/8 srtt + 1/8 new */
+		if (m < 0)
+			m = -m;
+
+		m -= (tp_vars->rttvar >> 2);
+		tp_vars->rttvar += m; /* mdev ~= 3/4 rttvar + 1/4 new */
+	} else {
+		/* first measure getting in */
+		tp_vars->srtt = m << 3;	/* take the measured time to be srtt */
+		tp_vars->rttvar = m << 1; /* new_rtt / 2 */
+	}
+
+	/* rto = srtt + 4 * rttvar.
+	 * rttvar is scaled by 4, therefore doesn't need to be multiplied
+	 */
+	tp_vars->rto = (tp_vars->srtt >> 3) + tp_vars->rttvar;
+}
+
+/**
+ * batadv_tp_batctl_notify - send client status result to client
+ * @reason: reason for tp meter session stop
+ * @dst: destination of tp_meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @start_time: start of transmission in jiffies
+ * @total_sent: bytes acked to the receiver
+ * @cookie: cookie of tp_meter session
+ */
+static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason,
+				    const u8 *dst, struct batadv_priv *bat_priv,
+				    unsigned long start_time, u64 total_sent,
+				    u32 cookie)
+{
+	u32 test_time;
+	u8 result;
+	u32 total_bytes;
+
+	if (!batadv_tp_is_error(reason)) {
+		result = BATADV_TP_REASON_COMPLETE;
+		test_time = jiffies_to_msecs(jiffies - start_time);
+		total_bytes = total_sent;
+	} else {
+		result = reason;
+		test_time = 0;
+		total_bytes = 0;
+	}
+
+	batadv_netlink_tpmeter_notify(bat_priv, dst, result, test_time,
+				      total_bytes, cookie);
+}
+
+/**
+ * batadv_tp_batctl_error_notify - send client error result to client
+ * @reason: reason for tp meter session stop
+ * @dst: destination of tp_meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cookie: cookie of tp_meter session
+ */
+static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
+					  const u8 *dst,
+					  struct batadv_priv *bat_priv,
+					  u32 cookie)
+{
+	batadv_tp_batctl_notify(reason, dst, bat_priv, 0, 0, cookie);
+}
+
+/**
+ * batadv_tp_list_find - find a tp_vars object in the global list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the other endpoint MAC address to look for
+ *
+ * Look for a tp_vars object matching dst as end_point and return it after
+ * having incremented the refcounter. Return NULL is not found
+ *
+ * Return: matching tp_vars or NULL when no tp_vars with @dst was found
+ */
+static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
+						  const u8 *dst)
+{
+	struct batadv_tp_vars *pos, *tp_vars = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) {
+		if (!batadv_compare_eth(pos->other_end, dst))
+			continue;
+
+		/* most of the time this function is invoked during the normal
+		 * process..it makes sens to pay more when the session is
+		 * finished and to speed the process up during the measurement
+		 */
+		if (unlikely(!kref_get_unless_zero(&pos->refcount)))
+			continue;
+
+		tp_vars = pos;
+		break;
+	}
+	rcu_read_unlock();
+
+	return tp_vars;
+}
+
+/**
+ * batadv_tp_list_find_session - find tp_vars session object in the global list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the other endpoint MAC address to look for
+ * @session: session identifier
+ *
+ * Look for a tp_vars object matching dst as end_point, session as tp meter
+ * session and return it after having incremented the refcounter. Return NULL
+ * is not found
+ *
+ * Return: matching tp_vars or NULL when no tp_vars was found
+ */
+static struct batadv_tp_vars *
+batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
+			    const u8 *session)
+{
+	struct batadv_tp_vars *pos, *tp_vars = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) {
+		if (!batadv_compare_eth(pos->other_end, dst))
+			continue;
+
+		if (memcmp(pos->session, session, sizeof(pos->session)) != 0)
+			continue;
+
+		/* most of the time this function is invoked during the normal
+		 * process..it makes sense to pay more when the session is
+		 * finished and to speed the process up during the measurement
+		 */
+		if (unlikely(!kref_get_unless_zero(&pos->refcount)))
+			continue;
+
+		tp_vars = pos;
+		break;
+	}
+	rcu_read_unlock();
+
+	return tp_vars;
+}
+
+/**
+ * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for
+ *  free after rcu grace period
+ * @ref: kref pointer of the batadv_tp_vars
+ */
+static void batadv_tp_vars_release(struct kref *ref)
+{
+	struct batadv_tp_vars *tp_vars;
+	struct batadv_tp_unacked *un, *safe;
+
+	tp_vars = container_of(ref, struct batadv_tp_vars, refcount);
+
+	/* lock should not be needed because this object is now out of any
+	 * context!
+	 */
+	spin_lock_bh(&tp_vars->unacked_lock);
+	list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+		list_del(&un->list);
+		kfree(un);
+	}
+	spin_unlock_bh(&tp_vars->unacked_lock);
+
+	kfree_rcu(tp_vars, rcu);
+}
+
+/**
+ * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly
+ *  release it
+ * @tp_vars: the private data of the current TP meter session to be free'd
+ */
+static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
+{
+	kref_put(&tp_vars->refcount, batadv_tp_vars_release);
+}
+
+/**
+ * batadv_tp_sender_cleanup - cleanup sender data and drop and timer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tp_vars: the private data of the current TP meter session to cleanup
+ */
+static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
+				     struct batadv_tp_vars *tp_vars)
+{
+	cancel_delayed_work(&tp_vars->finish_work);
+
+	spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+	hlist_del_rcu(&tp_vars->list);
+	spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+	/* drop list reference */
+	batadv_tp_vars_put(tp_vars);
+
+	atomic_dec(&tp_vars->bat_priv->tp_num);
+
+	/* kill the timer and remove its reference */
+	del_timer_sync(&tp_vars->timer);
+	/* the worker might have rearmed itself therefore we kill it again. Note
+	 * that if the worker should run again before invoking the following
+	 * del_timer(), it would not re-arm itself once again because the status
+	 * is OFF now
+	 */
+	del_timer(&tp_vars->timer);
+	batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_sender_end - print info about ended session and inform client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
+				 struct batadv_tp_vars *tp_vars)
+{
+	u32 session_cookie;
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Test towards %pM finished..shutting down (reason=%d)\n",
+		   tp_vars->other_end, tp_vars->reason);
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n",
+		   tp_vars->srtt >> 3, tp_vars->rttvar >> 2, tp_vars->rto);
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Final values: cwnd=%u ss_threshold=%u\n",
+		   tp_vars->cwnd, tp_vars->ss_threshold);
+
+	session_cookie = batadv_tp_session_cookie(tp_vars->session,
+						  tp_vars->icmp_uid);
+
+	batadv_tp_batctl_notify(tp_vars->reason,
+				tp_vars->other_end,
+				bat_priv,
+				tp_vars->start_time,
+				atomic64_read(&tp_vars->tot_sent),
+				session_cookie);
+}
+
+/**
+ * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully
+ * @tp_vars: the private data of the current TP meter session
+ * @reason: reason for tp meter session stop
+ */
+static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
+				      enum batadv_tp_meter_reason reason)
+{
+	if (!atomic_dec_and_test(&tp_vars->sending))
+		return;
+
+	tp_vars->reason = reason;
+}
+
+/**
+ * batadv_tp_sender_finish - stop sender session after test_length was reached
+ * @work: delayed work reference of the related tp_vars
+ */
+static void batadv_tp_sender_finish(struct work_struct *work)
+{
+	struct delayed_work *delayed_work;
+	struct batadv_tp_vars *tp_vars;
+
+	delayed_work = to_delayed_work(work);
+	tp_vars = container_of(delayed_work, struct batadv_tp_vars,
+			       finish_work);
+
+	batadv_tp_sender_shutdown(tp_vars, BATADV_TP_REASON_COMPLETE);
+}
+
+/**
+ * batadv_tp_reset_sender_timer - reschedule the sender timer
+ * @tp_vars: the private TP meter data for this session
+ *
+ * Reschedule the timer using tp_vars->rto as delay
+ */
+static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
+{
+	/* most of the time this function is invoked while normal packet
+	 * reception...
+	 */
+	if (unlikely(atomic_read(&tp_vars->sending) == 0))
+		/* timer ref will be dropped in batadv_tp_sender_cleanup */
+		return;
+
+	mod_timer(&tp_vars->timer, jiffies + msecs_to_jiffies(tp_vars->rto));
+}
+
+/**
+ * batadv_tp_sender_timeout - timer that fires in case of packet loss
+ * @arg: address of the related tp_vars
+ *
+ * If fired it means that there was packet loss.
+ * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
+ * reset the cwnd to 3*MSS
+ */
+static void batadv_tp_sender_timeout(unsigned long arg)
+{
+	struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+	struct batadv_priv *bat_priv = tp_vars->bat_priv;
+
+	if (atomic_read(&tp_vars->sending) == 0)
+		return;
+
+	/* if the user waited long enough...shutdown the test */
+	if (unlikely(tp_vars->rto >= BATADV_TP_MAX_RTO)) {
+		batadv_tp_sender_shutdown(tp_vars,
+					  BATADV_TP_REASON_DST_UNREACHABLE);
+		return;
+	}
+
+	/* RTO exponential backoff
+	 * Details in Section 5.5 of RFC6298
+	 */
+	tp_vars->rto <<= 1;
+
+	spin_lock_bh(&tp_vars->cwnd_lock);
+
+	tp_vars->ss_threshold = tp_vars->cwnd >> 1;
+	if (tp_vars->ss_threshold < BATADV_TP_PLEN * 2)
+		tp_vars->ss_threshold = BATADV_TP_PLEN * 2;
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Meter: RTO fired during test towards %pM! cwnd=%u new ss_thr=%u, resetting last_sent to %u\n",
+		   tp_vars->other_end, tp_vars->cwnd, tp_vars->ss_threshold,
+		   atomic_read(&tp_vars->last_acked));
+
+	tp_vars->cwnd = BATADV_TP_PLEN * 3;
+
+	spin_unlock_bh(&tp_vars->cwnd_lock);
+
+	/* resend the non-ACKed packets.. */
+	tp_vars->last_sent = atomic_read(&tp_vars->last_acked);
+	wake_up(&tp_vars->more_bytes);
+
+	batadv_tp_reset_sender_timer(tp_vars);
+}
+
+/**
+ * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes
+ * @tp_vars: the private TP meter data for this session
+ * @buf: Buffer to fill with bytes
+ * @nbytes: amount of pseudorandom bytes
+ */
+static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars,
+				     u8 *buf, size_t nbytes)
+{
+	u32 local_offset;
+	size_t bytes_inbuf;
+	size_t to_copy;
+	size_t pos = 0;
+
+	spin_lock_bh(&tp_vars->prerandom_lock);
+	local_offset = tp_vars->prerandom_offset;
+	tp_vars->prerandom_offset += nbytes;
+	tp_vars->prerandom_offset %= sizeof(batadv_tp_prerandom);
+	spin_unlock_bh(&tp_vars->prerandom_lock);
+
+	while (nbytes) {
+		local_offset %= sizeof(batadv_tp_prerandom);
+		bytes_inbuf = sizeof(batadv_tp_prerandom) - local_offset;
+		to_copy = min(nbytes, bytes_inbuf);
+
+		memcpy(&buf[pos], &batadv_tp_prerandom[local_offset], to_copy);
+		pos += to_copy;
+		nbytes -= to_copy;
+		local_offset = 0;
+	}
+}
+
+/**
+ * batadv_tp_send_msg - send a single message
+ * @tp_vars: the private TP meter data for this session
+ * @src: source mac address
+ * @orig_node: the originator of the destination
+ * @seqno: sequence number of this packet
+ * @len: length of the entire packet
+ * @session: session identifier
+ * @uid: local ICMP "socket" index
+ * @timestamp: timestamp in jiffies which is replied in ack
+ *
+ * Create and send a single TP Meter message.
+ *
+ * Return: 0 on success, BATADV_TP_REASON_DST_UNREACHABLE if the destination is
+ * not reachable, BATADV_TP_REASON_MEMORY_ERROR if the packet couldn't be
+ * allocated
+ */
+static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
+			      struct batadv_orig_node *orig_node,
+			      u32 seqno, size_t len, const u8 *session,
+			      int uid, u32 timestamp)
+{
+	struct batadv_icmp_tp_packet *icmp;
+	struct sk_buff *skb;
+	int r;
+	u8 *data;
+	size_t data_len;
+
+	skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
+	if (unlikely(!skb))
+		return BATADV_TP_REASON_MEMORY_ERROR;
+
+	skb_reserve(skb, ETH_HLEN);
+	icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+
+	/* fill the icmp header */
+	ether_addr_copy(icmp->dst, orig_node->orig);
+	ether_addr_copy(icmp->orig, src);
+	icmp->version = BATADV_COMPAT_VERSION;
+	icmp->packet_type = BATADV_ICMP;
+	icmp->ttl = BATADV_TTL;
+	icmp->msg_type = BATADV_TP;
+	icmp->uid = uid;
+
+	icmp->subtype = BATADV_TP_MSG;
+	memcpy(icmp->session, session, sizeof(icmp->session));
+	icmp->seqno = htonl(seqno);
+	icmp->timestamp = htonl(timestamp);
+
+	data_len = len - sizeof(*icmp);
+	data = (u8 *)skb_put(skb, data_len);
+	batadv_tp_fill_prerandom(tp_vars, data, data_len);
+
+	r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+	if (r == -1)
+		kfree_skb(skb);
+
+	if (r == NET_XMIT_SUCCESS)
+		return 0;
+
+	return BATADV_TP_REASON_CANT_SEND;
+}
+
+/**
+ * batadv_tp_recv_ack - ACK receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP ACK packet
+ */
+static void batadv_tp_recv_ack(struct batadv_priv *bat_priv,
+			       const struct sk_buff *skb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_orig_node *orig_node = NULL;
+	const struct batadv_icmp_tp_packet *icmp;
+	struct batadv_tp_vars *tp_vars;
+	size_t packet_len, mss;
+	u32 rtt, recv_ack, cwnd;
+	unsigned char *dev_addr;
+
+	packet_len = BATADV_TP_PLEN;
+	mss = BATADV_TP_PLEN;
+	packet_len += sizeof(struct batadv_unicast_packet);
+
+	icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+	/* find the tp_vars */
+	tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+					      icmp->session);
+	if (unlikely(!tp_vars))
+		return;
+
+	if (unlikely(atomic_read(&tp_vars->sending) == 0))
+		goto out;
+
+	/* old ACK? silently drop it.. */
+	if (batadv_seq_before(ntohl(icmp->seqno),
+			      (u32)atomic_read(&tp_vars->last_acked)))
+		goto out;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (unlikely(!primary_if))
+		goto out;
+
+	orig_node = batadv_orig_hash_find(bat_priv, icmp->orig);
+	if (unlikely(!orig_node))
+		goto out;
+
+	/* update RTO with the new sampled RTT, if any */
+	rtt = jiffies_to_msecs(jiffies) - ntohl(icmp->timestamp);
+	if (icmp->timestamp && rtt)
+		batadv_tp_update_rto(tp_vars, rtt);
+
+	/* ACK for new data... reset the timer */
+	batadv_tp_reset_sender_timer(tp_vars);
+
+	recv_ack = ntohl(icmp->seqno);
+
+	/* check if this ACK is a duplicate */
+	if (atomic_read(&tp_vars->last_acked) == recv_ack) {
+		atomic_inc(&tp_vars->dup_acks);
+		if (atomic_read(&tp_vars->dup_acks) != 3)
+			goto out;
+
+		if (recv_ack >= tp_vars->recover)
+			goto out;
+
+		/* if this is the third duplicate ACK do Fast Retransmit */
+		batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr,
+				   orig_node, recv_ack, packet_len,
+				   icmp->session, icmp->uid,
+				   jiffies_to_msecs(jiffies));
+
+		spin_lock_bh(&tp_vars->cwnd_lock);
+
+		/* Fast Recovery */
+		tp_vars->fast_recovery = true;
+		/* Set recover to the last outstanding seqno when Fast Recovery
+		 * is entered. RFC6582, Section 3.2, step 1
+		 */
+		tp_vars->recover = tp_vars->last_sent;
+		tp_vars->ss_threshold = tp_vars->cwnd >> 1;
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: Fast Recovery, (cur cwnd=%u) ss_thr=%u last_sent=%u recv_ack=%u\n",
+			   tp_vars->cwnd, tp_vars->ss_threshold,
+			   tp_vars->last_sent, recv_ack);
+		tp_vars->cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 3 * mss,
+					       mss);
+		tp_vars->dec_cwnd = 0;
+		tp_vars->last_sent = recv_ack;
+
+		spin_unlock_bh(&tp_vars->cwnd_lock);
+	} else {
+		/* count the acked data */
+		atomic64_add(recv_ack - atomic_read(&tp_vars->last_acked),
+			     &tp_vars->tot_sent);
+		/* reset the duplicate ACKs counter */
+		atomic_set(&tp_vars->dup_acks, 0);
+
+		if (tp_vars->fast_recovery) {
+			/* partial ACK */
+			if (batadv_seq_before(recv_ack, tp_vars->recover)) {
+				/* this is another hole in the window. React
+				 * immediately as specified by NewReno (see
+				 * Section 3.2 of RFC6582 for details)
+				 */
+				dev_addr = primary_if->net_dev->dev_addr;
+				batadv_tp_send_msg(tp_vars, dev_addr,
+						   orig_node, recv_ack,
+						   packet_len, icmp->session,
+						   icmp->uid,
+						   jiffies_to_msecs(jiffies));
+				tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd,
+							       mss, mss);
+			} else {
+				tp_vars->fast_recovery = false;
+				/* set cwnd to the value of ss_threshold at the
+				 * moment that Fast Recovery was entered.
+				 * RFC6582, Section 3.2, step 3
+				 */
+				cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 0,
+						      mss);
+				tp_vars->cwnd = cwnd;
+			}
+			goto move_twnd;
+		}
+
+		if (recv_ack - atomic_read(&tp_vars->last_acked) >= mss)
+			batadv_tp_update_cwnd(tp_vars, mss);
+move_twnd:
+		/* move the Transmit Window */
+		atomic_set(&tp_vars->last_acked, recv_ack);
+	}
+
+	wake_up(&tp_vars->more_bytes);
+out:
+	if (likely(primary_if))
+		batadv_hardif_put(primary_if);
+	if (likely(orig_node))
+		batadv_orig_node_put(orig_node);
+	if (likely(tp_vars))
+		batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_avail - check if congestion window is not full
+ * @tp_vars: the private data of the current TP meter session
+ * @payload_len: size of the payload of a single message
+ *
+ * Return: true when congestion window is not full, false otherwise
+ */
+static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars,
+			    size_t payload_len)
+{
+	u32 win_left, win_limit;
+
+	win_limit = atomic_read(&tp_vars->last_acked) + tp_vars->cwnd;
+	win_left = win_limit - tp_vars->last_sent;
+
+	return win_left >= payload_len;
+}
+
+/**
+ * batadv_tp_wait_available - wait until congestion window becomes free or
+ *  timeout is reached
+ * @tp_vars: the private data of the current TP meter session
+ * @plen: size of the payload of a single message
+ *
+ * Return: 0 if the condition evaluated to false after the timeout elapsed,
+ *  1 if the condition evaluated to true after the timeout elapsed, the
+ *  remaining jiffies (at least 1) if the condition evaluated to true before
+ *  the timeout elapsed, or -ERESTARTSYS if it was interrupted by a signal.
+ */
+static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen)
+{
+	int ret;
+
+	ret = wait_event_interruptible_timeout(tp_vars->more_bytes,
+					       batadv_tp_avail(tp_vars, plen),
+					       HZ / 10);
+
+	return ret;
+}
+
+/**
+ * batadv_tp_send - main sending thread of a tp meter session
+ * @arg: address of the related tp_vars
+ *
+ * Return: nothing, this function never returns
+ */
+static int batadv_tp_send(void *arg)
+{
+	struct batadv_tp_vars *tp_vars = arg;
+	struct batadv_priv *bat_priv = tp_vars->bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_orig_node *orig_node = NULL;
+	size_t payload_len, packet_len;
+	int err = 0;
+
+	if (unlikely(tp_vars->role != BATADV_TP_SENDER)) {
+		err = BATADV_TP_REASON_DST_UNREACHABLE;
+		tp_vars->reason = err;
+		goto out;
+	}
+
+	orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end);
+	if (unlikely(!orig_node)) {
+		err = BATADV_TP_REASON_DST_UNREACHABLE;
+		tp_vars->reason = err;
+		goto out;
+	}
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (unlikely(!primary_if)) {
+		err = BATADV_TP_REASON_DST_UNREACHABLE;
+		goto out;
+	}
+
+	/* assume that all the hard_interfaces have a correctly
+	 * configured MTU, so use the soft_iface MTU as MSS.
+	 * This might not be true and in that case the fragmentation
+	 * should be used.
+	 * Now, try to send the packet as it is
+	 */
+	payload_len = BATADV_TP_PLEN;
+	BUILD_BUG_ON(sizeof(struct batadv_icmp_tp_packet) > BATADV_TP_PLEN);
+
+	batadv_tp_reset_sender_timer(tp_vars);
+
+	/* queue the worker in charge of terminating the test */
+	queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work,
+			   msecs_to_jiffies(tp_vars->test_length));
+
+	while (atomic_read(&tp_vars->sending) != 0) {
+		if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) {
+			batadv_tp_wait_available(tp_vars, payload_len);
+			continue;
+		}
+
+		/* to emulate normal unicast traffic, add to the payload len
+		 * the size of the unicast header
+		 */
+		packet_len = payload_len + sizeof(struct batadv_unicast_packet);
+
+		err = batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr,
+					 orig_node, tp_vars->last_sent,
+					 packet_len,
+					 tp_vars->session, tp_vars->icmp_uid,
+					 jiffies_to_msecs(jiffies));
+
+		/* something went wrong during the preparation/transmission */
+		if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) {
+			batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+				   "Meter: batadv_tp_send() cannot send packets (%d)\n",
+				   err);
+			/* ensure nobody else tries to stop the thread now */
+			if (atomic_dec_and_test(&tp_vars->sending))
+				tp_vars->reason = err;
+			break;
+		}
+
+		/* right-shift the TWND */
+		if (!err)
+			tp_vars->last_sent += payload_len;
+
+		cond_resched();
+	}
+
+out:
+	if (likely(primary_if))
+		batadv_hardif_put(primary_if);
+	if (likely(orig_node))
+		batadv_orig_node_put(orig_node);
+
+	batadv_tp_sender_end(bat_priv, tp_vars);
+	batadv_tp_sender_cleanup(bat_priv, tp_vars);
+
+	batadv_tp_vars_put(tp_vars);
+
+	do_exit(0);
+}
+
+/**
+ * batadv_tp_start_kthread - start new thread which manages the tp meter sender
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
+{
+	struct task_struct *kthread;
+	struct batadv_priv *bat_priv = tp_vars->bat_priv;
+	u32 session_cookie;
+
+	kref_get(&tp_vars->refcount);
+	kthread = kthread_create(batadv_tp_send, tp_vars, "kbatadv_tp_meter");
+	if (IS_ERR(kthread)) {
+		session_cookie = batadv_tp_session_cookie(tp_vars->session,
+							  tp_vars->icmp_uid);
+		pr_err("batadv: cannot create tp meter kthread\n");
+		batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+					      tp_vars->other_end,
+					      bat_priv, session_cookie);
+
+		/* drop reserved reference for kthread */
+		batadv_tp_vars_put(tp_vars);
+
+		/* cleanup of failed tp meter variables */
+		batadv_tp_sender_cleanup(bat_priv, tp_vars);
+		return;
+	}
+
+	wake_up_process(kthread);
+}
+
+/**
+ * batadv_tp_start - start a new tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @test_length: test length in milliseconds
+ * @cookie: session cookie
+ */
+void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
+		     u32 test_length, u32 *cookie)
+{
+	struct batadv_tp_vars *tp_vars;
+	u8 session_id[2];
+	u8 icmp_uid;
+	u32 session_cookie;
+
+	get_random_bytes(session_id, sizeof(session_id));
+	get_random_bytes(&icmp_uid, 1);
+	session_cookie = batadv_tp_session_cookie(session_id, icmp_uid);
+	*cookie = session_cookie;
+
+	/* look for an already existing test towards this node */
+	spin_lock_bh(&bat_priv->tp_list_lock);
+	tp_vars = batadv_tp_list_find(bat_priv, dst);
+	if (tp_vars) {
+		spin_unlock_bh(&bat_priv->tp_list_lock);
+		batadv_tp_vars_put(tp_vars);
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: test to or from the same node already ongoing, aborting\n");
+		batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING,
+					      dst, bat_priv, session_cookie);
+		return;
+	}
+
+	if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+		spin_unlock_bh(&bat_priv->tp_list_lock);
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: too many ongoing sessions, aborting (SEND)\n");
+		batadv_tp_batctl_error_notify(BATADV_TP_REASON_TOO_MANY, dst,
+					      bat_priv, session_cookie);
+		return;
+	}
+
+	tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+	if (!tp_vars) {
+		spin_unlock_bh(&bat_priv->tp_list_lock);
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: batadv_tp_start cannot allocate list elements\n");
+		batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+					      dst, bat_priv, session_cookie);
+		return;
+	}
+
+	/* initialize tp_vars */
+	ether_addr_copy(tp_vars->other_end, dst);
+	kref_init(&tp_vars->refcount);
+	tp_vars->role = BATADV_TP_SENDER;
+	atomic_set(&tp_vars->sending, 1);
+	memcpy(tp_vars->session, session_id, sizeof(session_id));
+	tp_vars->icmp_uid = icmp_uid;
+
+	tp_vars->last_sent = BATADV_TP_FIRST_SEQ;
+	atomic_set(&tp_vars->last_acked, BATADV_TP_FIRST_SEQ);
+	tp_vars->fast_recovery = false;
+	tp_vars->recover = BATADV_TP_FIRST_SEQ;
+
+	/* initialise the CWND to 3*MSS (Section 3.1 in RFC5681).
+	 * For batman-adv the MSS is the size of the payload received by the
+	 * soft_interface, hence its MTU
+	 */
+	tp_vars->cwnd = BATADV_TP_PLEN * 3;
+	/* at the beginning initialise the SS threshold to the biggest possible
+	 * window size, hence the AWND size
+	 */
+	tp_vars->ss_threshold = BATADV_TP_AWND;
+
+	/* RTO initial value is 3 seconds.
+	 * Details in Section 2.1 of RFC6298
+	 */
+	tp_vars->rto = 1000;
+	tp_vars->srtt = 0;
+	tp_vars->rttvar = 0;
+
+	atomic64_set(&tp_vars->tot_sent, 0);
+
+	kref_get(&tp_vars->refcount);
+	setup_timer(&tp_vars->timer, batadv_tp_sender_timeout,
+		    (unsigned long)tp_vars);
+
+	tp_vars->bat_priv = bat_priv;
+	tp_vars->start_time = jiffies;
+
+	init_waitqueue_head(&tp_vars->more_bytes);
+
+	spin_lock_init(&tp_vars->unacked_lock);
+	INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+	spin_lock_init(&tp_vars->cwnd_lock);
+
+	tp_vars->prerandom_offset = 0;
+	spin_lock_init(&tp_vars->prerandom_lock);
+
+	kref_get(&tp_vars->refcount);
+	hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+	spin_unlock_bh(&bat_priv->tp_list_lock);
+
+	tp_vars->test_length = test_length;
+	if (!tp_vars->test_length)
+		tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH;
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Meter: starting throughput meter towards %pM (length=%ums)\n",
+		   dst, test_length);
+
+	/* init work item for finished tp tests */
+	INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish);
+
+	/* start tp kthread. This way the write() call issued from userspace can
+	 * happily return and avoid to block
+	 */
+	batadv_tp_start_kthread(tp_vars);
+
+	/* don't return reference to new tp_vars */
+	batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_stop - stop currently running tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @return_value: reason for tp meter session stop
+ */
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+		    u8 return_value)
+{
+	struct batadv_orig_node *orig_node;
+	struct batadv_tp_vars *tp_vars;
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Meter: stopping test towards %pM\n", dst);
+
+	orig_node = batadv_orig_hash_find(bat_priv, dst);
+	if (!orig_node)
+		return;
+
+	tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig);
+	if (!tp_vars) {
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: trying to interrupt an already over connection\n");
+		goto out;
+	}
+
+	batadv_tp_sender_shutdown(tp_vars, return_value);
+	batadv_tp_vars_put(tp_vars);
+out:
+	batadv_orig_node_put(orig_node);
+}
+
+/**
+ * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer
+ * @tp_vars: the private data of the current TP meter session
+ *
+ * start the receiver shutdown timer or reset it if already started
+ */
+static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
+{
+	mod_timer(&tp_vars->timer,
+		  jiffies + msecs_to_jiffies(BATADV_TP_RECV_TIMEOUT));
+}
+
+/**
+ * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
+ *  reached without received ack
+ * @arg: address of the related tp_vars
+ */
+static void batadv_tp_receiver_shutdown(unsigned long arg)
+{
+	struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+	struct batadv_tp_unacked *un, *safe;
+	struct batadv_priv *bat_priv;
+
+	bat_priv = tp_vars->bat_priv;
+
+	/* if there is recent activity rearm the timer */
+	if (!batadv_has_timed_out(tp_vars->last_recv_time,
+				  BATADV_TP_RECV_TIMEOUT)) {
+		/* reset the receiver shutdown timer */
+		batadv_tp_reset_receiver_timer(tp_vars);
+		return;
+	}
+
+	batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+		   "Shutting down for inactivity (more than %dms) from %pM\n",
+		   BATADV_TP_RECV_TIMEOUT, tp_vars->other_end);
+
+	spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+	hlist_del_rcu(&tp_vars->list);
+	spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+	/* drop list reference */
+	batadv_tp_vars_put(tp_vars);
+
+	atomic_dec(&bat_priv->tp_num);
+
+	spin_lock_bh(&tp_vars->unacked_lock);
+	list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+		list_del(&un->list);
+		kfree(un);
+	}
+	spin_unlock_bh(&tp_vars->unacked_lock);
+
+	/* drop reference of timer */
+	batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_send_ack - send an ACK packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the mac address of the destination originator
+ * @seq: the sequence number to ACK
+ * @timestamp: the timestamp to echo back in the ACK
+ * @session: session identifier
+ * @socket_index: local ICMP socket identifier
+ *
+ * Return: 0 on success, a positive integer representing the reason of the
+ * failure otherwise
+ */
+static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
+			      u32 seq, __be32 timestamp, const u8 *session,
+			      int socket_index)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_orig_node *orig_node;
+	struct batadv_icmp_tp_packet *icmp;
+	struct sk_buff *skb;
+	int r, ret;
+
+	orig_node = batadv_orig_hash_find(bat_priv, dst);
+	if (unlikely(!orig_node)) {
+		ret = BATADV_TP_REASON_DST_UNREACHABLE;
+		goto out;
+	}
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (unlikely(!primary_if)) {
+		ret = BATADV_TP_REASON_DST_UNREACHABLE;
+		goto out;
+	}
+
+	skb = netdev_alloc_skb_ip_align(NULL, sizeof(*icmp) + ETH_HLEN);
+	if (unlikely(!skb)) {
+		ret = BATADV_TP_REASON_MEMORY_ERROR;
+		goto out;
+	}
+
+	skb_reserve(skb, ETH_HLEN);
+	icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+	icmp->packet_type = BATADV_ICMP;
+	icmp->version = BATADV_COMPAT_VERSION;
+	icmp->ttl = BATADV_TTL;
+	icmp->msg_type = BATADV_TP;
+	ether_addr_copy(icmp->dst, orig_node->orig);
+	ether_addr_copy(icmp->orig, primary_if->net_dev->dev_addr);
+	icmp->uid = socket_index;
+
+	icmp->subtype = BATADV_TP_ACK;
+	memcpy(icmp->session, session, sizeof(icmp->session));
+	icmp->seqno = htonl(seq);
+	icmp->timestamp = timestamp;
+
+	/* send the ack */
+	r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+	if (r == -1)
+		kfree_skb(skb);
+
+	if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+		ret = BATADV_TP_REASON_DST_UNREACHABLE;
+		goto out;
+	}
+	ret = 0;
+
+out:
+	if (likely(orig_node))
+		batadv_orig_node_put(orig_node);
+	if (likely(primary_if))
+		batadv_hardif_put(primary_if);
+
+	return ret;
+}
+
+/**
+ * batadv_tp_handle_out_of_order - store an out of order packet
+ * @tp_vars: the private data of the current TP meter session
+ * @skb: the buffer containing the received packet
+ *
+ * Store the out of order packet in the unacked list for late processing. This
+ * packets are kept in this list so that they can be ACKed at once as soon as
+ * all the previous packets have been received
+ *
+ * Return: true if the packed has been successfully processed, false otherwise
+ */
+static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars,
+					  const struct sk_buff *skb)
+{
+	const struct batadv_icmp_tp_packet *icmp;
+	struct batadv_tp_unacked *un, *new;
+	u32 payload_len;
+	bool added = false;
+
+	new = kmalloc(sizeof(*new), GFP_ATOMIC);
+	if (unlikely(!new))
+		return false;
+
+	icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+	new->seqno = ntohl(icmp->seqno);
+	payload_len = skb->len - sizeof(struct batadv_unicast_packet);
+	new->len = payload_len;
+
+	spin_lock_bh(&tp_vars->unacked_lock);
+	/* if the list is empty immediately attach this new object */
+	if (list_empty(&tp_vars->unacked_list)) {
+		list_add(&new->list, &tp_vars->unacked_list);
+		goto out;
+	}
+
+	/* otherwise loop over the list and either drop the packet because this
+	 * is a duplicate or store it at the right position.
+	 *
+	 * The iteration is done in the reverse way because it is likely that
+	 * the last received packet (the one being processed now) has a bigger
+	 * seqno than all the others already stored.
+	 */
+	list_for_each_entry_reverse(un, &tp_vars->unacked_list, list) {
+		/* check for duplicates */
+		if (new->seqno == un->seqno) {
+			if (new->len > un->len)
+				un->len = new->len;
+			kfree(new);
+			added = true;
+			break;
+		}
+
+		/* look for the right position */
+		if (batadv_seq_before(new->seqno, un->seqno))
+			continue;
+
+		/* as soon as an entry having a bigger seqno is found, the new
+		 * one is attached _after_ it. In this way the list is kept in
+		 * ascending order
+		 */
+		list_add_tail(&new->list, &un->list);
+		added = true;
+		break;
+	}
+
+	/* received packet with smallest seqno out of order; add it to front */
+	if (!added)
+		list_add(&new->list, &tp_vars->unacked_list);
+
+out:
+	spin_unlock_bh(&tp_vars->unacked_lock);
+
+	return true;
+}
+
+/**
+ * batadv_tp_ack_unordered - update number received bytes in current stream
+ *  without gaps
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars)
+{
+	struct batadv_tp_unacked *un, *safe;
+	u32 to_ack;
+
+	/* go through the unacked packet list and possibly ACK them as
+	 * well
+	 */
+	spin_lock_bh(&tp_vars->unacked_lock);
+	list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+		/* the list is ordered, therefore it is possible to stop as soon
+		 * there is a gap between the last acked seqno and the seqno of
+		 * the packet under inspection
+		 */
+		if (batadv_seq_before(tp_vars->last_recv, un->seqno))
+			break;
+
+		to_ack = un->seqno + un->len - tp_vars->last_recv;
+
+		if (batadv_seq_before(tp_vars->last_recv, un->seqno + un->len))
+			tp_vars->last_recv += to_ack;
+
+		list_del(&un->list);
+		kfree(un);
+	}
+	spin_unlock_bh(&tp_vars->unacked_lock);
+}
+
+/**
+ * batadv_tp_init_recv - return matching or create new receiver tp_vars
+ * @bat_priv: the bat priv with all the soft interface information
+ * @icmp: received icmp tp msg
+ *
+ * Return: corresponding tp_vars or NULL on errors
+ */
+static struct batadv_tp_vars *
+batadv_tp_init_recv(struct batadv_priv *bat_priv,
+		    const struct batadv_icmp_tp_packet *icmp)
+{
+	struct batadv_tp_vars *tp_vars;
+
+	spin_lock_bh(&bat_priv->tp_list_lock);
+	tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+					      icmp->session);
+	if (tp_vars)
+		goto out_unlock;
+
+	if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: too many ongoing sessions, aborting (RECV)\n");
+		goto out_unlock;
+	}
+
+	tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+	if (!tp_vars)
+		goto out_unlock;
+
+	ether_addr_copy(tp_vars->other_end, icmp->orig);
+	tp_vars->role = BATADV_TP_RECEIVER;
+	memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session));
+	tp_vars->last_recv = BATADV_TP_FIRST_SEQ;
+	tp_vars->bat_priv = bat_priv;
+	kref_init(&tp_vars->refcount);
+
+	spin_lock_init(&tp_vars->unacked_lock);
+	INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+	kref_get(&tp_vars->refcount);
+	hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+
+	kref_get(&tp_vars->refcount);
+	setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown,
+		    (unsigned long)tp_vars);
+
+	batadv_tp_reset_receiver_timer(tp_vars);
+
+out_unlock:
+	spin_unlock_bh(&bat_priv->tp_list_lock);
+
+	return tp_vars;
+}
+
+/**
+ * batadv_tp_recv_msg - process a single data message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP MSG packet
+ */
+static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
+			       const struct sk_buff *skb)
+{
+	const struct batadv_icmp_tp_packet *icmp;
+	struct batadv_tp_vars *tp_vars;
+	size_t packet_size;
+	u32 seqno;
+
+	icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+	seqno = ntohl(icmp->seqno);
+	/* check if this is the first seqno. This means that if the
+	 * first packet is lost, the tp meter does not work anymore!
+	 */
+	if (seqno == BATADV_TP_FIRST_SEQ) {
+		tp_vars = batadv_tp_init_recv(bat_priv, icmp);
+		if (!tp_vars) {
+			batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+				   "Meter: seqno != BATADV_TP_FIRST_SEQ cannot initiate connection\n");
+			goto out;
+		}
+	} else {
+		tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+						      icmp->session);
+		if (!tp_vars) {
+			batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+				   "Unexpected packet from %pM!\n",
+				   icmp->orig);
+			goto out;
+		}
+	}
+
+	if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) {
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Meter: dropping packet: not expected (role=%u)\n",
+			   tp_vars->role);
+		goto out;
+	}
+
+	tp_vars->last_recv_time = jiffies;
+
+	/* if the packet is a duplicate, it may be the case that an ACK has been
+	 * lost. Resend the ACK
+	 */
+	if (batadv_seq_before(seqno, tp_vars->last_recv))
+		goto send_ack;
+
+	/* if the packet is out of order enqueue it */
+	if (ntohl(icmp->seqno) != tp_vars->last_recv) {
+		/* exit immediately (and do not send any ACK) if the packet has
+		 * not been enqueued correctly
+		 */
+		if (!batadv_tp_handle_out_of_order(tp_vars, skb))
+			goto out;
+
+		/* send a duplicate ACK */
+		goto send_ack;
+	}
+
+	/* if everything was fine count the ACKed bytes */
+	packet_size = skb->len - sizeof(struct batadv_unicast_packet);
+	tp_vars->last_recv += packet_size;
+
+	/* check if this ordered message filled a gap.... */
+	batadv_tp_ack_unordered(tp_vars);
+
+send_ack:
+	/* send the ACK. If the received packet was out of order, the ACK that
+	 * is going to be sent is a duplicate (the sender will count them and
+	 * possibly enter Fast Retransmit as soon as it has reached 3)
+	 */
+	batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
+			   icmp->timestamp, icmp->session, icmp->uid);
+out:
+	if (likely(tp_vars))
+		batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_meter_recv - main TP Meter receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ */
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
+{
+	struct batadv_icmp_tp_packet *icmp;
+
+	icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+	switch (icmp->subtype) {
+	case BATADV_TP_MSG:
+		batadv_tp_recv_msg(bat_priv, skb);
+		break;
+	case BATADV_TP_ACK:
+		batadv_tp_recv_ack(bat_priv, skb);
+		break;
+	default:
+		batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+			   "Received unknown TP Metric packet type %u\n",
+			   icmp->subtype);
+	}
+	consume_skb(skb);
+}
+
+/**
+ * batadv_tp_meter_init - initialize global tp_meter structures
+ */
+void batadv_tp_meter_init(void)
+{
+	get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom));
+}
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
new file mode 100644
index 000000000000..ba922c425e56
--- /dev/null
+++ b/net/batman-adv/tp_meter.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_TP_METER_H_
+#define _NET_BATMAN_ADV_TP_METER_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+struct sk_buff;
+
+void batadv_tp_meter_init(void);
+void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
+		     u32 test_length, u32 *cookie);
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+		    u8 return_value);
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
+#endif /* _NET_BATMAN_ADV_TP_METER_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index feaf492b01ca..7e6df7a4964a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -47,10 +47,12 @@
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
 #include "multicast.h"
 #include "originator.h"
 #include "packet.h"
 #include "soft-interface.h"
+#include "tvlv.h"
 
 /* hash class keys */
 static struct lock_class_key batadv_tt_local_hash_lock_class_key;
@@ -650,8 +652,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
 
 	/* increase the refcounter of the related vlan */
 	vlan = batadv_softif_vlan_get(bat_priv, vid);
-	if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d",
-		 addr, BATADV_PRINT_VID(vid))) {
+	if (!vlan) {
+		net_ratelimited_function(batadv_info, soft_iface,
+					 "adding TT local entry %pM to non-existent VLAN %d\n",
+					 addr, BATADV_PRINT_VID(vid));
 		kfree(tt_local);
 		tt_local = NULL;
 		goto out;
@@ -691,7 +695,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
 	if (unlikely(hash_added != 0)) {
 		/* remove the reference for the hash */
 		batadv_tt_local_entry_put(tt_local);
-		batadv_softif_vlan_put(vlan);
 		goto out;
 	}
 
@@ -995,7 +998,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_tt_local_entry *tt_local;
 	struct batadv_hard_iface *primary_if;
 	struct hlist_head *head;
-	unsigned short vid;
 	u32 i;
 	int last_seen_secs;
 	int last_seen_msecs;
@@ -1022,7 +1024,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 			tt_local = container_of(tt_common_entry,
 						struct batadv_tt_local_entry,
 						common);
-			vid = tt_common_entry->vid;
 			last_seen_jiffies = jiffies - tt_local->last_seen;
 			last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
 			last_seen_secs = last_seen_msecs / 1000;
@@ -1546,7 +1547,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
 			    struct batadv_tt_global_entry *tt_global_entry)
 {
 	struct batadv_neigh_node *router, *best_router = NULL;
-	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+	struct batadv_algo_ops *bao = bat_priv->algo_ops;
 	struct hlist_head *head;
 	struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
 
@@ -1558,8 +1559,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
 			continue;
 
 		if (best_router &&
-		    bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT,
-				       best_router, BATADV_IF_DEFAULT) <= 0) {
+		    bao->neigh.cmp(router, BATADV_IF_DEFAULT, best_router,
+				   BATADV_IF_DEFAULT) <= 0) {
 			batadv_neigh_node_put(router);
 			continue;
 		}
@@ -2269,6 +2270,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
 	return crc;
 }
 
+/**
+ * batadv_tt_req_node_release - free tt_req node entry
+ * @ref: kref pointer of the tt req_node entry
+ */
+static void batadv_tt_req_node_release(struct kref *ref)
+{
+	struct batadv_tt_req_node *tt_req_node;
+
+	tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
+
+	kfree(tt_req_node);
+}
+
+/**
+ * batadv_tt_req_node_put - decrement the tt_req_node refcounter and
+ *  possibly release it
+ * @tt_req_node: tt_req_node to be free'd
+ */
+static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
+{
+	kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
+}
+
 static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
 {
 	struct batadv_tt_req_node *node;
@@ -2278,7 +2302,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
 
 	hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
 		hlist_del_init(&node->list);
-		kfree(node);
+		batadv_tt_req_node_put(node);
 	}
 
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2315,7 +2339,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
 		if (batadv_has_timed_out(node->issued_at,
 					 BATADV_TT_REQUEST_TIMEOUT)) {
 			hlist_del_init(&node->list);
-			kfree(node);
+			batadv_tt_req_node_put(node);
 		}
 	}
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2347,9 +2371,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv,
 	if (!tt_req_node)
 		goto unlock;
 
+	kref_init(&tt_req_node->refcount);
 	ether_addr_copy(tt_req_node->addr, orig_node->orig);
 	tt_req_node->issued_at = jiffies;
 
+	kref_get(&tt_req_node->refcount);
 	hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list);
 unlock:
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2613,13 +2639,19 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
 out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
+
 	if (ret && tt_req_node) {
 		spin_lock_bh(&bat_priv->tt.req_list_lock);
-		/* hlist_del_init() verifies tt_req_node still is in the list */
-		hlist_del_init(&tt_req_node->list);
+		if (!hlist_unhashed(&tt_req_node->list)) {
+			hlist_del_init(&tt_req_node->list);
+			batadv_tt_req_node_put(tt_req_node);
+		}
 		spin_unlock_bh(&bat_priv->tt.req_list_lock);
-		kfree(tt_req_node);
 	}
+
+	if (tt_req_node)
+		batadv_tt_req_node_put(tt_req_node);
+
 	kfree(tvlv_tt_data);
 	return ret;
 }
@@ -3055,7 +3087,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
 		if (!batadv_compare_eth(node->addr, resp_src))
 			continue;
 		hlist_del_init(&node->list);
-		kfree(node);
+		batadv_tt_req_node_put(node);
 	}
 
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
new file mode 100644
index 000000000000..3d1cf0fb112d
--- /dev/null
+++ b/net/batman-adv/tvlv.c
@@ -0,0 +1,632 @@
+/* Copyright (C) 2007-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "tvlv.h"
+
+/**
+ * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ *  free after rcu grace period
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_handler_release(struct kref *ref)
+{
+	struct batadv_tvlv_handler *tvlv_handler;
+
+	tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
+	kfree_rcu(tvlv_handler, rcu);
+}
+
+/**
+ * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
+ *  possibly release it
+ * @tvlv_handler: the tvlv handler to free
+ */
+static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
+{
+	kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
+}
+
+/**
+ * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ *  based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to look for
+ * @version: tvlv handler version to look for
+ *
+ * Return: tvlv handler if found or NULL otherwise.
+ */
+static struct batadv_tvlv_handler *
+batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+	struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tvlv_handler_tmp,
+				 &bat_priv->tvlv.handler_list, list) {
+		if (tvlv_handler_tmp->type != type)
+			continue;
+
+		if (tvlv_handler_tmp->version != version)
+			continue;
+
+		if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
+			continue;
+
+		tvlv_handler = tvlv_handler_tmp;
+		break;
+	}
+	rcu_read_unlock();
+
+	return tvlv_handler;
+}
+
+/**
+ * batadv_tvlv_container_release - release tvlv from lists and free
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_container_release(struct kref *ref)
+{
+	struct batadv_tvlv_container *tvlv;
+
+	tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
+	kfree(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ *  possibly release it
+ * @tvlv: the tvlv container to free
+ */
+static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
+{
+	kref_put(&tvlv->refcount, batadv_tvlv_container_release);
+}
+
+/**
+ * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ *  list based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to look for
+ * @version: tvlv container version to look for
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: tvlv container if found or NULL otherwise.
+ */
+static struct batadv_tvlv_container *
+batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+	struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
+
+	lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+	hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
+		if (tvlv_tmp->tvlv_hdr.type != type)
+			continue;
+
+		if (tvlv_tmp->tvlv_hdr.version != version)
+			continue;
+
+		kref_get(&tvlv_tmp->refcount);
+		tvlv = tvlv_tmp;
+		break;
+	}
+
+	return tvlv;
+}
+
+/**
+ * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ *  list entries
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: size of all currently registered tvlv containers in bytes.
+ */
+static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+{
+	struct batadv_tvlv_container *tvlv;
+	u16 tvlv_len = 0;
+
+	lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+	hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+		tvlv_len += sizeof(struct batadv_tvlv_hdr);
+		tvlv_len += ntohs(tvlv->tvlv_hdr.len);
+	}
+
+	return tvlv_len;
+}
+
+/**
+ * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
+ *  list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv: the to be removed tvlv container
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ */
+static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
+					 struct batadv_tvlv_container *tvlv)
+{
+	lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+	if (!tvlv)
+		return;
+
+	hlist_del(&tvlv->list);
+
+	/* first call to decrement the counter, second call to free */
+	batadv_tvlv_container_put(tvlv);
+	batadv_tvlv_container_put(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ *  provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to unregister
+ * @version: tvlv container type to unregister
+ */
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+				      u8 type, u8 version)
+{
+	struct batadv_tvlv_container *tvlv;
+
+	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+	tvlv = batadv_tvlv_container_get(bat_priv, type, version);
+	batadv_tvlv_container_remove(bat_priv, tvlv);
+	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_container_register - register tvlv type, version and content
+ *  to be propagated with each (primary interface) OGM
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type
+ * @version: tvlv container version
+ * @tvlv_value: tvlv container content
+ * @tvlv_value_len: tvlv container content length
+ *
+ * If a container of the same type and version was already registered the new
+ * content is going to replace the old one.
+ */
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+				    u8 type, u8 version,
+				    void *tvlv_value, u16 tvlv_value_len)
+{
+	struct batadv_tvlv_container *tvlv_old, *tvlv_new;
+
+	if (!tvlv_value)
+		tvlv_value_len = 0;
+
+	tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
+	if (!tvlv_new)
+		return;
+
+	tvlv_new->tvlv_hdr.version = version;
+	tvlv_new->tvlv_hdr.type = type;
+	tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
+
+	memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
+	INIT_HLIST_NODE(&tvlv_new->list);
+	kref_init(&tvlv_new->refcount);
+
+	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+	tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
+	batadv_tvlv_container_remove(bat_priv, tvlv_old);
+	hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
+	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
+ *  requested packet size
+ * @packet_buff: packet buffer
+ * @packet_buff_len: packet buffer size
+ * @min_packet_len: requested packet minimum size
+ * @additional_packet_len: requested additional packet size on top of minimum
+ *  size
+ *
+ * Return: true of the packet buffer could be changed to the requested size,
+ * false otherwise.
+ */
+static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
+					    int *packet_buff_len,
+					    int min_packet_len,
+					    int additional_packet_len)
+{
+	unsigned char *new_buff;
+
+	new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
+
+	/* keep old buffer if kmalloc should fail */
+	if (!new_buff)
+		return false;
+
+	memcpy(new_buff, *packet_buff, min_packet_len);
+	kfree(*packet_buff);
+	*packet_buff = new_buff;
+	*packet_buff_len = min_packet_len + additional_packet_len;
+
+	return true;
+}
+
+/**
+ * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ *  OGM packet buffer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_buff: ogm packet buffer
+ * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
+ *  content
+ * @packet_min_len: ogm header size to be preserved for the OGM itself
+ *
+ * The ogm packet might be enlarged or shrunk depending on the current size
+ * and the size of the to-be-appended tvlv containers.
+ *
+ * Return: size of all appended tvlv containers in bytes.
+ */
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+				     unsigned char **packet_buff,
+				     int *packet_buff_len, int packet_min_len)
+{
+	struct batadv_tvlv_container *tvlv;
+	struct batadv_tvlv_hdr *tvlv_hdr;
+	u16 tvlv_value_len;
+	void *tvlv_value;
+	bool ret;
+
+	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+	tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
+
+	ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
+					      packet_min_len, tvlv_value_len);
+
+	if (!ret)
+		goto end;
+
+	if (!tvlv_value_len)
+		goto end;
+
+	tvlv_value = (*packet_buff) + packet_min_len;
+
+	hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+		tvlv_hdr = tvlv_value;
+		tvlv_hdr->type = tvlv->tvlv_hdr.type;
+		tvlv_hdr->version = tvlv->tvlv_hdr.version;
+		tvlv_hdr->len = tvlv->tvlv_hdr.len;
+		tvlv_value = tvlv_hdr + 1;
+		memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
+		tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
+	}
+
+end:
+	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+	return tvlv_value_len;
+}
+
+/**
+ * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
+ *  appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv_handler: tvlv callback function handling the tvlv content
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Return: success if handler was not found or the return value of the handler
+ * callback.
+ */
+static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
+				    struct batadv_tvlv_handler *tvlv_handler,
+				    bool ogm_source,
+				    struct batadv_orig_node *orig_node,
+				    u8 *src, u8 *dst,
+				    void *tvlv_value, u16 tvlv_value_len)
+{
+	if (!tvlv_handler)
+		return NET_RX_SUCCESS;
+
+	if (ogm_source) {
+		if (!tvlv_handler->ogm_handler)
+			return NET_RX_SUCCESS;
+
+		if (!orig_node)
+			return NET_RX_SUCCESS;
+
+		tvlv_handler->ogm_handler(bat_priv, orig_node,
+					  BATADV_NO_FLAGS,
+					  tvlv_value, tvlv_value_len);
+		tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
+	} else {
+		if (!src)
+			return NET_RX_SUCCESS;
+
+		if (!dst)
+			return NET_RX_SUCCESS;
+
+		if (!tvlv_handler->unicast_handler)
+			return NET_RX_SUCCESS;
+
+		return tvlv_handler->unicast_handler(bat_priv, src,
+						     dst, tvlv_value,
+						     tvlv_value_len);
+	}
+
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
+ *  appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Return: success when processing an OGM or the return value of all called
+ * handler callbacks.
+ */
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+				   bool ogm_source,
+				   struct batadv_orig_node *orig_node,
+				   u8 *src, u8 *dst,
+				   void *tvlv_value, u16 tvlv_value_len)
+{
+	struct batadv_tvlv_handler *tvlv_handler;
+	struct batadv_tvlv_hdr *tvlv_hdr;
+	u16 tvlv_value_cont_len;
+	u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
+	int ret = NET_RX_SUCCESS;
+
+	while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
+		tvlv_hdr = tvlv_value;
+		tvlv_value_cont_len = ntohs(tvlv_hdr->len);
+		tvlv_value = tvlv_hdr + 1;
+		tvlv_value_len -= sizeof(*tvlv_hdr);
+
+		if (tvlv_value_cont_len > tvlv_value_len)
+			break;
+
+		tvlv_handler = batadv_tvlv_handler_get(bat_priv,
+						       tvlv_hdr->type,
+						       tvlv_hdr->version);
+
+		ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
+						ogm_source, orig_node,
+						src, dst, tvlv_value,
+						tvlv_value_cont_len);
+		if (tvlv_handler)
+			batadv_tvlv_handler_put(tvlv_handler);
+		tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
+		tvlv_value_len -= tvlv_value_cont_len;
+	}
+
+	if (!ogm_source)
+		return ret;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tvlv_handler,
+				 &bat_priv->tvlv.handler_list, list) {
+		if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
+		    !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
+			tvlv_handler->ogm_handler(bat_priv, orig_node,
+						  cifnotfound, NULL, 0);
+
+		tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
+	}
+	rcu_read_unlock();
+
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
+ *  handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @batadv_ogm_packet: ogm packet containing the tvlv containers
+ * @orig_node: orig node emitting the ogm packet
+ */
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+			     struct batadv_ogm_packet *batadv_ogm_packet,
+			     struct batadv_orig_node *orig_node)
+{
+	void *tvlv_value;
+	u16 tvlv_value_len;
+
+	if (!batadv_ogm_packet)
+		return;
+
+	tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
+	if (!tvlv_value_len)
+		return;
+
+	tvlv_value = batadv_ogm_packet + 1;
+
+	batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
+				       tvlv_value, tvlv_value_len);
+}
+
+/**
+ * batadv_tvlv_handler_register - register tvlv handler based on the provided
+ *  type and version (both need to match) for ogm tvlv payload and/or unicast
+ *  payload
+ * @bat_priv: the bat priv with all the soft interface information
+ * @optr: ogm tvlv handler callback function. This function receives the orig
+ *  node, flags and the tvlv content as argument to process.
+ * @uptr: unicast tvlv handler callback function. This function receives the
+ *  source & destination of the unicast packet as well as the tvlv content
+ *  to process.
+ * @type: tvlv handler type to be registered
+ * @version: tvlv handler version to be registered
+ * @flags: flags to enable or disable TVLV API behavior
+ */
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+				  void (*optr)(struct batadv_priv *bat_priv,
+					       struct batadv_orig_node *orig,
+					       u8 flags,
+					       void *tvlv_value,
+					       u16 tvlv_value_len),
+				  int (*uptr)(struct batadv_priv *bat_priv,
+					      u8 *src, u8 *dst,
+					      void *tvlv_value,
+					      u16 tvlv_value_len),
+				  u8 type, u8 version, u8 flags)
+{
+	struct batadv_tvlv_handler *tvlv_handler;
+
+	tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+	if (tvlv_handler) {
+		batadv_tvlv_handler_put(tvlv_handler);
+		return;
+	}
+
+	tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
+	if (!tvlv_handler)
+		return;
+
+	tvlv_handler->ogm_handler = optr;
+	tvlv_handler->unicast_handler = uptr;
+	tvlv_handler->type = type;
+	tvlv_handler->version = version;
+	tvlv_handler->flags = flags;
+	kref_init(&tvlv_handler->refcount);
+	INIT_HLIST_NODE(&tvlv_handler->list);
+
+	spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+	hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
+	spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+}
+
+/**
+ * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
+ *  provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to be unregistered
+ * @version: tvlv handler version to be unregistered
+ */
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+				    u8 type, u8 version)
+{
+	struct batadv_tvlv_handler *tvlv_handler;
+
+	tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+	if (!tvlv_handler)
+		return;
+
+	batadv_tvlv_handler_put(tvlv_handler);
+	spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+	hlist_del_rcu(&tvlv_handler->list);
+	spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+	batadv_tvlv_handler_put(tvlv_handler);
+}
+
+/**
+ * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
+ *  specified host
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @type: tvlv type
+ * @version: tvlv version
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ */
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+			      u8 *dst, u8 type, u8 version,
+			      void *tvlv_value, u16 tvlv_value_len)
+{
+	struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
+	struct batadv_tvlv_hdr *tvlv_hdr;
+	struct batadv_orig_node *orig_node;
+	struct sk_buff *skb;
+	unsigned char *tvlv_buff;
+	unsigned int tvlv_len;
+	ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
+	int res;
+
+	orig_node = batadv_orig_hash_find(bat_priv, dst);
+	if (!orig_node)
+		return;
+
+	tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
+
+	skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
+	if (!skb)
+		goto out;
+
+	skb->priority = TC_PRIO_CONTROL;
+	skb_reserve(skb, ETH_HLEN);
+	tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
+	unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
+	unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV;
+	unicast_tvlv_packet->version = BATADV_COMPAT_VERSION;
+	unicast_tvlv_packet->ttl = BATADV_TTL;
+	unicast_tvlv_packet->reserved = 0;
+	unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
+	unicast_tvlv_packet->align = 0;
+	ether_addr_copy(unicast_tvlv_packet->src, src);
+	ether_addr_copy(unicast_tvlv_packet->dst, dst);
+
+	tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
+	tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
+	tvlv_hdr->version = version;
+	tvlv_hdr->type = type;
+	tvlv_hdr->len = htons(tvlv_value_len);
+	tvlv_buff += sizeof(*tvlv_hdr);
+	memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
+
+	res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+	if (res == -1)
+		kfree_skb(skb);
+out:
+	batadv_orig_node_put(orig_node);
+}
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
new file mode 100644
index 000000000000..e4369b547b43
--- /dev/null
+++ b/net/batman-adv/tvlv.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2007-2016  B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_TVLV_H_
+#define _NET_BATMAN_ADV_TVLV_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_ogm_packet;
+
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+				    u8 type, u8 version,
+				    void *tvlv_value, u16 tvlv_value_len);
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+				     unsigned char **packet_buff,
+				     int *packet_buff_len, int packet_min_len);
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+			     struct batadv_ogm_packet *batadv_ogm_packet,
+			     struct batadv_orig_node *orig_node);
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+				      u8 type, u8 version);
+
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+				  void (*optr)(struct batadv_priv *bat_priv,
+					       struct batadv_orig_node *orig,
+					       u8 flags,
+					       void *tvlv_value,
+					       u16 tvlv_value_len),
+				  int (*uptr)(struct batadv_priv *bat_priv,
+					      u8 *src, u8 *dst,
+					      void *tvlv_value,
+					      u16 tvlv_value_len),
+				  u8 type, u8 version, u8 flags);
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+				    u8 type, u8 version);
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+				   bool ogm_source,
+				   struct batadv_orig_node *orig_node,
+				   u8 *src, u8 *dst,
+				   void *tvlv_buff, u16 tvlv_buff_len);
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+			      u8 *dst, u8 type, u8 version,
+			      void *tvlv_value, u16 tvlv_value_len);
+
+#endif /* _NET_BATMAN_ADV_TVLV_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 6a577f4f8ba7..43db7b61f8eb 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -33,6 +33,7 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "packet.h"
 
@@ -707,6 +708,8 @@ struct batadv_priv_debug_log {
  * @list: list of available gateway nodes
  * @list_lock: lock protecting gw_list & curr_gw
  * @curr_gw: pointer to currently selected gateway node
+ * @mode: gateway operation: off, client or server (see batadv_gw_modes)
+ * @sel_class: gateway selection class (applies if gw_mode client)
  * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server)
  * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
  * @reselect: bool indicating a gateway re-selection is in progress
@@ -715,6 +718,8 @@ struct batadv_priv_gw {
 	struct hlist_head list;
 	spinlock_t list_lock; /* protects gw_list & curr_gw */
 	struct batadv_gw_node __rcu *curr_gw;  /* rcu protected pointer */
+	atomic_t mode;
+	atomic_t sel_class;
 	atomic_t bandwidth_down;
 	atomic_t bandwidth_up;
 	atomic_t reselect;
@@ -751,14 +756,28 @@ struct batadv_priv_dat {
 
 #ifdef CONFIG_BATMAN_ADV_MCAST
 /**
+ * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged
+ * @exists: whether a querier exists in the mesh
+ * @shadowing: if a querier exists, whether it is potentially shadowing
+ *  multicast listeners (i.e. querier is behind our own bridge segment)
+ */
+struct batadv_mcast_querier_state {
+	bool exists;
+	bool shadowing;
+};
+
+/**
  * struct batadv_priv_mcast - per mesh interface mcast data
  * @mla_list: list of multicast addresses we are currently announcing via TT
  * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable
  *  multicast traffic
  * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic
  * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic
+ * @querier_ipv4: the current state of an IGMP querier in the mesh
+ * @querier_ipv6: the current state of an MLD querier in the mesh
  * @flags: the flags we have last sent in our mcast tvlv
  * @enabled: whether the multicast tvlv is currently enabled
+ * @bridged: whether the soft interface has a bridge on top
  * @num_disabled: number of nodes that have no mcast tvlv
  * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic
  * @num_want_all_ipv4: counter for items in want_all_ipv4_list
@@ -771,8 +790,11 @@ struct batadv_priv_mcast {
 	struct hlist_head want_all_unsnoopables_list;
 	struct hlist_head want_all_ipv4_list;
 	struct hlist_head want_all_ipv6_list;
+	struct batadv_mcast_querier_state querier_ipv4;
+	struct batadv_mcast_querier_state querier_ipv6;
 	u8 flags;
 	bool enabled;
+	bool bridged;
 	atomic_t num_disabled;
 	atomic_t num_want_all_unsnoopables;
 	atomic_t num_want_all_ipv4;
@@ -812,6 +834,111 @@ struct batadv_priv_nc {
 };
 
 /**
+ * struct batadv_tp_unacked - unacked packet meta-information
+ * @seqno: seqno of the unacked packet
+ * @len: length of the packet
+ * @list: list node for batadv_tp_vars::unacked_list
+ *
+ * This struct is supposed to represent a buffer unacked packet. However, since
+ * the purpose of the TP meter is to count the traffic only, there is no need to
+ * store the entire sk_buff, the starting offset and the length are enough
+ */
+struct batadv_tp_unacked {
+	u32 seqno;
+	u16 len;
+	struct list_head list;
+};
+
+/**
+ * enum batadv_tp_meter_role - Modus in tp meter session
+ * @BATADV_TP_RECEIVER: Initialized as receiver
+ * @BATADV_TP_SENDER: Initialized as sender
+ */
+enum batadv_tp_meter_role {
+	BATADV_TP_RECEIVER,
+	BATADV_TP_SENDER
+};
+
+/**
+ * struct batadv_tp_vars - tp meter private variables per session
+ * @list: list node for bat_priv::tp_list
+ * @timer: timer for ack (receiver) and retry (sender)
+ * @bat_priv: pointer to the mesh object
+ * @start_time: start time in jiffies
+ * @other_end: mac address of remote
+ * @role: receiver/sender modi
+ * @sending: sending binary semaphore: 1 if sending, 0 is not
+ * @reason: reason for a stopped session
+ * @finish_work: work item for the finishing procedure
+ * @test_length: test length in milliseconds
+ * @session: TP session identifier
+ * @icmp_uid: local ICMP "socket" index
+ * @dec_cwnd: decimal part of the cwnd used during linear growth
+ * @cwnd: current size of the congestion window
+ * @cwnd_lock: lock do protect @cwnd & @dec_cwnd
+ * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
+ *  connection switches to the Congestion Avoidance state
+ * @last_acked: last acked byte
+ * @last_sent: last sent byte, not yet acked
+ * @tot_sent: amount of data sent/ACKed so far
+ * @dup_acks: duplicate ACKs counter
+ * @fast_recovery: true if in Fast Recovery mode
+ * @recover: last sent seqno when entering Fast Recovery
+ * @rto: sender timeout
+ * @srtt: smoothed RTT scaled by 2^3
+ * @rttvar: RTT variation scaled by 2^2
+ * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout
+ * @prerandom_offset: offset inside the prerandom buffer
+ * @prerandom_lock: spinlock protecting access to prerandom_offset
+ * @last_recv: last in-order received packet
+ * @unacked_list: list of unacked packets (meta-info only)
+ * @unacked_lock: protect unacked_list
+ * @last_recv_time: time time (jiffies) a msg was received
+ * @refcount: number of context where the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_tp_vars {
+	struct hlist_node list;
+	struct timer_list timer;
+	struct batadv_priv *bat_priv;
+	unsigned long start_time;
+	u8 other_end[ETH_ALEN];
+	enum batadv_tp_meter_role role;
+	atomic_t sending;
+	enum batadv_tp_meter_reason reason;
+	struct delayed_work finish_work;
+	u32 test_length;
+	u8 session[2];
+	u8 icmp_uid;
+
+	/* sender variables */
+	u16 dec_cwnd;
+	u32 cwnd;
+	spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */
+	u32 ss_threshold;
+	atomic_t last_acked;
+	u32 last_sent;
+	atomic64_t tot_sent;
+	atomic_t dup_acks;
+	bool fast_recovery;
+	u32 recover;
+	u32 rto;
+	u32 srtt;
+	u32 rttvar;
+	wait_queue_head_t more_bytes;
+	u32 prerandom_offset;
+	spinlock_t prerandom_lock; /* Protects prerandom_offset */
+
+	/* receiver variables */
+	u32 last_recv;
+	struct list_head unacked_list;
+	spinlock_t unacked_lock; /* Protects unacked_list */
+	unsigned long last_recv_time;
+	struct kref refcount;
+	struct rcu_head rcu;
+};
+
+/**
  * struct batadv_softif_vlan - per VLAN attributes set
  * @bat_priv: pointer to the mesh object
  * @vid: VLAN identifier
@@ -865,8 +992,6 @@ struct batadv_priv_bat_v {
  *  enabled
  * @multicast_mode: Enable or disable multicast optimizations on this node's
  *  sender/originating side
- * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes)
- * @gw_sel_class: gateway selection class (applies if gw_mode client)
  * @orig_interval: OGM broadcast interval in milliseconds
  * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
  * @log_level: configured log level (see batadv_dbg_level)
@@ -881,14 +1006,17 @@ struct batadv_priv_bat_v {
  * @debug_dir: dentry for debugfs batman-adv subdirectory
  * @forw_bat_list: list of aggregated OGMs that will be forwarded
  * @forw_bcast_list: list of broadcast packets that will be rebroadcasted
+ * @tp_list: list of tp sessions
+ * @tp_num: number of currently active tp sessions
  * @orig_hash: hash table containing mesh participants (orig nodes)
  * @forw_bat_list_lock: lock protecting forw_bat_list
  * @forw_bcast_list_lock: lock protecting forw_bcast_list
+ * @tp_list_lock: spinlock protecting @tp_list
  * @orig_work: work queue callback item for orig node purging
  * @cleanup_work: work queue callback item for soft-interface deinit
  * @primary_if: one of the hard-interfaces assigned to this mesh interface
  *  becomes the primary interface
- * @bat_algo_ops: routing algorithm used by this mesh interface
+ * @algo_ops: routing algorithm used by this mesh interface
  * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
  *  of the mesh interface represented by this object
  * @softif_vlan_list_lock: lock protecting softif_vlan_list
@@ -922,8 +1050,6 @@ struct batadv_priv {
 #ifdef CONFIG_BATMAN_ADV_MCAST
 	atomic_t multicast_mode;
 #endif
-	atomic_t gw_mode;
-	atomic_t gw_sel_class;
 	atomic_t orig_interval;
 	atomic_t hop_penalty;
 #ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -939,13 +1065,16 @@ struct batadv_priv {
 	struct dentry *debug_dir;
 	struct hlist_head forw_bat_list;
 	struct hlist_head forw_bcast_list;
+	struct hlist_head tp_list;
 	struct batadv_hashtable *orig_hash;
 	spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
 	spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */
+	spinlock_t tp_list_lock; /* protects tp_list */
+	atomic_t tp_num;
 	struct delayed_work orig_work;
 	struct work_struct cleanup_work;
 	struct batadv_hard_iface __rcu *primary_if;  /* rcu protected pointer */
-	struct batadv_algo_ops *bat_algo_ops;
+	struct batadv_algo_ops *algo_ops;
 	struct hlist_head softif_vlan_list;
 	spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */
 #ifdef CONFIG_BATMAN_ADV_BLA
@@ -1137,11 +1266,13 @@ struct batadv_tt_change_node {
  * struct batadv_tt_req_node - data to keep track of the tt requests in flight
  * @addr: mac address address of the originator this request was sent to
  * @issued_at: timestamp used for purging stale tt requests
+ * @refcount: number of contexts the object is used by
  * @list: list node for batadv_priv_tt::req_list
  */
 struct batadv_tt_req_node {
 	u8 addr[ETH_ALEN];
 	unsigned long issued_at;
+	struct kref refcount;
 	struct hlist_node list;
 };
 
@@ -1259,66 +1390,77 @@ struct batadv_forw_packet {
 };
 
 /**
+ * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
+ * @activate: start routing mechanisms when hard-interface is brought up
+ * @enable: init routing info when hard-interface is enabled
+ * @disable: de-init routing info when hard-interface is disabled
+ * @update_mac: (re-)init mac addresses of the protocol information
+ *  belonging to this hard-interface
+ * @primary_set: called when primary interface is selected / changed
+ */
+struct batadv_algo_iface_ops {
+	void (*activate)(struct batadv_hard_iface *hard_iface);
+	int (*enable)(struct batadv_hard_iface *hard_iface);
+	void (*disable)(struct batadv_hard_iface *hard_iface);
+	void (*update_mac)(struct batadv_hard_iface *hard_iface);
+	void (*primary_set)(struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
+ * @hardif_init: called on creation of single hop entry
+ * @cmp: compare the metrics of two neighbors for their respective outgoing
+ *  interfaces
+ * @is_similar_or_better: check if neigh1 is equally similar or better than
+ *  neigh2 for their respective outgoing interface from the metric prospective
+ * @print: print the single hop neighbor list (optional)
+ */
+struct batadv_algo_neigh_ops {
+	void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
+	int (*cmp)(struct batadv_neigh_node *neigh1,
+		   struct batadv_hard_iface *if_outgoing1,
+		   struct batadv_neigh_node *neigh2,
+		   struct batadv_hard_iface *if_outgoing2);
+	bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1,
+				     struct batadv_hard_iface *if_outgoing1,
+				     struct batadv_neigh_node *neigh2,
+				     struct batadv_hard_iface *if_outgoing2);
+	void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+};
+
+/**
+ * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
+ * @free: free the resources allocated by the routing algorithm for an orig_node
+ *  object
+ * @add_if: ask the routing algorithm to apply the needed changes to the
+ *  orig_node due to a new hard-interface being added into the mesh
+ * @del_if: ask the routing algorithm to apply the needed changes to the
+ *  orig_node due to an hard-interface being removed from the mesh
+ * @print: print the originator table (optional)
+ */
+struct batadv_algo_orig_ops {
+	void (*free)(struct batadv_orig_node *orig_node);
+	int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+	int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
+		      int del_if_num);
+	void (*print)(struct batadv_priv *priv, struct seq_file *seq,
+		      struct batadv_hard_iface *hard_iface);
+};
+
+/**
  * struct batadv_algo_ops - mesh algorithm callbacks
  * @list: list node for the batadv_algo_list
  * @name: name of the algorithm
- * @bat_iface_activate: start routing mechanisms when hard-interface is brought
- *  up
- * @bat_iface_enable: init routing info when hard-interface is enabled
- * @bat_iface_disable: de-init routing info when hard-interface is disabled
- * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
- *  belonging to this hard-interface
- * @bat_primary_iface_set: called when primary interface is selected / changed
- * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
- * @bat_ogm_emit: send scheduled OGM
- * @bat_hardif_neigh_init: called on creation of single hop entry
- * @bat_neigh_cmp: compare the metrics of two neighbors for their respective
- *  outgoing interfaces
- * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or
- *  better than neigh2 for their respective outgoing interface from the metric
- *  prospective
- * @bat_neigh_print: print the single hop neighbor list (optional)
- * @bat_neigh_free: free the resources allocated by the routing algorithm for a
- *  neigh_node object
- * @bat_orig_print: print the originator table (optional)
- * @bat_orig_free: free the resources allocated by the routing algorithm for an
- *  orig_node object
- * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to
- *  the orig_node due to a new hard-interface being added into the mesh
- * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to
- *  the orig_node due to an hard-interface being removed from the mesh
+ * @iface: callbacks related to interface handling
+ * @neigh: callbacks related to neighbors handling
+ * @orig: callbacks related to originators handling
  */
 struct batadv_algo_ops {
 	struct hlist_node list;
 	char *name;
-	void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
-	int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
-	void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
-	void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
-	void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
-	void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
-	void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
-	/* neigh_node handling API */
-	void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh);
-	int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
-			     struct batadv_hard_iface *if_outgoing1,
-			     struct batadv_neigh_node *neigh2,
-			     struct batadv_hard_iface *if_outgoing2);
-	bool (*bat_neigh_is_similar_or_better)
-		(struct batadv_neigh_node *neigh1,
-		 struct batadv_hard_iface *if_outgoing1,
-		 struct batadv_neigh_node *neigh2,
-		 struct batadv_hard_iface *if_outgoing2);
-	void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq);
-	void (*bat_neigh_free)(struct batadv_neigh_node *neigh);
-	/* orig_node handling API */
-	void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
-			       struct batadv_hard_iface *hard_iface);
-	void (*bat_orig_free)(struct batadv_orig_node *orig_node);
-	int (*bat_orig_add_if)(struct batadv_orig_node *orig_node,
-			       int max_if_num);
-	int (*bat_orig_del_if)(struct batadv_orig_node *orig_node,
-			       int max_if_num, int del_if_num);
+	struct batadv_algo_iface_ops iface;
+	struct batadv_algo_neigh_ops neigh;
+	struct batadv_algo_orig_ops orig;
 };
 
 /**
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 3df7aefb7663..ece45e0683fd 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -215,6 +215,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	size_t copied;
+	size_t skblen;
 	int err;
 
 	BT_DBG("sock %p sk %p len %zu", sock, sk, len);
@@ -230,6 +231,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		return err;
 	}
 
+	skblen = skb->len;
 	copied = skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
@@ -248,6 +250,9 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	skb_free_datagram(sk, skb);
 
+	if (msg->msg_flags & MSG_TRUNC)
+		copied = skblen;
+
 	return err ? : copied;
 }
 EXPORT_SYMBOL(bt_sock_recvmsg);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index bf9f8a801a2e..3809617aa98d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -625,7 +625,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
 	list_for_each_entry(d, &hci_dev_list, list) {
 		if (!test_bit(HCI_UP, &d->flags) ||
 		    hci_dev_test_flag(d, HCI_USER_CHANNEL) ||
-		    d->dev_type != HCI_BREDR)
+		    d->dev_type != HCI_PRIMARY)
 			continue;
 
 		/* Simple routing:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 45a9fc68c677..98f6c3770736 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -260,14 +260,12 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
 		hci_reset_req(req, 0);
 
 	switch (hdev->dev_type) {
-	case HCI_BREDR:
+	case HCI_PRIMARY:
 		bredr_init(req);
 		break;
-
 	case HCI_AMP:
 		amp_init1(req);
 		break;
-
 	default:
 		BT_ERR("Unknown device type %d", hdev->dev_type);
 		break;
@@ -791,11 +789,11 @@ static int __hci_init(struct hci_dev *hdev)
 	if (err < 0)
 		return err;
 
-	/* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode
+	/* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode
 	 * BR/EDR/LE type controllers. AMP controllers only need the
 	 * first two stages of init.
 	 */
-	if (hdev->dev_type != HCI_BREDR)
+	if (hdev->dev_type != HCI_PRIMARY)
 		return 0;
 
 	err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL);
@@ -1202,7 +1200,7 @@ int hci_inquiry(void __user *arg)
 		goto done;
 	}
 
-	if (hdev->dev_type != HCI_BREDR) {
+	if (hdev->dev_type != HCI_PRIMARY) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1307,7 +1305,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		 * since AMP controllers do not have an address.
 		 */
 		if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
-		    hdev->dev_type == HCI_BREDR &&
+		    hdev->dev_type == HCI_PRIMARY &&
 		    !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
 		    !bacmp(&hdev->static_addr, BDADDR_ANY)) {
 			ret = -EADDRNOTAVAIL;
@@ -1402,7 +1400,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		    !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
 		    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 		    hci_dev_test_flag(hdev, HCI_MGMT) &&
-		    hdev->dev_type == HCI_BREDR) {
+		    hdev->dev_type == HCI_PRIMARY) {
 			ret = __hci_req_hci_power_on(hdev);
 			mgmt_power_on(hdev, ret);
 		}
@@ -1563,7 +1561,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
 
 	auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
 
-	if (!auto_off && hdev->dev_type == HCI_BREDR &&
+	if (!auto_off && hdev->dev_type == HCI_PRIMARY &&
 	    hci_dev_test_flag(hdev, HCI_MGMT))
 		__mgmt_power_off(hdev);
 
@@ -1802,7 +1800,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 		goto done;
 	}
 
-	if (hdev->dev_type != HCI_BREDR) {
+	if (hdev->dev_type != HCI_PRIMARY) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -2043,7 +2041,7 @@ static void hci_power_on(struct work_struct *work)
 	 */
 	if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
 	    hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
-	    (hdev->dev_type == HCI_BREDR &&
+	    (hdev->dev_type == HCI_PRIMARY &&
 	     !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
 	     !bacmp(&hdev->static_addr, BDADDR_ANY))) {
 		hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
@@ -3030,7 +3028,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	 * so the index can be used as the AMP controller ID.
 	 */
 	switch (hdev->dev_type) {
-	case HCI_BREDR:
+	case HCI_PRIMARY:
 		id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
 		break;
 	case HCI_AMP:
@@ -3090,7 +3088,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	hci_dev_set_flag(hdev, HCI_SETUP);
 	hci_dev_set_flag(hdev, HCI_AUTO_OFF);
 
-	if (hdev->dev_type == HCI_BREDR) {
+	if (hdev->dev_type == HCI_PRIMARY) {
 		/* Assume BR/EDR support until proven otherwise (such as
 		 * through reading supported features during init.
 		 */
@@ -3415,7 +3413,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
 	hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
 
 	switch (hdev->dev_type) {
-	case HCI_BREDR:
+	case HCI_PRIMARY:
 		hci_add_acl_hdr(skb, conn->handle, flags);
 		break;
 	case HCI_AMP:
@@ -3826,7 +3824,7 @@ static void hci_sched_acl(struct hci_dev *hdev)
 	BT_DBG("%s", hdev->name);
 
 	/* No ACL link over BR/EDR controller */
-	if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_BREDR)
+	if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY)
 		return;
 
 	/* No AMP link over AMP controller */
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d4b3dd5413be..e17aacbc5630 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2332,7 +2332,7 @@ static u8 hci_to_mgmt_reason(u8 err)
 static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_disconn_complete *ev = (void *) skb->data;
-	u8 reason = hci_to_mgmt_reason(ev->reason);
+	u8 reason;
 	struct hci_conn_params *params;
 	struct hci_conn *conn;
 	bool mgmt_connected;
@@ -2355,6 +2355,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	conn->state = BT_CLOSED;
 
 	mgmt_connected = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags);
+
+	if (test_bit(HCI_CONN_AUTH_FAILURE, &conn->flags))
+		reason = MGMT_DEV_DISCONN_AUTH_FAILURE;
+	else
+		reason = hci_to_mgmt_reason(ev->reason);
+
 	mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type,
 				reason, mgmt_connected);
 
@@ -2421,6 +2427,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		goto unlock;
 
 	if (!ev->status) {
+		clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+
 		if (!hci_conn_ssp_enabled(conn) &&
 		    test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
 			BT_INFO("re-auth of legacy device is not possible.");
@@ -2429,6 +2437,9 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 			conn->sec_level = conn->pending_sec_level;
 		}
 	} else {
+		if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
+			set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+
 		mgmt_auth_failed(conn, ev->status);
 	}
 
@@ -2613,6 +2624,9 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
 
 	if (ev->status && conn->state == BT_CONNECTED) {
+		if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
+			set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+
 		hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
 		hci_conn_drop(conn);
 		goto unlock;
@@ -3249,7 +3263,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
 	struct hci_chan *chan;
 
 	switch (hdev->dev_type) {
-	case HCI_BREDR:
+	case HCI_PRIMARY:
 		return hci_conn_hash_lookup_handle(hdev, handle);
 	case HCI_AMP:
 		chan = hci_chan_lookup_handle(hdev, handle);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1298d723c0e0..6ef8a01a9ad4 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -676,7 +676,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
 	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 		return -EOPNOTSUPP;
 
-	if (hdev->dev_type != HCI_BREDR)
+	if (hdev->dev_type != HCI_PRIMARY)
 		return -EOPNOTSUPP;
 
 	switch (cmd) {
@@ -1048,6 +1048,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	int copied, err;
+	unsigned int skblen;
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
@@ -1064,6 +1065,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
 	if (!skb)
 		return err;
 
+	skblen = skb->len;
 	copied = skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
@@ -1089,6 +1091,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
 
 	skb_free_datagram(sk, skb);
 
+	if (msg->msg_flags & MSG_TRUNC)
+		copied = skblen;
+
 	return err ? : copied;
 }
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 555982a78a58..ca7a35ebaefb 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -7,50 +7,6 @@
 
 static struct class *bt_class;
 
-static inline char *link_typetostr(int type)
-{
-	switch (type) {
-	case ACL_LINK:
-		return "ACL";
-	case SCO_LINK:
-		return "SCO";
-	case ESCO_LINK:
-		return "eSCO";
-	case LE_LINK:
-		return "LE";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static ssize_t show_link_type(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct hci_conn *conn = to_hci_conn(dev);
-	return sprintf(buf, "%s\n", link_typetostr(conn->type));
-}
-
-static ssize_t show_link_address(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct hci_conn *conn = to_hci_conn(dev);
-	return sprintf(buf, "%pMR\n", &conn->dst);
-}
-
-#define LINK_ATTR(_name, _mode, _show, _store) \
-struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store)
-
-static LINK_ATTR(type, S_IRUGO, show_link_type, NULL);
-static LINK_ATTR(address, S_IRUGO, show_link_address, NULL);
-
-static struct attribute *bt_link_attrs[] = {
-	&link_attr_type.attr,
-	&link_attr_address.attr,
-	NULL
-};
-
-ATTRIBUTE_GROUPS(bt_link);
-
 static void bt_link_release(struct device *dev)
 {
 	struct hci_conn *conn = to_hci_conn(dev);
@@ -59,7 +15,6 @@ static void bt_link_release(struct device *dev)
 
 static struct device_type bt_link = {
 	.name    = "link",
-	.groups  = bt_link_groups,
 	.release = bt_link_release,
 };
 
@@ -124,59 +79,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 	hci_dev_put(hdev);
 }
 
-static inline char *host_typetostr(int type)
-{
-	switch (type) {
-	case HCI_BREDR:
-		return "BR/EDR";
-	case HCI_AMP:
-		return "AMP";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static ssize_t show_type(struct device *dev,
-			 struct device_attribute *attr, char *buf)
-{
-	struct hci_dev *hdev = to_hci_dev(dev);
-	return sprintf(buf, "%s\n", host_typetostr(hdev->dev_type));
-}
-
-static ssize_t show_name(struct device *dev,
-			 struct device_attribute *attr, char *buf)
-{
-	struct hci_dev *hdev = to_hci_dev(dev);
-	char name[HCI_MAX_NAME_LENGTH + 1];
-	int i;
-
-	for (i = 0; i < HCI_MAX_NAME_LENGTH; i++)
-		name[i] = hdev->dev_name[i];
-
-	name[HCI_MAX_NAME_LENGTH] = '\0';
-	return sprintf(buf, "%s\n", name);
-}
-
-static ssize_t show_address(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	struct hci_dev *hdev = to_hci_dev(dev);
-	return sprintf(buf, "%pMR\n", &hdev->bdaddr);
-}
-
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-
-static struct attribute *bt_host_attrs[] = {
-	&dev_attr_type.attr,
-	&dev_attr_name.attr,
-	&dev_attr_address.attr,
-	NULL
-};
-
-ATTRIBUTE_GROUPS(bt_host);
-
 static void bt_host_release(struct device *dev)
 {
 	struct hci_dev *hdev = to_hci_dev(dev);
@@ -186,7 +88,6 @@ static void bt_host_release(struct device *dev)
 
 static struct device_type bt_host = {
 	.name    = "host",
-	.groups  = bt_host_groups,
 	.release = bt_host_release,
 };
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index eb4f5f24cbe3..54ceb1f2cc9a 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7468,7 +7468,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 	int len;
 
 	/* For AMP controller do not create l2cap conn */
-	if (!conn && hcon->hdev->dev_type != HCI_BREDR)
+	if (!conn && hcon->hdev->dev_type != HCI_PRIMARY)
 		goto drop;
 
 	if (!conn)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 9e4b931588cf..7639290b6de3 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
 #include "mgmt_util.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	12
+#define MGMT_REVISION	13
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,
@@ -359,7 +359,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
-		if (d->dev_type == HCI_BREDR &&
+		if (d->dev_type == HCI_PRIMARY &&
 		    !hci_dev_test_flag(d, HCI_UNCONFIGURED))
 			count++;
 	}
@@ -384,7 +384,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 		if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
 			continue;
 
-		if (d->dev_type == HCI_BREDR &&
+		if (d->dev_type == HCI_PRIMARY &&
 		    !hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
 			rp->index[count++] = cpu_to_le16(d->id);
 			BT_DBG("Added hci%u", d->id);
@@ -419,7 +419,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
-		if (d->dev_type == HCI_BREDR &&
+		if (d->dev_type == HCI_PRIMARY &&
 		    hci_dev_test_flag(d, HCI_UNCONFIGURED))
 			count++;
 	}
@@ -444,7 +444,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 		if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
 			continue;
 
-		if (d->dev_type == HCI_BREDR &&
+		if (d->dev_type == HCI_PRIMARY &&
 		    hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
 			rp->index[count++] = cpu_to_le16(d->id);
 			BT_DBG("Added hci%u", d->id);
@@ -479,7 +479,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
 
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
-		if (d->dev_type == HCI_BREDR || d->dev_type == HCI_AMP)
+		if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP)
 			count++;
 	}
 
@@ -503,7 +503,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
 		if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
 			continue;
 
-		if (d->dev_type == HCI_BREDR) {
+		if (d->dev_type == HCI_PRIMARY) {
 			if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
 				rp->entry[count].type = 0x01;
 			else
@@ -6366,7 +6366,7 @@ void mgmt_index_added(struct hci_dev *hdev)
 		return;
 
 	switch (hdev->dev_type) {
-	case HCI_BREDR:
+	case HCI_PRIMARY:
 		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 			mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
 					 NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
@@ -6399,7 +6399,7 @@ void mgmt_index_removed(struct hci_dev *hdev)
 		return;
 
 	switch (hdev->dev_type) {
-	case HCI_BREDR:
+	case HCI_PRIMARY:
 		mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
 		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 50976a6481f3..4c1a16a96ae5 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -22,9 +22,9 @@
 
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
+#include <linux/crypto.h>
 #include <crypto/b128ops.h>
 #include <crypto/hash.h>
-#include <crypto/skcipher.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -88,7 +88,7 @@ struct smp_dev {
 	u8			min_key_size;
 	u8			max_key_size;
 
-	struct crypto_skcipher	*tfm_aes;
+	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
 };
 
@@ -127,7 +127,7 @@ struct smp_chan {
 	u8			dhkey[32];
 	u8			mackey[16];
 
-	struct crypto_skcipher	*tfm_aes;
+	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
 };
 
@@ -361,10 +361,8 @@ static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
  * s1 and ah.
  */
 
-static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
+static int smp_e(struct crypto_cipher *tfm, const u8 *k, u8 *r)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
-	struct scatterlist sg;
 	uint8_t tmp[16], data[16];
 	int err;
 
@@ -378,7 +376,7 @@ static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
 	/* The most significant octet of key corresponds to k[0] */
 	swap_buf(k, tmp, 16);
 
-	err = crypto_skcipher_setkey(tfm, tmp, 16);
+	err = crypto_cipher_setkey(tfm, tmp, 16);
 	if (err) {
 		BT_ERR("cipher setkey failed: %d", err);
 		return err;
@@ -387,16 +385,7 @@ static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
 	/* Most significant octet of plaintextData corresponds to data[0] */
 	swap_buf(r, data, 16);
 
-	sg_init_one(&sg, data, 16);
-
-	skcipher_request_set_tfm(req, tfm);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg, &sg, 16, NULL);
-
-	err = crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
-	if (err)
-		BT_ERR("Encrypt data error %d", err);
+	crypto_cipher_encrypt_one(tfm, data, data);
 
 	/* Most significant octet of encryptedData corresponds to data[0] */
 	swap_buf(data, r, 16);
@@ -406,7 +395,7 @@ static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
 	return err;
 }
 
-static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
+static int smp_c1(struct crypto_cipher *tfm_aes, const u8 k[16],
 		  const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
 		  const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
 {
@@ -455,7 +444,7 @@ static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
 	return err;
 }
 
-static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
+static int smp_s1(struct crypto_cipher *tfm_aes, const u8 k[16],
 		  const u8 r1[16], const u8 r2[16], u8 _r[16])
 {
 	int err;
@@ -471,7 +460,7 @@ static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
 	return err;
 }
 
-static int smp_ah(struct crypto_skcipher *tfm, const u8 irk[16],
+static int smp_ah(struct crypto_cipher *tfm, const u8 irk[16],
 		  const u8 r[3], u8 res[3])
 {
 	u8 _res[16];
@@ -759,7 +748,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
 	kzfree(smp->slave_csrk);
 	kzfree(smp->link_key);
 
-	crypto_free_skcipher(smp->tfm_aes);
+	crypto_free_cipher(smp->tfm_aes);
 	crypto_free_shash(smp->tfm_cmac);
 
 	/* Ensure that we don't leave any debug key around if debug key
@@ -1359,9 +1348,9 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 	if (!smp)
 		return NULL;
 
-	smp->tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+	smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(smp->tfm_aes)) {
-		BT_ERR("Unable to create ECB crypto context");
+		BT_ERR("Unable to create AES crypto context");
 		kzfree(smp);
 		return NULL;
 	}
@@ -1369,7 +1358,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 	smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(smp->tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_skcipher(smp->tfm_aes);
+		crypto_free_cipher(smp->tfm_aes);
 		kzfree(smp);
 		return NULL;
 	}
@@ -3120,7 +3109,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 {
 	struct l2cap_chan *chan;
 	struct smp_dev *smp;
-	struct crypto_skcipher *tfm_aes;
+	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
 
 	if (cid == L2CAP_CID_SMP_BREDR) {
@@ -3132,9 +3121,9 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 	if (!smp)
 		return ERR_PTR(-ENOMEM);
 
-	tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_aes)) {
-		BT_ERR("Unable to create ECB crypto context");
+		BT_ERR("Unable to create AES crypto context");
 		kzfree(smp);
 		return ERR_CAST(tfm_aes);
 	}
@@ -3142,7 +3131,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_skcipher(tfm_aes);
+		crypto_free_cipher(tfm_aes);
 		kzfree(smp);
 		return ERR_CAST(tfm_cmac);
 	}
@@ -3156,7 +3145,7 @@ create_chan:
 	chan = l2cap_chan_create();
 	if (!chan) {
 		if (smp) {
-			crypto_free_skcipher(smp->tfm_aes);
+			crypto_free_cipher(smp->tfm_aes);
 			crypto_free_shash(smp->tfm_cmac);
 			kzfree(smp);
 		}
@@ -3203,7 +3192,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
 	smp = chan->data;
 	if (smp) {
 		chan->data = NULL;
-		crypto_free_skcipher(smp->tfm_aes);
+		crypto_free_cipher(smp->tfm_aes);
 		crypto_free_shash(smp->tfm_cmac);
 		kzfree(smp);
 	}
@@ -3440,7 +3429,7 @@ void smp_unregister(struct hci_dev *hdev)
 
 #if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
 
-static int __init test_ah(struct crypto_skcipher *tfm_aes)
+static int __init test_ah(struct crypto_cipher *tfm_aes)
 {
 	const u8 irk[16] = {
 			0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3460,7 +3449,7 @@ static int __init test_ah(struct crypto_skcipher *tfm_aes)
 	return 0;
 }
 
-static int __init test_c1(struct crypto_skcipher *tfm_aes)
+static int __init test_c1(struct crypto_cipher *tfm_aes)
 {
 	const u8 k[16] = {
 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3490,7 +3479,7 @@ static int __init test_c1(struct crypto_skcipher *tfm_aes)
 	return 0;
 }
 
-static int __init test_s1(struct crypto_skcipher *tfm_aes)
+static int __init test_s1(struct crypto_cipher *tfm_aes)
 {
 	const u8 k[16] = {
 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3686,7 +3675,7 @@ static const struct file_operations test_smp_fops = {
 	.llseek		= default_llseek,
 };
 
-static int __init run_selftests(struct crypto_skcipher *tfm_aes,
+static int __init run_selftests(struct crypto_cipher *tfm_aes,
 				struct crypto_shash *tfm_cmac)
 {
 	ktime_t calltime, delta, rettime;
@@ -3764,27 +3753,27 @@ done:
 
 int __init bt_selftest_smp(void)
 {
-	struct crypto_skcipher *tfm_aes;
+	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
 	int err;
 
-	tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_aes)) {
-		BT_ERR("Unable to create ECB crypto context");
+		BT_ERR("Unable to create AES crypto context");
 		return PTR_ERR(tfm_aes);
 	}
 
 	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_skcipher(tfm_aes);
+		crypto_free_cipher(tfm_aes);
 		return PTR_ERR(tfm_cmac);
 	}
 
 	err = run_selftests(tfm_aes, tfm_cmac);
 
 	crypto_free_shash(tfm_cmac);
-	crypto_free_skcipher(tfm_aes);
+	crypto_free_cipher(tfm_aes);
 
 	return err;
 }
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 2c8095a5d824..8eecd0ec22f2 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -104,8 +104,16 @@ static int br_dev_init(struct net_device *dev)
 		return -ENOMEM;
 
 	err = br_vlan_init(br);
-	if (err)
+	if (err) {
 		free_percpu(br->stats);
+		return err;
+	}
+
+	err = br_multicast_init_stats(br);
+	if (err) {
+		free_percpu(br->stats);
+		br_vlan_flush(br);
+	}
 	br_set_lockdep_class(dev);
 
 	return err;
@@ -341,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_add_slave		 = br_add_slave,
 	.ndo_del_slave		 = br_del_slave,
 	.ndo_fix_features        = br_fix_features,
+	.ndo_neigh_construct	 = netdev_default_l2upper_neigh_construct,
+	.ndo_neigh_destroy	 = netdev_default_l2upper_neigh_destroy,
 	.ndo_fdb_add		 = br_fdb_add,
 	.ndo_fdb_del		 = br_fdb_delete,
 	.ndo_fdb_dump		 = br_fdb_dump,
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f47759f05b6d..d610644368b9 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -198,8 +198,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
 					   struct sk_buff *skb),
 		     bool unicast)
 {
-	struct net_bridge_port *p;
+	u8 igmp_type = br_multicast_igmp_type(skb);
 	struct net_bridge_port *prev;
+	struct net_bridge_port *p;
 
 	prev = NULL;
 
@@ -218,6 +219,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
 		prev = maybe_deliver(prev, p, skb, __packet_hook);
 		if (IS_ERR(prev))
 			goto out;
+		if (prev == p)
+			br_multicast_count(p->br, p, skb, igmp_type,
+					   BR_MCAST_DIR_TX);
 	}
 
 	if (!prev)
@@ -257,6 +261,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 					struct sk_buff *skb))
 {
 	struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
+	u8 igmp_type = br_multicast_igmp_type(skb);
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *prev = NULL;
 	struct net_bridge_port_group *p;
@@ -277,6 +282,9 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		prev = maybe_deliver(prev, port, skb, __packet_hook);
 		if (IS_ERR(prev))
 			goto out;
+		if (prev == port)
+			br_multicast_count(port->br, port, skb, igmp_type,
+					   BR_MCAST_DIR_TX);
 
 		if ((unsigned long)lport >= (unsigned long)port)
 			p = rcu_dereference(p->next);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 8217aecf025b..f2fede05d32c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -345,8 +345,8 @@ static int find_portno(struct net_bridge *br)
 static struct net_bridge_port *new_nbp(struct net_bridge *br,
 				       struct net_device *dev)
 {
-	int index;
 	struct net_bridge_port *p;
+	int index, err;
 
 	index = find_portno(br);
 	if (index < 0)
@@ -366,7 +366,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	br_init_port(p);
 	br_set_state(p, BR_STATE_DISABLED);
 	br_stp_port_timer_init(p);
-	br_multicast_add_port(p);
+	err = br_multicast_add_port(p);
+	if (err) {
+		dev_put(dev);
+		kfree(p);
+		p = ERR_PTR(err);
+	}
 
 	return p;
 }
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 160797722228..a7817e6f306f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -60,6 +60,9 @@ static int br_pass_frame_up(struct sk_buff *skb)
 	skb = br_handle_vlan(br, vg, skb);
 	if (!skb)
 		return NET_RX_DROP;
+	/* update the multicast stats if the packet is IGMP/MLD */
+	br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
+			   BR_MCAST_DIR_TX);
 
 	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
 		       dev_net(indev), NULL, skb, indev, NULL,
@@ -213,8 +216,7 @@ drop:
 }
 EXPORT_SYMBOL_GPL(br_handle_frame_finish);
 
-/* note: already called with rcu_read_lock */
-static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static void __br_handle_local_finish(struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	u16 vid = 0;
@@ -222,6 +224,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu
 	/* check if vlan is allowed, to avoid spoofing */
 	if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
 		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+}
+
+/* note: already called with rcu_read_lock */
+static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+
+	__br_handle_local_finish(skb);
 
 	BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
 	br_pass_frame_up(skb);
@@ -274,7 +284,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 			if (p->br->stp_enabled == BR_NO_STP ||
 			    fwd_mask & (1u << dest[5]))
 				goto forward;
-			break;
+			*pskb = skb;
+			__br_handle_local_finish(skb);
+			return RX_HANDLER_PASS;
 
 		case 0x01:	/* IEEE MAC (Pause) */
 			goto drop;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6852f3c7009c..a5423a1eec05 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -361,7 +361,8 @@ out:
 }
 
 static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
-						    __be32 group)
+						    __be32 group,
+						    u8 *igmp_type)
 {
 	struct sk_buff *skb;
 	struct igmphdr *ih;
@@ -411,6 +412,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
 
 	skb_set_transport_header(skb, skb->len);
 	ih = igmp_hdr(skb);
+	*igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
 	ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
 	ih->code = (group ? br->multicast_last_member_interval :
 			    br->multicast_query_response_interval) /
@@ -428,7 +430,8 @@ out:
 
 #if IS_ENABLED(CONFIG_IPV6)
 static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
-						    const struct in6_addr *group)
+						    const struct in6_addr *grp,
+						    u8 *igmp_type)
 {
 	struct sk_buff *skb;
 	struct ipv6hdr *ip6h;
@@ -464,8 +467,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
 			       &ip6h->saddr)) {
 		kfree_skb(skb);
+		br->has_ipv6_addr = 0;
 		return NULL;
 	}
+
+	br->has_ipv6_addr = 1;
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
@@ -484,16 +490,17 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	skb_set_transport_header(skb, skb->len);
 	mldq = (struct mld_msg *) icmp6_hdr(skb);
 
-	interval = ipv6_addr_any(group) ?
+	interval = ipv6_addr_any(grp) ?
 			br->multicast_query_response_interval :
 			br->multicast_last_member_interval;
 
+	*igmp_type = ICMPV6_MGM_QUERY;
 	mldq->mld_type = ICMPV6_MGM_QUERY;
 	mldq->mld_code = 0;
 	mldq->mld_cksum = 0;
 	mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
 	mldq->mld_reserved = 0;
-	mldq->mld_mca = *group;
+	mldq->mld_mca = *grp;
 
 	/* checksum */
 	mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -510,14 +517,16 @@ out:
 #endif
 
 static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
-						struct br_ip *addr)
+						struct br_ip *addr,
+						u8 *igmp_type)
 {
 	switch (addr->proto) {
 	case htons(ETH_P_IP):
-		return br_ip4_multicast_alloc_query(br, addr->u.ip4);
+		return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type);
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
+		return br_ip6_multicast_alloc_query(br, &addr->u.ip6,
+						    igmp_type);
 #endif
 	}
 	return NULL;
@@ -826,18 +835,23 @@ static void __br_multicast_send_query(struct net_bridge *br,
 				      struct br_ip *ip)
 {
 	struct sk_buff *skb;
+	u8 igmp_type;
 
-	skb = br_multicast_alloc_query(br, ip);
+	skb = br_multicast_alloc_query(br, ip, &igmp_type);
 	if (!skb)
 		return;
 
 	if (port) {
 		skb->dev = port->dev;
+		br_multicast_count(br, port, skb, igmp_type,
+				   BR_MCAST_DIR_TX);
 		NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
 			dev_net(port->dev), NULL, skb, NULL, skb->dev,
 			br_dev_queue_push_xmit);
 	} else {
 		br_multicast_select_own_querier(br, ip, skb);
+		br_multicast_count(br, port, skb, igmp_type,
+				   BR_MCAST_DIR_RX);
 		netif_rx(skb);
 	}
 }
@@ -915,7 +929,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
 }
 #endif
 
-void br_multicast_add_port(struct net_bridge_port *port)
+int br_multicast_add_port(struct net_bridge_port *port)
 {
 	port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 
@@ -927,6 +941,11 @@ void br_multicast_add_port(struct net_bridge_port *port)
 	setup_timer(&port->ip6_own_query.timer,
 		    br_ip6_multicast_port_query_expired, (unsigned long)port);
 #endif
+	port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
+	if (!port->mcast_stats)
+		return -ENOMEM;
+
+	return 0;
 }
 
 void br_multicast_del_port(struct net_bridge_port *port)
@@ -941,6 +960,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
 		br_multicast_del_pg(br, pg);
 	spin_unlock_bh(&br->multicast_lock);
 	del_timer_sync(&port->multicast_router_timer);
+	free_percpu(port->mcast_stats);
 }
 
 static void br_multicast_enable(struct bridge_mcast_own_query *query)
@@ -1580,6 +1600,39 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 }
 #endif
 
+static void br_multicast_err_count(const struct net_bridge *br,
+				   const struct net_bridge_port *p,
+				   __be16 proto)
+{
+	struct bridge_mcast_stats __percpu *stats;
+	struct bridge_mcast_stats *pstats;
+
+	if (!br->multicast_stats_enabled)
+		return;
+
+	if (p)
+		stats = p->mcast_stats;
+	else
+		stats = br->mcast_stats;
+	if (WARN_ON(!stats))
+		return;
+
+	pstats = this_cpu_ptr(stats);
+
+	u64_stats_update_begin(&pstats->syncp);
+	switch (proto) {
+	case htons(ETH_P_IP):
+		pstats->mstats.igmp_parse_errors++;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		pstats->mstats.mld_parse_errors++;
+		break;
+#endif
+	}
+	u64_stats_update_end(&pstats->syncp);
+}
+
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb,
@@ -1596,11 +1649,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 			BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
 		return 0;
 	} else if (err < 0) {
+		br_multicast_err_count(br, port, skb->protocol);
 		return err;
 	}
 
-	BR_INPUT_SKB_CB(skb)->igmp = 1;
 	ih = igmp_hdr(skb);
+	BR_INPUT_SKB_CB(skb)->igmp = ih->type;
 
 	switch (ih->type) {
 	case IGMP_HOST_MEMBERSHIP_REPORT:
@@ -1622,6 +1676,9 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	if (skb_trimmed && skb_trimmed != skb)
 		kfree_skb(skb_trimmed);
 
+	br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+			   BR_MCAST_DIR_RX);
+
 	return err;
 }
 
@@ -1642,11 +1699,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 			BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
 		return 0;
 	} else if (err < 0) {
+		br_multicast_err_count(br, port, skb->protocol);
 		return err;
 	}
 
-	BR_INPUT_SKB_CB(skb)->igmp = 1;
 	mld = (struct mld_msg *)skb_transport_header(skb);
+	BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type;
 
 	switch (mld->mld_type) {
 	case ICMPV6_MGM_REPORT:
@@ -1667,6 +1725,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	if (skb_trimmed && skb_trimmed != skb)
 		kfree_skb(skb_trimmed);
 
+	br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+			   BR_MCAST_DIR_RX);
+
 	return err;
 }
 #endif
@@ -1674,6 +1735,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 		     struct sk_buff *skb, u16 vid)
 {
+	int ret = 0;
+
 	BR_INPUT_SKB_CB(skb)->igmp = 0;
 	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
 
@@ -1682,14 +1745,16 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		return br_multicast_ipv4_rcv(br, port, skb, vid);
+		ret = br_multicast_ipv4_rcv(br, port, skb, vid);
+		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return br_multicast_ipv6_rcv(br, port, skb, vid);
+		ret = br_multicast_ipv6_rcv(br, port, skb, vid);
+		break;
 #endif
 	}
 
-	return 0;
+	return ret;
 }
 
 static void br_multicast_query_expired(struct net_bridge *br,
@@ -1745,6 +1810,7 @@ void br_multicast_init(struct net_bridge *br)
 	br->ip6_other_query.delay_time = 0;
 	br->ip6_querier.port = NULL;
 #endif
+	br->has_ipv6_addr = 1;
 
 	spin_lock_init(&br->multicast_lock);
 	setup_timer(&br->multicast_router_timer,
@@ -1827,6 +1893,8 @@ void br_multicast_dev_del(struct net_bridge *br)
 
 out:
 	spin_unlock_bh(&br->multicast_lock);
+
+	free_percpu(br->mcast_stats);
 }
 
 int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2181,3 +2249,154 @@ unlock:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
+
+static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
+			       const struct sk_buff *skb, u8 type, u8 dir)
+{
+	struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);
+	__be16 proto = skb->protocol;
+	unsigned int t_len;
+
+	u64_stats_update_begin(&pstats->syncp);
+	switch (proto) {
+	case htons(ETH_P_IP):
+		t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
+		switch (type) {
+		case IGMP_HOST_MEMBERSHIP_REPORT:
+			pstats->mstats.igmp_v1reports[dir]++;
+			break;
+		case IGMPV2_HOST_MEMBERSHIP_REPORT:
+			pstats->mstats.igmp_v2reports[dir]++;
+			break;
+		case IGMPV3_HOST_MEMBERSHIP_REPORT:
+			pstats->mstats.igmp_v3reports[dir]++;
+			break;
+		case IGMP_HOST_MEMBERSHIP_QUERY:
+			if (t_len != sizeof(struct igmphdr)) {
+				pstats->mstats.igmp_v3queries[dir]++;
+			} else {
+				unsigned int offset = skb_transport_offset(skb);
+				struct igmphdr *ih, _ihdr;
+
+				ih = skb_header_pointer(skb, offset,
+							sizeof(_ihdr), &_ihdr);
+				if (!ih)
+					break;
+				if (!ih->code)
+					pstats->mstats.igmp_v1queries[dir]++;
+				else
+					pstats->mstats.igmp_v2queries[dir]++;
+			}
+			break;
+		case IGMP_HOST_LEAVE_MESSAGE:
+			pstats->mstats.igmp_leaves[dir]++;
+			break;
+		}
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		t_len = ntohs(ipv6_hdr(skb)->payload_len) +
+			sizeof(struct ipv6hdr);
+		t_len -= skb_network_header_len(skb);
+		switch (type) {
+		case ICMPV6_MGM_REPORT:
+			pstats->mstats.mld_v1reports[dir]++;
+			break;
+		case ICMPV6_MLD2_REPORT:
+			pstats->mstats.mld_v2reports[dir]++;
+			break;
+		case ICMPV6_MGM_QUERY:
+			if (t_len != sizeof(struct mld_msg))
+				pstats->mstats.mld_v2queries[dir]++;
+			else
+				pstats->mstats.mld_v1queries[dir]++;
+			break;
+		case ICMPV6_MGM_REDUCTION:
+			pstats->mstats.mld_leaves[dir]++;
+			break;
+		}
+		break;
+#endif /* CONFIG_IPV6 */
+	}
+	u64_stats_update_end(&pstats->syncp);
+}
+
+void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+			const struct sk_buff *skb, u8 type, u8 dir)
+{
+	struct bridge_mcast_stats __percpu *stats;
+
+	/* if multicast_disabled is true then igmp type can't be set */
+	if (!type || !br->multicast_stats_enabled)
+		return;
+
+	if (p)
+		stats = p->mcast_stats;
+	else
+		stats = br->mcast_stats;
+	if (WARN_ON(!stats))
+		return;
+
+	br_mcast_stats_add(stats, skb, type, dir);
+}
+
+int br_multicast_init_stats(struct net_bridge *br)
+{
+	br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
+	if (!br->mcast_stats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void mcast_stats_add_dir(u64 *dst, u64 *src)
+{
+	dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
+	dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX];
+}
+
+void br_multicast_get_stats(const struct net_bridge *br,
+			    const struct net_bridge_port *p,
+			    struct br_mcast_stats *dest)
+{
+	struct bridge_mcast_stats __percpu *stats;
+	struct br_mcast_stats tdst;
+	int i;
+
+	memset(dest, 0, sizeof(*dest));
+	if (p)
+		stats = p->mcast_stats;
+	else
+		stats = br->mcast_stats;
+	if (WARN_ON(!stats))
+		return;
+
+	memset(&tdst, 0, sizeof(tdst));
+	for_each_possible_cpu(i) {
+		struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i);
+		struct br_mcast_stats temp;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			memcpy(&temp, &cpu_stats->mstats, sizeof(temp));
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries);
+		mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries);
+		mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries);
+		mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves);
+		mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports);
+		mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports);
+		mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports);
+		tdst.igmp_parse_errors += temp.igmp_parse_errors;
+
+		mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries);
+		mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries);
+		mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves);
+		mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports);
+		mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports);
+		tdst.mld_parse_errors += temp.mld_parse_errors;
+	}
+	memcpy(dest, &tdst, sizeof(*dest));
+}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2d25979273a6..77e7f69bf80d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -700,7 +700,7 @@ static int
 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		  int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
-	unsigned int mtu = ip_skb_dst_mtu(skb);
+	unsigned int mtu = ip_skb_dst_mtu(sk, skb);
 	struct iphdr *iph = ip_hdr(skb);
 
 	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a5343c7232bf..f2a29e467e78 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -851,6 +851,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
 	[IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 },
 	[IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
 	[IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
+	[IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1055,6 +1056,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 
 		br->multicast_startup_query_interval = clock_t_to_jiffies(val);
 	}
+
+	if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
+		__u8 mcast_stats;
+
+		mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
+		br->multicast_stats_enabled = !!mcast_stats;
+	}
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (data[IFLA_BR_NF_CALL_IPTABLES]) {
@@ -1110,6 +1118,7 @@ static size_t br_get_size(const struct net_device *brdev)
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_SNOOPING */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_QUERY_USE_IFADDR */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_QUERIER */
+	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_STATS_ENABLED */
 	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_MCAST_HASH_ELASTICITY */
 	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_MCAST_HASH_MAX */
 	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_MCAST_LAST_MEMBER_CNT */
@@ -1187,6 +1196,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	    nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
 		       br->multicast_query_use_ifaddr) ||
 	    nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) ||
+	    nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
+		       br->multicast_stats_enabled) ||
 	    nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
 			br->hash_elasticity) ||
 	    nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
@@ -1234,7 +1245,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	return 0;
 }
 
-static size_t br_get_linkxstats_size(const struct net_device *dev)
+static size_t bridge_get_linkxstats_size(const struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_vlan_group *vg;
@@ -1242,53 +1253,88 @@ static size_t br_get_linkxstats_size(const struct net_device *dev)
 	int numvls = 0;
 
 	vg = br_vlan_group(br);
-	if (!vg)
-		return 0;
-
-	/* we need to count all, even placeholder entries */
-	list_for_each_entry(v, &vg->vlan_list, vlist)
-		numvls++;
+	if (vg) {
+		/* we need to count all, even placeholder entries */
+		list_for_each_entry(v, &vg->vlan_list, vlist)
+			numvls++;
+	}
 
-	/* account for the vlans and the link xstats type nest attribute */
 	return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
+	       nla_total_size(sizeof(struct br_mcast_stats)) +
 	       nla_total_size(0);
 }
 
-static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
-			      int *prividx)
+static size_t brport_get_linkxstats_size(const struct net_device *dev)
+{
+	return nla_total_size(sizeof(struct br_mcast_stats)) +
+	       nla_total_size(0);
+}
+
+static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
+{
+	size_t retsize = 0;
+
+	switch (attr) {
+	case IFLA_STATS_LINK_XSTATS:
+		retsize = bridge_get_linkxstats_size(dev);
+		break;
+	case IFLA_STATS_LINK_XSTATS_SLAVE:
+		retsize = brport_get_linkxstats_size(dev);
+		break;
+	}
+
+	return retsize;
+}
+
+static int bridge_fill_linkxstats(struct sk_buff *skb,
+				  const struct net_device *dev,
+				  int *prividx)
 {
 	struct net_bridge *br = netdev_priv(dev);
+	struct nlattr *nla __maybe_unused;
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *v;
 	struct nlattr *nest;
 	int vl_idx = 0;
 
-	vg = br_vlan_group(br);
-	if (!vg)
-		goto out;
 	nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
 	if (!nest)
 		return -EMSGSIZE;
-	list_for_each_entry(v, &vg->vlan_list, vlist) {
-		struct bridge_vlan_xstats vxi;
-		struct br_vlan_stats stats;
 
-		if (vl_idx++ < *prividx)
-			continue;
-		memset(&vxi, 0, sizeof(vxi));
-		vxi.vid = v->vid;
-		br_vlan_get_stats(v, &stats);
-		vxi.rx_bytes = stats.rx_bytes;
-		vxi.rx_packets = stats.rx_packets;
-		vxi.tx_bytes = stats.tx_bytes;
-		vxi.tx_packets = stats.tx_packets;
-
-		if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
+	vg = br_vlan_group(br);
+	if (vg) {
+		list_for_each_entry(v, &vg->vlan_list, vlist) {
+			struct bridge_vlan_xstats vxi;
+			struct br_vlan_stats stats;
+
+			if (++vl_idx < *prividx)
+				continue;
+			memset(&vxi, 0, sizeof(vxi));
+			vxi.vid = v->vid;
+			br_vlan_get_stats(v, &stats);
+			vxi.rx_bytes = stats.rx_bytes;
+			vxi.rx_packets = stats.rx_packets;
+			vxi.tx_bytes = stats.tx_bytes;
+			vxi.tx_packets = stats.tx_packets;
+
+			if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
+				goto nla_put_failure;
+		}
+	}
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+	if (++vl_idx >= *prividx) {
+		nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
+					sizeof(struct br_mcast_stats),
+					BRIDGE_XSTATS_PAD);
+		if (!nla)
 			goto nla_put_failure;
+		br_multicast_get_stats(br, NULL, nla_data(nla));
 	}
+#endif
 	nla_nest_end(skb, nest);
 	*prividx = 0;
-out:
+
 	return 0;
 
 nla_put_failure:
@@ -1298,6 +1344,52 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int brport_fill_linkxstats(struct sk_buff *skb,
+				  const struct net_device *dev,
+				  int *prividx)
+{
+	struct net_bridge_port *p = br_port_get_rtnl(dev);
+	struct nlattr *nla __maybe_unused;
+	struct nlattr *nest;
+
+	if (!p)
+		return 0;
+
+	nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
+	if (!nest)
+		return -EMSGSIZE;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+	nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
+				sizeof(struct br_mcast_stats),
+				BRIDGE_XSTATS_PAD);
+	if (!nla) {
+		nla_nest_end(skb, nest);
+		return -EMSGSIZE;
+	}
+	br_multicast_get_stats(p->br, p, nla_data(nla));
+#endif
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
+static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
+			      int *prividx, int attr)
+{
+	int ret = -EINVAL;
+
+	switch (attr) {
+	case IFLA_STATS_LINK_XSTATS:
+		ret = bridge_fill_linkxstats(skb, dev, prividx);
+		break;
+	case IFLA_STATS_LINK_XSTATS_SLAVE:
+		ret = brport_fill_linkxstats(skb, dev, prividx);
+		break;
+	}
+
+	return ret;
+}
+
 static struct rtnl_af_ops br_af_ops __read_mostly = {
 	.family			= AF_BRIDGE,
 	.get_link_af_size	= br_get_link_af_size_filtered,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c7fb5d7a7218..40f200947ddc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -75,6 +75,12 @@ struct bridge_mcast_querier {
 	struct br_ip addr;
 	struct net_bridge_port __rcu	*port;
 };
+
+/* IGMP/MLD statistics */
+struct bridge_mcast_stats {
+	struct br_mcast_stats mstats;
+	struct u64_stats_sync syncp;
+};
 #endif
 
 struct br_vlan_stats {
@@ -229,6 +235,7 @@ struct net_bridge_port
 	struct bridge_mcast_own_query	ip6_own_query;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 	unsigned char			multicast_router;
+	struct bridge_mcast_stats	__percpu *mcast_stats;
 	struct timer_list		multicast_router_timer;
 	struct hlist_head		mglist;
 	struct hlist_node		rlist;
@@ -314,6 +321,8 @@ struct net_bridge
 	u8				multicast_disabled:1;
 	u8				multicast_querier:1;
 	u8				multicast_query_use_ifaddr:1;
+	u8				has_ipv6_addr:1;
+	u8				multicast_stats_enabled:1;
 
 	u32				hash_elasticity;
 	u32				hash_max;
@@ -336,6 +345,7 @@ struct net_bridge
 	struct bridge_mcast_other_query	ip4_other_query;
 	struct bridge_mcast_own_query	ip4_own_query;
 	struct bridge_mcast_querier	ip4_querier;
+	struct bridge_mcast_stats	__percpu *mcast_stats;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct bridge_mcast_other_query	ip6_other_query;
 	struct bridge_mcast_own_query	ip6_own_query;
@@ -542,7 +552,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 		     struct sk_buff *skb, u16 vid);
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					struct sk_buff *skb, u16 vid);
-void br_multicast_add_port(struct net_bridge_port *port);
+int br_multicast_add_port(struct net_bridge_port *port);
 void br_multicast_del_port(struct net_bridge_port *port);
 void br_multicast_enable_port(struct net_bridge_port *port);
 void br_multicast_disable_port(struct net_bridge_port *port);
@@ -575,6 +585,12 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
 		   struct br_ip *group, int type, u8 flags);
 void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
 		   int type);
+void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+			const struct sk_buff *skb, u8 type, u8 dir);
+int br_multicast_init_stats(struct net_bridge *br);
+void br_multicast_get_stats(const struct net_bridge *br,
+			    const struct net_bridge_port *p,
+			    struct br_mcast_stats *dest);
 
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -588,10 +604,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
 
 static inline bool
 __br_multicast_querier_exists(struct net_bridge *br,
-			      struct bridge_mcast_other_query *querier)
+				struct bridge_mcast_other_query *querier,
+				const bool is_ipv6)
 {
+	bool own_querier_enabled;
+
+	if (br->multicast_querier) {
+		if (is_ipv6 && !br->has_ipv6_addr)
+			own_querier_enabled = false;
+		else
+			own_querier_enabled = true;
+	} else {
+		own_querier_enabled = false;
+	}
+
 	return time_is_before_jiffies(querier->delay_time) &&
-	       (br->multicast_querier || timer_pending(&querier->timer));
+	       (own_querier_enabled || timer_pending(&querier->timer));
 }
 
 static inline bool br_multicast_querier_exists(struct net_bridge *br,
@@ -599,15 +627,22 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
 {
 	switch (eth->h_proto) {
 	case (htons(ETH_P_IP)):
-		return __br_multicast_querier_exists(br, &br->ip4_other_query);
+		return __br_multicast_querier_exists(br,
+			&br->ip4_other_query, false);
 #if IS_ENABLED(CONFIG_IPV6)
 	case (htons(ETH_P_IPV6)):
-		return __br_multicast_querier_exists(br, &br->ip6_other_query);
+		return __br_multicast_querier_exists(br,
+			&br->ip6_other_query, true);
 #endif
 	default:
 		return false;
 	}
 }
+
+static inline int br_multicast_igmp_type(const struct sk_buff *skb)
+{
+	return BR_INPUT_SKB_CB(skb)->igmp;
+}
 #else
 static inline int br_multicast_rcv(struct net_bridge *br,
 				   struct net_bridge_port *port,
@@ -623,8 +658,9 @@ static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 	return NULL;
 }
 
-static inline void br_multicast_add_port(struct net_bridge_port *port)
+static inline int br_multicast_add_port(struct net_bridge_port *port)
 {
+	return 0;
 }
 
 static inline void br_multicast_del_port(struct net_bridge_port *port)
@@ -680,6 +716,23 @@ static inline void br_mdb_init(void)
 static inline void br_mdb_uninit(void)
 {
 }
+
+static inline void br_multicast_count(struct net_bridge *br,
+				      const struct net_bridge_port *p,
+				      const struct sk_buff *skb,
+				      u8 type, u8 dir)
+{
+}
+
+static inline int br_multicast_init_stats(struct net_bridge *br)
+{
+	return 0;
+}
+
+static inline int br_multicast_igmp_type(const struct sk_buff *skb)
+{
+	return 0;
+}
 #endif
 
 /* br_vlan.c */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index beb47071e38d..e120307c6e36 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -618,6 +618,30 @@ static ssize_t multicast_startup_query_interval_store(
 	return store_bridge_parm(d, buf, len, set_startup_query_interval);
 }
 static DEVICE_ATTR_RW(multicast_startup_query_interval);
+
+static ssize_t multicast_stats_enabled_show(struct device *d,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+
+	return sprintf(buf, "%u\n", br->multicast_stats_enabled);
+}
+
+static int set_stats_enabled(struct net_bridge *br, unsigned long val)
+{
+	br->multicast_stats_enabled = !!val;
+	return 0;
+}
+
+static ssize_t multicast_stats_enabled_store(struct device *d,
+					     struct device_attribute *attr,
+					     const char *buf,
+					     size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_stats_enabled);
+}
+static DEVICE_ATTR_RW(multicast_stats_enabled);
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static ssize_t nf_call_iptables_show(
@@ -784,6 +808,7 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_multicast_query_interval.attr,
 	&dev_attr_multicast_query_response_interval.attr,
 	&dev_attr_multicast_startup_query_interval.attr,
+	&dev_attr_multicast_stats_enabled.attr,
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	&dev_attr_nf_call_iptables.attr,
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 2a449b7ab8fa..5fc4affd9fdb 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -20,16 +20,16 @@ ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	__be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
 
 	if (info->bitmask & EBT_802_3_SAP) {
-		if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP))
+		if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap))
 			return false;
-		if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP))
+		if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap))
 			return false;
 	}
 
 	if (info->bitmask & EBT_802_3_TYPE) {
 		if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE))
 			return false;
-		if (FWINV(info->type != type, EBT_802_3_TYPE))
+		if (NF_INVF(info, EBT_802_3_TYPE, info->type != type))
 			return false;
 	}
 
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index cd457b891b27..227142282b45 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -25,14 +25,14 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
 	if (ah == NULL)
 		return false;
-	if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode !=
-	   ah->ar_op, EBT_ARP_OPCODE))
+	if ((info->bitmask & EBT_ARP_OPCODE) &&
+	    NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op))
 		return false;
-	if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype !=
-	   ah->ar_hrd, EBT_ARP_HTYPE))
+	if ((info->bitmask & EBT_ARP_HTYPE) &&
+	    NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd))
 		return false;
-	if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype !=
-	   ah->ar_pro, EBT_ARP_PTYPE))
+	if ((info->bitmask & EBT_ARP_PTYPE) &&
+	    NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro))
 		return false;
 
 	if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) {
@@ -51,21 +51,22 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 					sizeof(daddr), &daddr);
 		if (dap == NULL)
 			return false;
-		if (info->bitmask & EBT_ARP_SRC_IP &&
-		    FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+		if ((info->bitmask & EBT_ARP_SRC_IP) &&
+		    NF_INVF(info, EBT_ARP_SRC_IP,
+			    info->saddr != (*sap & info->smsk)))
 			return false;
-		if (info->bitmask & EBT_ARP_DST_IP &&
-		    FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+		if ((info->bitmask & EBT_ARP_DST_IP) &&
+		    NF_INVF(info, EBT_ARP_DST_IP,
+			    info->daddr != (*dap & info->dmsk)))
 			return false;
-		if (info->bitmask & EBT_ARP_GRAT &&
-		    FWINV(*dap != *sap, EBT_ARP_GRAT))
+		if ((info->bitmask & EBT_ARP_GRAT) &&
+		    NF_INVF(info, EBT_ARP_GRAT, *dap != *sap))
 			return false;
 	}
 
 	if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
 		const unsigned char *mp;
 		unsigned char _mac[ETH_ALEN];
-		uint8_t verdict, i;
 
 		if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
 			return false;
@@ -74,11 +75,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 						sizeof(_mac), &_mac);
 			if (mp == NULL)
 				return false;
-			verdict = 0;
-			for (i = 0; i < 6; i++)
-				verdict |= (mp[i] ^ info->smaddr[i]) &
-				       info->smmsk[i];
-			if (FWINV(verdict != 0, EBT_ARP_SRC_MAC))
+			if (NF_INVF(info, EBT_ARP_SRC_MAC,
+				    !ether_addr_equal_masked(mp, info->smaddr,
+							     info->smmsk)))
 				return false;
 		}
 
@@ -88,11 +87,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 						sizeof(_mac), &_mac);
 			if (mp == NULL)
 				return false;
-			verdict = 0;
-			for (i = 0; i < 6; i++)
-				verdict |= (mp[i] ^ info->dmaddr[i]) &
-					info->dmmsk[i];
-			if (FWINV(verdict != 0, EBT_ARP_DST_MAC))
+			if (NF_INVF(info, EBT_ARP_DST_MAC,
+				    !ether_addr_equal_masked(mp, info->dmaddr,
+							     info->dmmsk)))
 				return false;
 		}
 	}
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 23bca62d58d2..d06968bdf5ec 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -36,19 +36,19 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
 	if (ih == NULL)
 		return false;
-	if (info->bitmask & EBT_IP_TOS &&
-	   FWINV(info->tos != ih->tos, EBT_IP_TOS))
+	if ((info->bitmask & EBT_IP_TOS) &&
+	    NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos))
 		return false;
-	if (info->bitmask & EBT_IP_SOURCE &&
-	   FWINV((ih->saddr & info->smsk) !=
-	   info->saddr, EBT_IP_SOURCE))
+	if ((info->bitmask & EBT_IP_SOURCE) &&
+	    NF_INVF(info, EBT_IP_SOURCE,
+		    (ih->saddr & info->smsk) != info->saddr))
 		return false;
 	if ((info->bitmask & EBT_IP_DEST) &&
-	   FWINV((ih->daddr & info->dmsk) !=
-	   info->daddr, EBT_IP_DEST))
+	    NF_INVF(info, EBT_IP_DEST,
+		    (ih->daddr & info->dmsk) != info->daddr))
 		return false;
 	if (info->bitmask & EBT_IP_PROTO) {
-		if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO))
+		if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
 			return false;
 		if (!(info->bitmask & EBT_IP_DPORT) &&
 		    !(info->bitmask & EBT_IP_SPORT))
@@ -61,16 +61,16 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return false;
 		if (info->bitmask & EBT_IP_DPORT) {
 			u32 dst = ntohs(pptr->dst);
-			if (FWINV(dst < info->dport[0] ||
-				  dst > info->dport[1],
-				  EBT_IP_DPORT))
+			if (NF_INVF(info, EBT_IP_DPORT,
+				    dst < info->dport[0] ||
+				    dst > info->dport[1]))
 			return false;
 		}
 		if (info->bitmask & EBT_IP_SPORT) {
 			u32 src = ntohs(pptr->src);
-			if (FWINV(src < info->sport[0] ||
-				  src > info->sport[1],
-				  EBT_IP_SPORT))
+			if (NF_INVF(info, EBT_IP_SPORT,
+				    src < info->sport[0] ||
+				    src > info->sport[1]))
 			return false;
 		}
 	}
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 98de6e7fd86d..4617491be41e 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -45,15 +45,18 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
 		return false;
-	if (info->bitmask & EBT_IP6_TCLASS &&
-	   FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
+	if ((info->bitmask & EBT_IP6_TCLASS) &&
+	    NF_INVF(info, EBT_IP6_TCLASS,
+		    info->tclass != ipv6_get_dsfield(ih6)))
 		return false;
-	if ((info->bitmask & EBT_IP6_SOURCE &&
-	    FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
-				       &info->saddr), EBT_IP6_SOURCE)) ||
-	    (info->bitmask & EBT_IP6_DEST &&
-	    FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
-				       &info->daddr), EBT_IP6_DEST)))
+	if (((info->bitmask & EBT_IP6_SOURCE) &&
+	     NF_INVF(info, EBT_IP6_SOURCE,
+		     ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
+					  &info->saddr))) ||
+	    ((info->bitmask & EBT_IP6_DEST) &&
+	     NF_INVF(info, EBT_IP6_DEST,
+		     ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
+					  &info->daddr))))
 		return false;
 	if (info->bitmask & EBT_IP6_PROTO) {
 		uint8_t nexthdr = ih6->nexthdr;
@@ -63,7 +66,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off);
 		if (offset_ph == -1)
 			return false;
-		if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
+		if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr))
 			return false;
 		if (!(info->bitmask & (EBT_IP6_DPORT |
 				       EBT_IP6_SPORT | EBT_IP6_ICMP6)))
@@ -76,22 +79,24 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return false;
 		if (info->bitmask & EBT_IP6_DPORT) {
 			u16 dst = ntohs(pptr->tcpudphdr.dst);
-			if (FWINV(dst < info->dport[0] ||
-				  dst > info->dport[1], EBT_IP6_DPORT))
+			if (NF_INVF(info, EBT_IP6_DPORT,
+				    dst < info->dport[0] ||
+				    dst > info->dport[1]))
 				return false;
 		}
 		if (info->bitmask & EBT_IP6_SPORT) {
 			u16 src = ntohs(pptr->tcpudphdr.src);
-			if (FWINV(src < info->sport[0] ||
-				  src > info->sport[1], EBT_IP6_SPORT))
+			if (NF_INVF(info, EBT_IP6_SPORT,
+				    src < info->sport[0] ||
+				    src > info->sport[1]))
 			return false;
 		}
 		if ((info->bitmask & EBT_IP6_ICMP6) &&
-		     FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
-			   pptr->icmphdr.type > info->icmpv6_type[1] ||
-			   pptr->icmphdr.code < info->icmpv6_code[0] ||
-			   pptr->icmphdr.code > info->icmpv6_code[1],
-							EBT_IP6_ICMP6))
+		    NF_INVF(info, EBT_IP6_ICMP6,
+			    pptr->icmphdr.type < info->icmpv6_type[0] ||
+			    pptr->icmphdr.type > info->icmpv6_type[1] ||
+			    pptr->icmphdr.code < info->icmpv6_code[0] ||
+			    pptr->icmphdr.code > info->icmpv6_code[1]))
 			return false;
 	}
 	return true;
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 6b731e12ecfa..3140eb912d7e 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -17,24 +17,24 @@
 #define BPDU_TYPE_TCN 0x80
 
 struct stp_header {
-	uint8_t dsap;
-	uint8_t ssap;
-	uint8_t ctrl;
-	uint8_t pid;
-	uint8_t vers;
-	uint8_t type;
+	u8 dsap;
+	u8 ssap;
+	u8 ctrl;
+	u8 pid;
+	u8 vers;
+	u8 type;
 };
 
 struct stp_config_pdu {
-	uint8_t flags;
-	uint8_t root[8];
-	uint8_t root_cost[4];
-	uint8_t sender[8];
-	uint8_t port[2];
-	uint8_t msg_age[2];
-	uint8_t max_age[2];
-	uint8_t hello_time[2];
-	uint8_t forward_delay[2];
+	u8 flags;
+	u8 root[8];
+	u8 root_cost[4];
+	u8 sender[8];
+	u8 port[2];
+	u8 msg_age[2];
+	u8 max_age[2];
+	u8 hello_time[2];
+	u8 forward_delay[2];
 };
 
 #define NR16(p) (p[0] << 8 | p[1])
@@ -44,76 +44,73 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 			      const struct stp_config_pdu *stpc)
 {
 	const struct ebt_stp_config_info *c;
-	uint16_t v16;
-	uint32_t v32;
-	int verdict, i;
+	u16 v16;
+	u32 v32;
 
 	c = &info->config;
 	if ((info->bitmask & EBT_STP_FLAGS) &&
-	    FWINV(c->flags != stpc->flags, EBT_STP_FLAGS))
+	    NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags))
 		return false;
 	if (info->bitmask & EBT_STP_ROOTPRIO) {
 		v16 = NR16(stpc->root);
-		if (FWINV(v16 < c->root_priol ||
-		    v16 > c->root_priou, EBT_STP_ROOTPRIO))
+		if (NF_INVF(info, EBT_STP_ROOTPRIO,
+			    v16 < c->root_priol || v16 > c->root_priou))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_ROOTADDR) {
-		verdict = 0;
-		for (i = 0; i < 6; i++)
-			verdict |= (stpc->root[2+i] ^ c->root_addr[i]) &
-				   c->root_addrmsk[i];
-		if (FWINV(verdict != 0, EBT_STP_ROOTADDR))
+		if (NF_INVF(info, EBT_STP_ROOTADDR,
+			    !ether_addr_equal_masked(&stpc->root[2],
+						     c->root_addr,
+						     c->root_addrmsk)))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_ROOTCOST) {
 		v32 = NR32(stpc->root_cost);
-		if (FWINV(v32 < c->root_costl ||
-		    v32 > c->root_costu, EBT_STP_ROOTCOST))
+		if (NF_INVF(info, EBT_STP_ROOTCOST,
+			    v32 < c->root_costl || v32 > c->root_costu))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_SENDERPRIO) {
 		v16 = NR16(stpc->sender);
-		if (FWINV(v16 < c->sender_priol ||
-		    v16 > c->sender_priou, EBT_STP_SENDERPRIO))
+		if (NF_INVF(info, EBT_STP_SENDERPRIO,
+			    v16 < c->sender_priol || v16 > c->sender_priou))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_SENDERADDR) {
-		verdict = 0;
-		for (i = 0; i < 6; i++)
-			verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) &
-				   c->sender_addrmsk[i];
-		if (FWINV(verdict != 0, EBT_STP_SENDERADDR))
+		if (NF_INVF(info, EBT_STP_SENDERADDR,
+			    !ether_addr_equal_masked(&stpc->sender[2],
+						     c->sender_addr,
+						     c->sender_addrmsk)))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_PORT) {
 		v16 = NR16(stpc->port);
-		if (FWINV(v16 < c->portl ||
-		    v16 > c->portu, EBT_STP_PORT))
+		if (NF_INVF(info, EBT_STP_PORT,
+			    v16 < c->portl || v16 > c->portu))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_MSGAGE) {
 		v16 = NR16(stpc->msg_age);
-		if (FWINV(v16 < c->msg_agel ||
-		    v16 > c->msg_ageu, EBT_STP_MSGAGE))
+		if (NF_INVF(info, EBT_STP_MSGAGE,
+			    v16 < c->msg_agel || v16 > c->msg_ageu))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_MAXAGE) {
 		v16 = NR16(stpc->max_age);
-		if (FWINV(v16 < c->max_agel ||
-		    v16 > c->max_ageu, EBT_STP_MAXAGE))
+		if (NF_INVF(info, EBT_STP_MAXAGE,
+			    v16 < c->max_agel || v16 > c->max_ageu))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_HELLOTIME) {
 		v16 = NR16(stpc->hello_time);
-		if (FWINV(v16 < c->hello_timel ||
-		    v16 > c->hello_timeu, EBT_STP_HELLOTIME))
+		if (NF_INVF(info, EBT_STP_HELLOTIME,
+			    v16 < c->hello_timel || v16 > c->hello_timeu))
 			return false;
 	}
 	if (info->bitmask & EBT_STP_FWDD) {
 		v16 = NR16(stpc->forward_delay);
-		if (FWINV(v16 < c->forward_delayl ||
-		    v16 > c->forward_delayu, EBT_STP_FWDD))
+		if (NF_INVF(info, EBT_STP_FWDD,
+			    v16 < c->forward_delayl || v16 > c->forward_delayu))
 			return false;
 	}
 	return true;
@@ -125,7 +122,7 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct ebt_stp_info *info = par->matchinfo;
 	const struct stp_header *sp;
 	struct stp_header _stph;
-	const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
+	const u8 header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
 
 	sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
 	if (sp == NULL)
@@ -135,8 +132,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (memcmp(sp, header, sizeof(header)))
 		return false;
 
-	if (info->bitmask & EBT_STP_TYPE &&
-	    FWINV(info->type != sp->type, EBT_STP_TYPE))
+	if ((info->bitmask & EBT_STP_TYPE) &&
+	    NF_INVF(info, EBT_STP_TYPE, info->type != sp->type))
 		return false;
 
 	if (sp->type == BPDU_TYPE_CONFIG &&
@@ -156,8 +153,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
-	const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
-	const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+	const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	const struct ebt_entry *e = par->entryinfo;
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5a61f35412a0..cceac5bb658f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -121,7 +121,6 @@ ebt_dev_check(const char *entry, const struct net_device *device)
 	return devname[i] != entry[i] && entry[i] != 1;
 }
 
-#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg))
 /* process standard matches */
 static inline int
 ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
@@ -130,7 +129,6 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
 	const struct ethhdr *h = eth_hdr(skb);
 	const struct net_bridge_port *p;
 	__be16 ethproto;
-	int verdict, i;
 
 	if (skb_vlan_tag_present(skb))
 		ethproto = htons(ETH_P_8021Q);
@@ -138,38 +136,36 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
 		ethproto = h->h_proto;
 
 	if (e->bitmask & EBT_802_3) {
-		if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO))
+		if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto)))
 			return 1;
 	} else if (!(e->bitmask & EBT_NOPROTO) &&
-	   FWINV2(e->ethproto != ethproto, EBT_IPROTO))
+		   NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto))
 		return 1;
 
-	if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
+	if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in)))
 		return 1;
-	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
+	if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out)))
 		return 1;
 	/* rcu_read_lock()ed by nf_hook_slow */
 	if (in && (p = br_port_get_rcu(in)) != NULL &&
-	    FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
+	    NF_INVF(e, EBT_ILOGICALIN,
+		    ebt_dev_check(e->logical_in, p->br->dev)))
 		return 1;
 	if (out && (p = br_port_get_rcu(out)) != NULL &&
-	    FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT))
+	    NF_INVF(e, EBT_ILOGICALOUT,
+		    ebt_dev_check(e->logical_out, p->br->dev)))
 		return 1;
 
 	if (e->bitmask & EBT_SOURCEMAC) {
-		verdict = 0;
-		for (i = 0; i < 6; i++)
-			verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
-			   e->sourcemsk[i];
-		if (FWINV2(verdict != 0, EBT_ISOURCE))
+		if (NF_INVF(e, EBT_ISOURCE,
+			    !ether_addr_equal_masked(h->h_source, e->sourcemac,
+						     e->sourcemsk)))
 			return 1;
 	}
 	if (e->bitmask & EBT_DESTMAC) {
-		verdict = 0;
-		for (i = 0; i < 6; i++)
-			verdict |= (h->h_dest[i] ^ e->destmac[i]) &
-			   e->destmsk[i];
-		if (FWINV2(verdict != 0, EBT_IDEST))
+		if (NF_INVF(e, EBT_IDEST,
+			    !ether_addr_equal_masked(h->h_dest, e->destmac,
+						     e->destmsk)))
 			return 1;
 	}
 	return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index aba10d2a8bc3..7894e406c806 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4972,7 +4972,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 
 			if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
 				rc = napi->poll(napi, BUSY_POLL_BUDGET);
-				trace_napi_poll(napi);
+				trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 				if (rc == BUSY_POLL_BUDGET) {
 					napi_complete_done(napi, rc);
 					napi_schedule(napi);
@@ -5128,7 +5128,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 	work = 0;
 	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
 		work = n->poll(n, weight);
-		trace_napi_poll(n);
+		trace_napi_poll(n, work, weight);
 	}
 
 	WARN_ON_ONCE(work > weight);
@@ -5445,6 +5445,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 EXPORT_SYMBOL(netdev_lower_get_next);
 
 /**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour all lower
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry(*iter, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->all_adj_list.lower)
+		return NULL;
+
+	*iter = lower->list.next;
+
+	return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ *				   lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+						 struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+				       struct netdev_adjacent, list);
+
+	return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+/**
  * netdev_lower_get_first_private_rcu - Get the first ->private from the
  *				       lower neighbour list, RCU
  *				       variant
@@ -6041,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
 }
 EXPORT_SYMBOL(netdev_lower_state_changed);
 
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+					   struct neighbour *n)
+{
+	struct net_device *lower_dev, *stop_dev;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		if (!lower_dev->netdev_ops->ndo_neigh_construct)
+			continue;
+		err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+		if (err) {
+			stop_dev = lower_dev;
+			goto rollback;
+		}
+	}
+	return 0;
+
+rollback:
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		if (lower_dev == stop_dev)
+			break;
+		if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+			continue;
+		lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+					  struct neighbour *n)
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+			continue;
+		lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+	}
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 933e8d4d3968..1b5063088f1a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -26,6 +26,10 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/devlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 
 static LIST_HEAD(devlink_list);
 
@@ -1394,6 +1398,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
 	return -EOPNOTSUPP;
 }
 
+static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags, u16 mode)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
+						struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const struct devlink_ops *ops = devlink->ops;
+	struct sk_buff *msg;
+	u16 mode;
+	int err;
+
+	if (!ops || !ops->eswitch_mode_get)
+		return -EOPNOTSUPP;
+
+	err = ops->eswitch_mode_get(devlink, &mode);
+	if (err)
+		return err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
+				   info->snd_portid, info->snd_seq, 0, mode);
+
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
+						struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const struct devlink_ops *ops = devlink->ops;
+	u16 mode;
+
+	if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
+		return -EINVAL;
+
+	mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+
+	if (ops && ops->eswitch_mode_set)
+		return ops->eswitch_mode_set(devlink, mode);
+	return -EOPNOTSUPP;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -1407,6 +1483,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
+	[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -1525,6 +1602,20 @@ static const struct genl_ops devlink_nl_ops[] = {
 				  DEVLINK_NL_FLAG_NEED_SB |
 				  DEVLINK_NL_FLAG_LOCK_PORTS,
 	},
+	{
+		.cmd = DEVLINK_CMD_ESWITCH_MODE_GET,
+		.doit = devlink_nl_cmd_eswitch_mode_get_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_ESWITCH_MODE_SET,
+		.doit = devlink_nl_cmd_eswitch_mode_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 /**
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 252e155c837b..d6b3b579560d 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -187,7 +187,8 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *locatio
 	trace_drop_common(skb, location);
 }
 
-static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
+				int work, int budget)
 {
 	struct dm_hw_stat_delta *new_stat;
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98298b11f534..be4629c344a6 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -269,6 +269,49 @@ errout:
 	return err;
 }
 
+static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
+		       struct nlattr **tb, struct fib_rule *rule)
+{
+	struct fib_rule *r;
+
+	list_for_each_entry(r, &ops->rules_list, list) {
+		if (r->action != rule->action)
+			continue;
+
+		if (r->table != rule->table)
+			continue;
+
+		if (r->pref != rule->pref)
+			continue;
+
+		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+			continue;
+
+		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+			continue;
+
+		if (r->mark != rule->mark)
+			continue;
+
+		if (r->mark_mask != rule->mark_mask)
+			continue;
+
+		if (r->tun_id != rule->tun_id)
+			continue;
+
+		if (r->fr_net != rule->fr_net)
+			continue;
+
+		if (r->l3mdev != rule->l3mdev)
+			continue;
+
+		if (!ops->compare(r, frh, tb))
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
@@ -386,6 +429,12 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (rule->l3mdev && rule->table)
 		goto errout_free;
 
+	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
+	    rule_exists(ops, frh, tb, rule)) {
+		err = -EEXIST;
+		goto errout_free;
+	}
+
 	err = ops->configure(rule, skb, frh, tb);
 	if (err < 0)
 		goto errout_free;
diff --git a/net/core/filter.c b/net/core/filter.c
index df6860c85d72..10c4a2f9e8bb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -150,6 +150,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 	return raw_smp_processor_id();
 }
 
+static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
+	.func		= __get_raw_cpu_id,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
 			      struct bpf_insn *insn_buf)
 {
@@ -1295,21 +1301,10 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 
 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
 {
-	struct bpf_prog *prog;
-
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return ERR_PTR(-EPERM);
 
-	prog = bpf_prog_get(ufd);
-	if (IS_ERR(prog))
-		return prog;
-
-	if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
-		bpf_prog_put(prog);
-		return ERR_PTR(-EINVAL);
-	}
-
-	return prog;
+	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
 }
 
 int sk_attach_bpf(u32 ufd, struct sock *sk)
@@ -1734,6 +1729,23 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* If skb_clear_hash() was called due to mangling, we can
+	 * trigger SW recalculation here. Later access to hash
+	 * can then use the inline skb->hash via context directly
+	 * instead of calling this helper again.
+	 */
+	return skb_get_hash((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
+	.func		= bpf_get_hash_recalc,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1777,6 +1789,224 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
 };
 EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
 
+static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+{
+	/* Caller already did skb_cow() with len as headroom,
+	 * so no need to do it here.
+	 */
+	skb_push(skb, len);
+	memmove(skb->data, skb->data + len, off);
+	memset(skb->data + off, 0, len);
+
+	/* No skb_postpush_rcsum(skb, skb->data + off, len)
+	 * needed here as it does not change the skb->csum
+	 * result for checksum complete when summing over
+	 * zeroed blocks.
+	 */
+	return 0;
+}
+
+static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+{
+	/* skb_ensure_writable() is not needed here, as we're
+	 * already working on an uncloned skb.
+	 */
+	if (unlikely(!pskb_may_pull(skb, off + len)))
+		return -ENOMEM;
+
+	skb_postpull_rcsum(skb, skb->data + off, len);
+	memmove(skb->data + len, skb->data, off);
+	__skb_pull(skb, len);
+
+	return 0;
+}
+
+static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
+{
+	bool trans_same = skb->transport_header == skb->network_header;
+	int ret;
+
+	/* There's no need for __skb_push()/__skb_pull() pair to
+	 * get to the start of the mac header as we're guaranteed
+	 * to always start from here under eBPF.
+	 */
+	ret = bpf_skb_generic_push(skb, off, len);
+	if (likely(!ret)) {
+		skb->mac_header -= len;
+		skb->network_header -= len;
+		if (trans_same)
+			skb->transport_header = skb->network_header;
+	}
+
+	return ret;
+}
+
+static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
+{
+	bool trans_same = skb->transport_header == skb->network_header;
+	int ret;
+
+	/* Same here, __skb_push()/__skb_pull() pair not needed. */
+	ret = bpf_skb_generic_pop(skb, off, len);
+	if (likely(!ret)) {
+		skb->mac_header += len;
+		skb->network_header += len;
+		if (trans_same)
+			skb->transport_header = skb->network_header;
+	}
+
+	return ret;
+}
+
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
+{
+	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+	u32 off = skb->network_header - skb->mac_header;
+	int ret;
+
+	ret = skb_cow(skb, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (skb_is_gso(skb)) {
+		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
+		 * be changed into SKB_GSO_TCPV6.
+		 */
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
+			skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV6;
+		}
+
+		/* Due to IPv6 header, MSS needs to be downgraded. */
+		skb_shinfo(skb)->gso_size -= len_diff;
+		/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb_clear_hash(skb);
+
+	return 0;
+}
+
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
+{
+	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+	u32 off = skb->network_header - skb->mac_header;
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (skb_is_gso(skb)) {
+		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
+		 * be changed into SKB_GSO_TCPV4.
+		 */
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
+			skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV4;
+		}
+
+		/* Due to IPv4 header, MSS can be upgraded. */
+		skb_shinfo(skb)->gso_size += len_diff;
+		/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	skb->protocol = htons(ETH_P_IP);
+	skb_clear_hash(skb);
+
+	return 0;
+}
+
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
+{
+	__be16 from_proto = skb->protocol;
+
+	if (from_proto == htons(ETH_P_IP) &&
+	      to_proto == htons(ETH_P_IPV6))
+		return bpf_skb_proto_4_to_6(skb);
+
+	if (from_proto == htons(ETH_P_IPV6) &&
+	      to_proto == htons(ETH_P_IP))
+		return bpf_skb_proto_6_to_4(skb);
+
+	return -ENOTSUPP;
+}
+
+static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	__be16 proto = (__force __be16) r2;
+	int ret;
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	/* General idea is that this helper does the basic groundwork
+	 * needed for changing the protocol, and eBPF program fills the
+	 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
+	 * and other helpers, rather than passing a raw buffer here.
+	 *
+	 * The rationale is to keep this minimal and without a need to
+	 * deal with raw packet data. F.e. even if we would pass buffers
+	 * here, the program still needs to call the bpf_lX_csum_replace()
+	 * helpers anyway. Plus, this way we keep also separation of
+	 * concerns, since f.e. bpf_skb_store_bytes() should only take
+	 * care of stores.
+	 *
+	 * Currently, additional options and extension header space are
+	 * not supported, but flags register is reserved so we can adapt
+	 * that. For offloads, we mark packet as dodgy, so that headers
+	 * need to be verified first.
+	 */
+	ret = bpf_skb_proto_xlat(skb, proto);
+	bpf_compute_data_end(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_proto_proto = {
+	.func		= bpf_skb_change_proto,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	u32 pkt_type = r2;
+
+	/* We only allow a restricted subset to be changed for now. */
+	if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
+		     pkt_type > PACKET_OTHERHOST))
+		return -EINVAL;
+
+	skb->pkt_type = pkt_type;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_change_type_proto = {
+	.func		= bpf_skb_change_type,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_skb_data(void *func)
 {
 	if (func == bpf_skb_vlan_push)
@@ -1785,6 +2015,8 @@ bool bpf_helper_changes_skb_data(void *func)
 		return true;
 	if (func == bpf_skb_store_bytes)
 		return true;
+	if (func == bpf_skb_change_proto)
+		return true;
 	if (func == bpf_l3_csum_replace)
 		return true;
 	if (func == bpf_l4_csum_replace)
@@ -2024,6 +2256,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 	}
 }
 
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *)(long)r1;
+	struct bpf_map *map = (struct bpf_map *)(long)r2;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct cgroup *cgrp;
+	struct sock *sk;
+	u32 i = (u32)r3;
+
+	sk = skb->sk;
+	if (!sk || !sk_fullsock(sk))
+		return -ENOENT;
+
+	if (unlikely(i >= array->map.max_entries))
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[i]);
+	if (unlikely(!cgrp))
+		return -EAGAIN;
+
+	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+}
+
+static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
+	.func		= bpf_skb_in_cgroup,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+};
+#endif
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -2037,7 +2303,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_get_smp_processor_id:
-		return &bpf_get_smp_processor_id_proto;
+		return &bpf_get_raw_smp_processor_id_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_ktime_get_ns:
@@ -2072,6 +2338,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_vlan_push_proto;
 	case BPF_FUNC_skb_vlan_pop:
 		return &bpf_skb_vlan_pop_proto;
+	case BPF_FUNC_skb_change_proto:
+		return &bpf_skb_change_proto_proto;
+	case BPF_FUNC_skb_change_type:
+		return &bpf_skb_change_type_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
@@ -2084,8 +2354,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_redirect_proto;
 	case BPF_FUNC_get_route_realm:
 		return &bpf_get_route_realm_proto;
+	case BPF_FUNC_get_hash_recalc:
+		return &bpf_get_hash_recalc_proto;
 	case BPF_FUNC_perf_event_output:
 		return bpf_get_event_output_proto();
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	case BPF_FUNC_skb_in_cgroup:
+		return &bpf_skb_in_cgroup_proto;
+#endif
 	default:
 		return sk_filter_func_proto(func_id);
 	}
@@ -2105,7 +2383,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 }
 
 static bool sk_filter_is_valid_access(int off, int size,
-				      enum bpf_access_type type)
+				      enum bpf_access_type type,
+				      enum bpf_reg_type *reg_type)
 {
 	switch (off) {
 	case offsetof(struct __sk_buff, tc_classid):
@@ -2128,7 +2407,8 @@ static bool sk_filter_is_valid_access(int off, int size,
 }
 
 static bool tc_cls_act_is_valid_access(int off, int size,
-				       enum bpf_access_type type)
+				       enum bpf_access_type type,
+				       enum bpf_reg_type *reg_type)
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
@@ -2143,6 +2423,16 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 			return false;
 		}
 	}
+
+	switch (off) {
+	case offsetof(struct __sk_buff, data):
+		*reg_type = PTR_TO_PACKET;
+		break;
+	case offsetof(struct __sk_buff, data_end):
+		*reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
 	return __is_valid_access(off, size, type);
 }
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a669dea146c6..61ad43f61c5e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
 }
 EXPORT_SYMBOL(make_flow_keys_digest);
 
+static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
+
+u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+{
+	struct flow_keys keys;
+
+	__flow_hash_secret_init();
+
+	memset(&keys, 0, sizeof(keys));
+	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+			   NULL, 0, 0, 0,
+			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+	return __flow_hash_from_keys(&keys, hashrnd);
+}
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+
 /**
  * __skb_get_hash: calculate a flow hash
  * @skb: sk_buff to calculate flow hash from
@@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
 	},
 };
 
+static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
+	{
+		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
+		.offset = offsetof(struct flow_keys, control),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_BASIC,
+		.offset = offsetof(struct flow_keys, basic),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+		.offset = offsetof(struct flow_keys, addrs.v4addrs),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+		.offset = offsetof(struct flow_keys, addrs.v6addrs),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_PORTS,
+		.offset = offsetof(struct flow_keys, ports),
+	},
+};
+
 static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
 	{
 		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
 	skb_flow_dissector_init(&flow_keys_dissector,
 				flow_keys_dissector_keys,
 				ARRAY_SIZE(flow_keys_dissector_keys));
+	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
+				flow_keys_dissector_symmetric_keys,
+				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
 	skb_flow_dissector_init(&flow_keys_buf_dissector,
 				flow_keys_buf_dissector_keys,
 				ARRAY_SIZE(flow_keys_buf_dissector_keys));
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 29dd8cc22bbf..5cdc62a8eb84 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 	}
 
 	if (dev->netdev_ops->ndo_neigh_construct) {
-		error = dev->netdev_ops->ndo_neigh_construct(n);
+		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
 		if (error < 0) {
 			rc = ERR_PTR(error);
 			goto out_neigh_release;
@@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh)
 	neigh->arp_queue_len_bytes = 0;
 
 	if (dev->netdev_ops->ndo_neigh_destroy)
-		dev->netdev_ops->ndo_neigh_destroy(neigh);
+		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
 
 	dev_put(dev);
 	neigh_parms_put(neigh->parms);
@@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
 			case NDTPA_DELAY_PROBE_TIME:
 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
 					      nla_get_msecs(tbp[i]));
+				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
 				break;
 			case NDTPA_RETRANS_TIME:
 				NEIGH_VAR_SET(p, RETRANS_TIME,
@@ -2469,13 +2470,17 @@ int neigh_xmit(int index, struct net_device *dev,
 		tbl = neigh_tables[index];
 		if (!tbl)
 			goto out;
+		rcu_read_lock_bh();
 		neigh = __neigh_lookup_noref(tbl, addr, dev);
 		if (!neigh)
 			neigh = __neigh_create(tbl, addr, dev, false);
 		err = PTR_ERR(neigh);
-		if (IS_ERR(neigh))
+		if (IS_ERR(neigh)) {
+			rcu_read_unlock_bh();
 			goto out_kfree_skb;
+		}
 		err = neigh->output(neigh, skb);
+		rcu_read_unlock_bh();
 	}
 	else if (index == NEIGH_LINK_TABLE) {
 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
@@ -2926,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
 		return;
 
 	set_bit(index, p->data_state);
+	call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
 	if (!dev) /* NULL dev means this is default value */
 		neigh_copy_dflt_parms(net, p, index);
 }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7a0b616557ab..6e4f34721080 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -322,7 +322,20 @@ NETDEVICE_SHOW_RW(flags, fmt_hex);
 
 static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
 {
-	dev->tx_queue_len = new_len;
+	int res, orig_len = dev->tx_queue_len;
+
+	if (new_len != orig_len) {
+		dev->tx_queue_len = new_len;
+		res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+		res = notifier_to_errno(res);
+		if (res) {
+			netdev_err(dev,
+				   "refused to change device tx_queue_len\n");
+			dev->tx_queue_len = orig_len;
+			return -EFAULT;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94acfc89ad97..53599bd0c82d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -163,7 +163,7 @@ static void poll_one_napi(struct napi_struct *napi)
 	 */
 	work = napi->poll(napi, 0);
 	WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
-	trace_napi_poll(napi);
+	trace_napi_poll(napi, work, 0);
 
 	clear_bit(NAPI_STATE_NPSVC, &napi->state);
 }
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f74ab9c3b38f..bbd118b19aef 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -213,6 +213,7 @@
 /* Xmit modes */
 #define M_START_XMIT		0	/* Default normal TX */
 #define M_NETIF_RECEIVE 	1	/* Inject packets into stack */
+#define M_QUEUE_XMIT		2	/* Inject packet into qdisc */
 
 /* If lock -- protects updating of if_list */
 #define   if_lock(t)           spin_lock(&(t->if_lock));
@@ -626,6 +627,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 
 	if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
 		seq_puts(seq, "     xmit_mode: netif_receive\n");
+	else if (pkt_dev->xmit_mode == M_QUEUE_XMIT)
+		seq_puts(seq, "     xmit_mode: xmit_queue\n");
 
 	seq_puts(seq, "     Flags: ");
 
@@ -1142,8 +1145,10 @@ static ssize_t pktgen_if_write(struct file *file,
 			return len;
 
 		i += len;
-		if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
-		    (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+		if ((value > 1) &&
+		    ((pkt_dev->xmit_mode == M_QUEUE_XMIT) ||
+		     ((pkt_dev->xmit_mode == M_START_XMIT) &&
+		     (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))))
 			return -ENOTSUPP;
 		pkt_dev->burst = value < 1 ? 1 : value;
 		sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
@@ -1198,6 +1203,9 @@ static ssize_t pktgen_if_write(struct file *file,
 			 * at module loading time
 			 */
 			pkt_dev->clone_skb = 0;
+		} else if (strcmp(f, "queue_xmit") == 0) {
+			pkt_dev->xmit_mode = M_QUEUE_XMIT;
+			pkt_dev->last_ok = 1;
 		} else {
 			sprintf(pg_result,
 				"xmit_mode -:%s:- unknown\nAvailable modes: %s",
@@ -3434,6 +3442,36 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 #endif
 		} while (--burst > 0);
 		goto out; /* Skips xmit_mode M_START_XMIT */
+	} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
+		local_bh_disable();
+		atomic_inc(&pkt_dev->skb->users);
+
+		ret = dev_queue_xmit(pkt_dev->skb);
+		switch (ret) {
+		case NET_XMIT_SUCCESS:
+			pkt_dev->sofar++;
+			pkt_dev->seq_num++;
+			pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+			break;
+		case NET_XMIT_DROP:
+		case NET_XMIT_CN:
+		/* These are all valid return codes for a qdisc but
+		 * indicate packets are being dropped or will likely
+		 * be dropped soon.
+		 */
+		case NETDEV_TX_BUSY:
+		/* qdisc may call dev_hard_start_xmit directly in cases
+		 * where no queues exist e.g. loopback device, virtual
+		 * devices, etc. In this case we need to handle
+		 * NETDEV_TX_ codes.
+		 */
+		default:
+			pkt_dev->errors++;
+			net_info_ratelimited("%s xmit error: %d\n",
+					     pkt_dev->odevname, ret);
+			break;
+		}
+		goto out;
 	}
 
 	txq = skb_get_tx_queue(odev, pkt_dev->skb);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eb49ca24274a..a9e3805af739 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1927,11 +1927,19 @@ static int do_setlink(const struct sk_buff *skb,
 
 	if (tb[IFLA_TXQLEN]) {
 		unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
-
-		if (dev->tx_queue_len ^ value)
+		unsigned long orig_len = dev->tx_queue_len;
+
+		if (dev->tx_queue_len ^ value) {
+			dev->tx_queue_len = value;
+			err = call_netdevice_notifiers(
+			      NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+			err = notifier_to_errno(err);
+			if (err) {
+				dev->tx_queue_len = orig_len;
+				goto errout;
+			}
 			status |= DO_SETLINK_NOTIFY;
-
-		dev->tx_queue_len = value;
+		}
 	}
 
 	if (tb[IFLA_OPERSTATE])
@@ -3519,7 +3527,32 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 			if (!attr)
 				goto nla_put_failure;
 
-			err = ops->fill_linkxstats(skb, dev, prividx);
+			err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
+			nla_nest_end(skb, attr);
+			if (err)
+				goto nla_put_failure;
+			*idxattr = 0;
+		}
+	}
+
+	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE,
+			     *idxattr)) {
+		const struct rtnl_link_ops *ops = NULL;
+		const struct net_device *master;
+
+		master = netdev_master_upper_dev_get(dev);
+		if (master)
+			ops = master->rtnl_link_ops;
+		if (ops && ops->fill_linkxstats) {
+			int err;
+
+			*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
+			attr = nla_nest_start(skb,
+					      IFLA_STATS_LINK_XSTATS_SLAVE);
+			if (!attr)
+				goto nla_put_failure;
+
+			err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
 			nla_nest_end(skb, attr);
 			if (err)
 				goto nla_put_failure;
@@ -3555,14 +3588,35 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
 
 	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) {
 		const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+		int attr = IFLA_STATS_LINK_XSTATS;
 
 		if (ops && ops->get_linkxstats_size) {
-			size += nla_total_size(ops->get_linkxstats_size(dev));
+			size += nla_total_size(ops->get_linkxstats_size(dev,
+									attr));
 			/* for IFLA_STATS_LINK_XSTATS */
 			size += nla_total_size(0);
 		}
 	}
 
+	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) {
+		struct net_device *_dev = (struct net_device *)dev;
+		const struct rtnl_link_ops *ops = NULL;
+		const struct net_device *master;
+
+		/* netdev_master_upper_dev_get can't take const */
+		master = netdev_master_upper_dev_get(_dev);
+		if (master)
+			ops = master->rtnl_link_ops;
+		if (ops && ops->get_linkxstats_size) {
+			int attr = IFLA_STATS_LINK_XSTATS_SLAVE;
+
+			size += nla_total_size(ops->get_linkxstats_size(dev,
+									attr));
+			/* for IFLA_STATS_LINK_XSTATS_SLAVE */
+			size += nla_total_size(0);
+		}
+	}
+
 	return size;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e7ec6d3ad5f0..3864b4b68fa1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3017,24 +3017,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 EXPORT_SYMBOL_GPL(skb_append_pagefrags);
 
 /**
- *	skb_push_rcsum - push skb and update receive checksum
- *	@skb: buffer to update
- *	@len: length of data pulled
- *
- *	This function performs an skb_push on the packet and updates
- *	the CHECKSUM_COMPLETE checksum.  It should be used on
- *	receive path processing instead of skb_push unless you know
- *	that the checksum difference is zero (e.g., a valid IP header)
- *	or you are setting ip_summed to CHECKSUM_NONE.
- */
-static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
-{
-	skb_push(skb, len);
-	skb_postpush_rcsum(skb, skb->data, len);
-	return skb->data;
-}
-
-/**
  *	skb_pull_rcsum - pull skb and update receive checksum
  *	@skb: buffer to update
  *	@len: length of data pulled
diff --git a/net/core/utils.c b/net/core/utils.c
index 3d17ca8b4744..cf5622b9ccc4 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -133,7 +133,7 @@ int in4_pton(const char *src, int srclen,
 	s = src;
 	d = dbuf;
 	i = 0;
-	while(1) {
+	while (1) {
 		int c;
 		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
 		if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) {
@@ -283,11 +283,11 @@ cont:
 	i = 15; d--;
 
 	if (dc) {
-		while(d >= dc)
+		while (d >= dc)
 			dst[i--] = *d--;
-		while(i >= dc - dbuf)
+		while (i >= dc - dbuf)
 			dst[i--] = 0;
-		while(i >= 0)
+		while (i >= 0)
 			dst[i--] = *d--;
 	} else
 		memcpy(dst, dbuf, sizeof(dbuf));
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index df4803437888..a796fc7cbc35 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -41,6 +41,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
+#include <net/nexthop.h>
 
 #define RT_MIN_TABLE 1
 
@@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr)
 	struct rtnexthop *nhp = nla_data(attr);
 	int nhs = 0, nhlen = nla_len(attr);
 
-	while(nhlen >= (int)sizeof(struct rtnexthop)) {
-		if ((nhlen -= nhp->rtnh_len) < 0)
-			return 0;
+	while (rtnh_ok(nhp, nhlen)) {
 		nhs++;
-		nhp = RTNH_NEXT(nhp);
+		nhp = rtnh_next(nhp, &nhlen);
 	}
 
-	return nhs;
+	/* leftover implies invalid nexthop configuration, discard it */
+	return nhlen > 0 ? 0 : nhs;
 }
 
 static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
@@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
 	int nhlen = nla_len(attr);
 
 	change_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		int attrlen;
+
+		if (!rtnh_ok(nhp, nhlen))
 			return -EINVAL;
 
 		nh->nh_flags  = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
 		nh->nh_oif    = nhp->rtnh_ifindex;
 		nh->nh_weight = nhp->rtnh_hops + 1;
 
-		if (attrlen) {
+		attrlen = rtnh_attrlen(nhp);
+		if (attrlen > 0) {
 			struct nlattr *gw_attr;
 
 			gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
 			nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		nhp = rtnh_next(nhp, &nhlen);
 	} endfor_nexthops(fi);
 
 	return 0;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 766d2a525ada..7e68bc6bc853 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -774,11 +774,17 @@ static int dsa_of_probe(struct device *dev)
 
 	chip_index = -1;
 	for_each_available_child_of_node(np, child) {
+		int i;
+
 		chip_index++;
 		cd = &pd->chip[chip_index];
 
 		cd->of_node = child;
 
+		/* Initialize the routing table */
+		for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+			cd->rtable[i] = DSA_RTABLE_NONE;
+
 		/* When assigning the host device, increment its refcount */
 		cd->host_dev = get_device(&mdio_bus->dev);
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 83b95fc4cede..f30bad9678f0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -595,7 +595,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
 	struct device_node *ports = dsa_get_ports(ds, np);
 	struct dsa_switch_tree *dst;
 	u32 tree, index;
-	int err;
+	int i, err;
 
 	err = dsa_parse_member(np, &tree, &index);
 	if (err)
@@ -622,6 +622,11 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
 
 	ds->dst = dst;
 	ds->index = index;
+
+	/* Initialize the routing table */
+	for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+		ds->rtable[i] = DSA_RTABLE_NONE;
+
 	dsa_dst_add_ds(dst, ds, index);
 
 	err = dsa_dst_complete(dst);
@@ -672,7 +677,7 @@ int dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(dsa_register_switch);
 
-void _dsa_unregister_switch(struct dsa_switch *ds)
+static void _dsa_unregister_switch(struct dsa_switch *ds)
 {
 	struct dsa_switch_tree *dst = ds->dst;
 
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 8c004a0c8d64..d7efbf0dad20 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev)
 	return 0;
 }
 
-static int lowpan_neigh_construct(struct neighbour *n)
+static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
 {
 	struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
 
@@ -130,8 +130,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
 
 	pr_debug("adding new link\n");
 
-	if (!tb[IFLA_LINK] ||
-	    !net_eq(dev_net(ldev), &init_net))
+	if (!tb[IFLA_LINK])
 		return -EINVAL;
 	/* find and hold wpan device */
 	wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK]));
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index ef185dd4110d..649e7d45e88f 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -262,7 +262,7 @@ static inline bool lowpan_rx_h_check(struct sk_buff *skb)
 
 	/* check on ieee802154 conform 6LoWPAN header */
 	if (!ieee802154_is_data(fc) ||
-	    !ieee802154_is_intra_pan(fc))
+	    !ieee802154_skb_is_intra_pan_addressing(fc, skb))
 		return false;
 
 	/* check if we can dereference the dispatch */
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index c35fdfa6d04e..cb7176cd4cd6 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -140,6 +140,8 @@ wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size)
 	rdev->wpan_phy.dev.class = &wpan_phy_class;
 	rdev->wpan_phy.dev.platform_data = rdev;
 
+	wpan_phy_net_set(&rdev->wpan_phy, &init_net);
+
 	init_waitqueue_head(&rdev->dev_wait);
 
 	return &rdev->wpan_phy;
@@ -207,6 +209,49 @@ void wpan_phy_free(struct wpan_phy *phy)
 }
 EXPORT_SYMBOL(wpan_phy_free);
 
+int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
+			   struct net *net)
+{
+	struct wpan_dev *wpan_dev;
+	int err = 0;
+
+	list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) {
+		if (!wpan_dev->netdev)
+			continue;
+		wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+		err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
+		if (err)
+			break;
+		wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+	}
+
+	if (err) {
+		/* failed -- clean up to old netns */
+		net = wpan_phy_net(&rdev->wpan_phy);
+
+		list_for_each_entry_continue_reverse(wpan_dev,
+						     &rdev->wpan_dev_list,
+						     list) {
+			if (!wpan_dev->netdev)
+				continue;
+			wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+			err = dev_change_net_namespace(wpan_dev->netdev, net,
+						       "wpan%d");
+			WARN_ON(err);
+			wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+		}
+
+		return err;
+	}
+
+	wpan_phy_net_set(&rdev->wpan_phy, net);
+
+	err = device_rename(&rdev->wpan_phy.dev, dev_name(&rdev->wpan_phy.dev));
+	WARN_ON(err);
+
+	return 0;
+}
+
 void cfg802154_dev_free(struct cfg802154_registered_device *rdev)
 {
 	kfree(rdev);
@@ -286,14 +331,34 @@ static struct notifier_block cfg802154_netdev_notifier = {
 	.notifier_call = cfg802154_netdev_notifier_call,
 };
 
+static void __net_exit cfg802154_pernet_exit(struct net *net)
+{
+	struct cfg802154_registered_device *rdev;
+
+	rtnl_lock();
+	list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
+		if (net_eq(wpan_phy_net(&rdev->wpan_phy), net))
+			WARN_ON(cfg802154_switch_netns(rdev, &init_net));
+	}
+	rtnl_unlock();
+}
+
+static struct pernet_operations cfg802154_pernet_ops = {
+	.exit = cfg802154_pernet_exit,
+};
+
 static int __init wpan_phy_class_init(void)
 {
 	int rc;
 
-	rc = wpan_phy_sysfs_init();
+	rc = register_pernet_device(&cfg802154_pernet_ops);
 	if (rc)
 		goto err;
 
+	rc = wpan_phy_sysfs_init();
+	if (rc)
+		goto err_sysfs;
+
 	rc = register_netdevice_notifier(&cfg802154_netdev_notifier);
 	if (rc)
 		goto err_nl;
@@ -315,6 +380,8 @@ err_notifier:
 	unregister_netdevice_notifier(&cfg802154_netdev_notifier);
 err_nl:
 	wpan_phy_sysfs_exit();
+err_sysfs:
+	unregister_pernet_device(&cfg802154_pernet_ops);
 err:
 	return rc;
 }
@@ -326,6 +393,7 @@ static void __exit wpan_phy_class_exit(void)
 	ieee802154_nl_exit();
 	unregister_netdevice_notifier(&cfg802154_netdev_notifier);
 	wpan_phy_sysfs_exit();
+	unregister_pernet_device(&cfg802154_pernet_ops);
 }
 module_exit(wpan_phy_class_exit);
 
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index 231fade959f3..81141f58d079 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -38,6 +38,8 @@ wpan_phy_to_rdev(struct wpan_phy *wpan_phy)
 extern struct list_head cfg802154_rdev_list;
 extern int cfg802154_rdev_list_generation;
 
+int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
+			   struct net *net);
 /* free object */
 void cfg802154_dev_free(struct cfg802154_registered_device *rdev);
 struct cfg802154_registered_device *
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 116187b5c267..d90a4ed5b8a0 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -80,7 +80,8 @@ __cfg802154_wpan_dev_from_attrs(struct net *netns, struct nlattr **attrs)
 	list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
 		struct wpan_dev *wpan_dev;
 
-		/* TODO netns compare */
+		if (wpan_phy_net(&rdev->wpan_phy) != netns)
+			continue;
 
 		if (have_wpan_dev_id && rdev->wpan_phy_idx != wpan_phy_idx)
 			continue;
@@ -175,7 +176,8 @@ __cfg802154_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
 	if (!rdev)
 		return ERR_PTR(-ENODEV);
 
-	/* TODO netns compare */
+	if (netns != wpan_phy_net(&rdev->wpan_phy))
+		return ERR_PTR(-ENODEV);
 
 	return rdev;
 }
@@ -233,6 +235,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
 
 	[NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 },
 
+	[NL802154_ATTR_PID] = { .type = NLA_U32 },
+	[NL802154_ATTR_NETNS_FD] = { .type = NLA_U32 },
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 	[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
 	[NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
@@ -590,7 +594,6 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
 		struct cfg802154_registered_device *rdev;
 		int ifidx = nla_get_u32(tb[NL802154_ATTR_IFINDEX]);
 
-		/* TODO netns */
 		netdev = __dev_get_by_index(&init_net, ifidx);
 		if (!netdev)
 			return -ENODEV;
@@ -629,7 +632,8 @@ nl802154_dump_wpan_phy(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
-		/* TODO net ns compare */
+		if (!net_eq(wpan_phy_net(&rdev->wpan_phy), sock_net(skb->sk)))
+			continue;
 		if (++idx <= state->start)
 			continue;
 		if (state->filter_wpan_phy != -1 &&
@@ -871,7 +875,8 @@ nl802154_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rtnl_lock();
 	list_for_each_entry(rdev, &cfg802154_rdev_list, list) {
-		/* TODO netns compare */
+		if (!net_eq(wpan_phy_net(&rdev->wpan_phy), sock_net(skb->sk)))
+			continue;
 		if (wp_idx < wp_start) {
 			wp_idx++;
 			continue;
@@ -1271,6 +1276,37 @@ nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info)
 	return rdev_set_ackreq_default(rdev, wpan_dev, ackreq);
 }
 
+static int nl802154_wpan_phy_netns(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net *net;
+	int err;
+
+	if (info->attrs[NL802154_ATTR_PID]) {
+		u32 pid = nla_get_u32(info->attrs[NL802154_ATTR_PID]);
+
+		net = get_net_ns_by_pid(pid);
+	} else if (info->attrs[NL802154_ATTR_NETNS_FD]) {
+		u32 fd = nla_get_u32(info->attrs[NL802154_ATTR_NETNS_FD]);
+
+		net = get_net_ns_by_fd(fd);
+	} else {
+		return -EINVAL;
+	}
+
+	if (IS_ERR(net))
+		return PTR_ERR(net);
+
+	err = 0;
+
+	/* check if anything to do */
+	if (!net_eq(wpan_phy_net(&rdev->wpan_phy), net))
+		err = cfg802154_switch_netns(rdev, net);
+
+	put_net(net);
+	return err;
+}
+
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
 	[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
@@ -2262,6 +2298,14 @@ static const struct genl_ops nl802154_ops[] = {
 				  NL802154_FLAG_NEED_RTNL,
 	},
 	{
+		.cmd = NL802154_CMD_SET_WPAN_PHY_NETNS,
+		.doit = nl802154_wpan_phy_netns,
+		.policy = nl802154_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL802154_FLAG_NEED_WPAN_PHY |
+				  NL802154_FLAG_NEED_RTNL,
+	},
+	{
 		.cmd = NL802154_CMD_SET_PAN_ID,
 		.doit = nl802154_set_pan_id,
 		.policy = nl802154_policy,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d39e9e47a26e..55513e654d79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -73,7 +73,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/kmod.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
@@ -1916,6 +1916,3 @@ static int __init ipv4_proc_init(void)
 	return 0;
 }
 #endif /* CONFIG_PROC_FS */
-
-MODULE_ALIAS_NETPROTO(PF_INET);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e333bc86bd39..415e117967c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1834,7 +1834,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
 	if (!skb)
 		goto errout;
 
@@ -1846,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
 	return;
 errout:
 	if (err < 0)
@@ -1903,7 +1903,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
 	}
 
 	err = -ENOBUFS;
-	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
 	if (!skb)
 		goto errout;
 
@@ -2027,16 +2027,16 @@ static void inet_forward_change(struct net *net)
 
 	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
+
 		if (on)
 			dev_disable_lro(dev);
-		rcu_read_lock();
-		in_dev = __in_dev_get_rcu(dev);
+
+		in_dev = __in_dev_get_rtnl(dev);
 		if (in_dev) {
 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
 						    dev->ifindex, &in_dev->cnf);
 		}
-		rcu_read_unlock();
 	}
 }
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 477937465a20..d95631d09248 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -23,6 +23,11 @@ struct esp_skb_cb {
 	void *tmp;
 };
 
+struct esp_output_extra {
+	__be32 seqhi;
+	u32 esphoff;
+};
+
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
 static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
@@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
  *
  * TODO: Use spare space in skb for this where possible.
  */
-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen)
 {
 	unsigned int len;
 
-	len = seqhilen;
+	len = extralen;
 
 	len += crypto_aead_ivsize(aead);
 
@@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
 	return kmalloc(len, GFP_ATOMIC);
 }
 
-static inline __be32 *esp_tmp_seqhi(void *tmp)
+static inline void *esp_tmp_extra(void *tmp)
 {
-	return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+	return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra));
 }
-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen)
 {
 	return crypto_aead_ivsize(aead) ?
-	       PTR_ALIGN((u8 *)tmp + seqhilen,
-			 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
+	       PTR_ALIGN((u8 *)tmp + extralen,
+			 crypto_aead_alignmask(aead) + 1) : tmp + extralen;
 }
 
 static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
@@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
 {
 	struct ip_esp_hdr *esph = (void *)(skb->data + offset);
 	void *tmp = ESP_SKB_CB(skb)->tmp;
-	__be32 *seqhi = esp_tmp_seqhi(tmp);
+	__be32 *seqhi = esp_tmp_extra(tmp);
 
 	esph->seq_no = esph->spi;
 	esph->spi = *seqhi;
@@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
 
 static void esp_output_restore_header(struct sk_buff *skb)
 {
-	esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+	void *tmp = ESP_SKB_CB(skb)->tmp;
+	struct esp_output_extra *extra = esp_tmp_extra(tmp);
+
+	esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff -
+				sizeof(__be32));
 }
 
 static void esp_output_done_esn(struct crypto_async_request *base, int err)
@@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
+	struct esp_output_extra *extra;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
 	struct aead_request *req;
@@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int tfclen;
 	int nfrags;
 	int assoclen;
-	int seqhilen;
-	__be32 *seqhi;
+	int extralen;
 	__be64 seqno;
 
 	/* skb is pure payload to encrypt */
@@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	nfrags = err;
 
 	assoclen = sizeof(*esph);
-	seqhilen = 0;
+	extralen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
-		seqhilen += sizeof(__be32);
-		assoclen += seqhilen;
+		extralen += sizeof(*extra);
+		assoclen += sizeof(__be32);
 	}
 
-	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+	tmp = esp_alloc_tmp(aead, nfrags, extralen);
 	if (!tmp) {
 		err = -ENOMEM;
 		goto error;
 	}
 
-	seqhi = esp_tmp_seqhi(tmp);
-	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	extra = esp_tmp_extra(tmp);
+	iv = esp_tmp_iv(aead, tmp, extralen);
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
 
@@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	 * encryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
-		*seqhi = esph->spi;
+		extra->esphoff = (unsigned char *)esph -
+				 skb_transport_header(skb);
+		esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+		extra->seqhi = esph->spi;
 		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
 		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
 	}
@@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 
 	ESP_SKB_CB(skb)->tmp = tmp;
-	seqhi = esp_tmp_seqhi(tmp);
+	seqhi = esp_tmp_extra(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index c4c3e439f424..b798862b6be5 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol);
 
 /* Fills in tpi and returns header length to be pulled. */
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-		     bool *csum_err, __be16 proto)
+		     bool *csum_err, __be16 proto, int nhs)
 {
 	const struct gre_base_hdr *greh;
 	__be32 *options;
 	int hdr_len;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+	if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr))))
 		return -EINVAL;
 
-	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	greh = (struct gre_base_hdr *)(skb->data + nhs);
 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
 		return -EINVAL;
 
 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
 	hdr_len = gre_calc_hlen(tpi->flags);
 
-	if (!pskb_may_pull(skb, hdr_len))
+	if (!pskb_may_pull(skb, nhs + hdr_len))
 		return -EINVAL;
 
-	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	greh = (struct gre_base_hdr *)(skb->data + nhs);
 	tpi->proto = greh->protocol;
 
 	options = (__be32 *)(greh + 1);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8eec78f53f9e..5b1481be0282 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -49,12 +49,6 @@
 #include <net/gre.h>
 #include <net/dst_metadata.h>
 
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
-#endif
-
 /*
    Problems & solutions
    --------------------
@@ -226,12 +220,14 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	 * by themselves???
 	 */
 
+	const struct iphdr *iph = (struct iphdr *)skb->data;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
 	struct tnl_ptk_info tpi;
 	bool csum_err = false;
 
-	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) {
+	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
+			     iph->ihl * 4) < 0) {
 		if (!csum_err)		/* ignore csum errors. */
 			return;
 	}
@@ -347,7 +343,7 @@ static int gre_rcv(struct sk_buff *skb)
 	}
 #endif
 
-	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP));
+	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
 	if (hdr_len < 0)
 		goto drop;
 
@@ -1154,6 +1150,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 {
 	struct nlattr *tb[IFLA_MAX + 1];
 	struct net_device *dev;
+	LIST_HEAD(list_kill);
 	struct ip_tunnel *t;
 	int err;
 
@@ -1169,8 +1166,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 	t->collect_md = true;
 
 	err = ipgre_newlink(net, dev, tb, NULL);
-	if (err < 0)
-		goto out;
+	if (err < 0) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
 
 	/* openvswitch users expect packet sizes to be unrestricted,
 	 * so set the largest MTU we can.
@@ -1179,9 +1178,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 	if (err)
 		goto out;
 
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0)
+		goto out;
+
 	return dev;
 out:
-	free_netdev(dev);
+	ip_tunnel_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cbac493c913a..e23f141c9ba5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
 		return dst_output(net, sk, skb);
 	}
 #endif
-	mtu = ip_skb_dst_mtu(skb);
+	mtu = ip_skb_dst_mtu(sk, skb);
 	if (skb_is_gso(skb))
 		return ip_finish_output_gso(net, sk, skb, mtu);
 
@@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 	iph = ip_hdr(skb);
 
-	mtu = ip_skb_dst_mtu(skb);
+	mtu = ip_skb_dst_mtu(sk, skb);
 	if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
 		mtu = IPCB(skb)->frag_max_size;
 
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2ed9dd2b5f2f..1d71c40eaaf3 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
 __be32 ic_gateway = NONE;	/* Gateway IP address */
 
-__be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+#ifdef IPCONFIG_DYNAMIC
+static __be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+#endif
 
 __be32 ic_servaddr = NONE;	/* Boot server IP address */
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 978370132f29..4ae3f8e6c6cc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,14 +148,14 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, IPPROTO_IPIP, 0);
+				 t->parms.link, 0, iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
 
 	if (type == ICMP_REDIRECT) {
 		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      IPPROTO_IPIP, 0);
+			      iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
@@ -177,12 +177,19 @@ out:
 	return err;
 }
 
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
 	/* no tunnel info required for ipip. */
 	.proto = htons(ETH_P_IP),
 };
 
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+	/* no tunnel info required for mplsip. */
+	.proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
@@ -193,11 +200,23 @@ static int ipip_rcv(struct sk_buff *skb)
 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 			iph->saddr, iph->daddr, 0);
 	if (tunnel) {
+		const struct tnl_ptk_info *tpi;
+
+		if (tunnel->parms.iph.protocol != ipproto &&
+		    tunnel->parms.iph.protocol != 0)
+			goto drop;
+
 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto drop;
-		if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+		if (ipproto == IPPROTO_MPLS)
+			tpi = &mplsip_tpi;
+		else
+#endif
+			tpi = &ipip_tpi;
+		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 	}
 
 	return -1;
@@ -207,24 +226,51 @@ drop:
 	return 0;
 }
 
+static int ipip_rcv(struct sk_buff *skb)
+{
+	return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+	return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
 /*
  *	This function assumes it is being called from dev_queue_xmit()
  *	and that skb is filled properly by that function.
  */
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *tiph = &tunnel->parms.iph;
+	u8 ipproto;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipproto = IPPROTO_IPIP;
+		break;
+#if IS_ENABLED(CONFIG_MPLS)
+	case htons(ETH_P_MPLS_UC):
+		ipproto = IPPROTO_MPLS;
+		break;
+#endif
+	default:
+		goto tx_error;
+	}
 
-	if (unlikely(skb->protocol != htons(ETH_P_IP)))
+	if (tiph->protocol != ipproto && tiph->protocol != 0)
 		goto tx_error;
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
 		goto tx_error;
 
-	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+	skb_set_inner_ipproto(skb, ipproto);
 
-	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
+	ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
 
 tx_error:
@@ -234,6 +280,20 @@ tx_error:
 	return NETDEV_TX_OK;
 }
 
+static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
+{
+	switch (ipproto) {
+	case 0:
+	case IPPROTO_IPIP:
+#if IS_ENABLED(CONFIG_MPLS)
+	case IPPROTO_MPLS:
+#endif
+		return true;
+	}
+
+	return false;
+}
+
 static int
 ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -244,7 +304,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return -EFAULT;
 
 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
-		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+		if (p.iph.version != 4 ||
+		    !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 			return -EINVAL;
 	}
@@ -301,10 +362,23 @@ static int ipip_tunnel_init(struct net_device *dev)
 
 	tunnel->tun_hlen = 0;
 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
-	tunnel->parms.iph.protocol = IPPROTO_IPIP;
 	return ip_tunnel_init(dev);
 }
 
+static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	u8 proto;
+
+	if (!data || !data[IFLA_IPTUN_PROTO])
+		return 0;
+
+	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+	if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
 static void ipip_netlink_parms(struct nlattr *data[],
 			       struct ip_tunnel_parm *parms)
 {
@@ -335,6 +409,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
 	if (data[IFLA_IPTUN_TOS])
 		parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
 
+	if (data[IFLA_IPTUN_PROTO])
+		parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
 	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 		parms->iph.frag_off = htons(IP_DF);
 }
@@ -427,6 +504,8 @@ static size_t ipip_get_size(const struct net_device *dev)
 		nla_total_size(1) +
 		/* IFLA_IPTUN_TOS */
 		nla_total_size(1) +
+		/* IFLA_IPTUN_PROTO */
+		nla_total_size(1) +
 		/* IFLA_IPTUN_PMTUDISC */
 		nla_total_size(1) +
 		/* IFLA_IPTUN_ENCAP_TYPE */
@@ -450,6 +529,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
 	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
 		       !!(parm->iph.frag_off & htons(IP_DF))))
 		goto nla_put_failure;
@@ -476,6 +556,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
 	[IFLA_IPTUN_REMOTE]		= { .type = NLA_U32 },
 	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
 	[IFLA_IPTUN_TOS]		= { .type = NLA_U8 },
+	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
 	[IFLA_IPTUN_PMTUDISC]		= { .type = NLA_U8 },
 	[IFLA_IPTUN_ENCAP_TYPE]		= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
@@ -489,6 +570,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
 	.policy		= ipip_policy,
 	.priv_size	= sizeof(struct ip_tunnel),
 	.setup		= ipip_tunnel_setup,
+	.validate	= ipip_tunnel_validate,
 	.newlink	= ipip_newlink,
 	.changelink	= ipip_changelink,
 	.dellink	= ip_tunnel_dellink,
@@ -503,6 +585,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
 	.priority	=	1,
 };
 
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+	.handler	=	mplsip_rcv,
+	.err_handler	=	ipip_err,
+	.priority	=	1,
+};
+#endif
+
 static int __net_init ipip_init_net(struct net *net)
 {
 	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
@@ -525,7 +615,7 @@ static int __init ipip_init(void)
 {
 	int err;
 
-	pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
+	pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
 
 	err = register_pernet_device(&ipip_net_ops);
 	if (err < 0)
@@ -533,8 +623,15 @@ static int __init ipip_init(void)
 	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 	if (err < 0) {
 		pr_info("%s: can't register tunnel\n", __func__);
-		goto xfrm_tunnel_failed;
+		goto xfrm_tunnel_ipip_failed;
+	}
+#if IS_ENABLED(CONFIG_MPLS)
+	err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+	if (err < 0) {
+		pr_info("%s: can't register tunnel\n", __func__);
+		goto xfrm_tunnel_mplsip_failed;
 	}
+#endif
 	err = rtnl_link_register(&ipip_link_ops);
 	if (err < 0)
 		goto rtnl_link_failed;
@@ -543,8 +640,13 @@ out:
 	return err;
 
 rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+	xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
+xfrm_tunnel_mplsip_failed:
+
+#endif
 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-xfrm_tunnel_failed:
+xfrm_tunnel_ipip_failed:
 	unregister_pernet_device(&ipip_net_ops);
 	goto out;
 }
@@ -554,7 +656,10 @@ static void __exit ipip_fini(void)
 	rtnl_link_unregister(&ipip_link_ops);
 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 		pr_info("%s: can't deregister tunnel\n", __func__);
-
+#if IS_ENABLED(CONFIG_MPLS)
+	if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
+		pr_info("%s: can't deregister tunnel\n", __func__);
+#endif
 	unregister_pernet_device(&ipip_net_ops);
 }
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 21a38e296fe2..5ad48ec77710 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 
-	if (c)
+	if (c) {
+		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 		c->mfc_un.res.minvif = MAXVIFS;
+	}
 	return c;
 }
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2033f929aa66..c8dd9e26b185 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -89,22 +89,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	__be32 src_ipaddr, tgt_ipaddr;
 	long ret;
 
-#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
-
-	if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
-		  ARPT_INV_ARPOP))
+	if (NF_INVF(arpinfo, ARPT_INV_ARPOP,
+		    (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop))
 		return 0;
 
-	if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
-		  ARPT_INV_ARPHRD))
+	if (NF_INVF(arpinfo, ARPT_INV_ARPHRD,
+		    (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd))
 		return 0;
 
-	if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
-		  ARPT_INV_ARPPRO))
+	if (NF_INVF(arpinfo, ARPT_INV_ARPPRO,
+		    (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro))
 		return 0;
 
-	if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
-		  ARPT_INV_ARPHLN))
+	if (NF_INVF(arpinfo, ARPT_INV_ARPHLN,
+		    (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln))
 		return 0;
 
 	src_devaddr = arpptr;
@@ -115,31 +113,32 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	arpptr += dev->addr_len;
 	memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
 
-	if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
-		  ARPT_INV_SRCDEVADDR) ||
-	    FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
-		  ARPT_INV_TGTDEVADDR))
+	if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR,
+		    arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr,
+					dev->addr_len)) ||
+	    NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR,
+		    arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr,
+					dev->addr_len)))
 		return 0;
 
-	if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
-		  ARPT_INV_SRCIP) ||
-	    FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
-		  ARPT_INV_TGTIP))
+	if (NF_INVF(arpinfo, ARPT_INV_SRCIP,
+		    (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) ||
+	    NF_INVF(arpinfo, ARPT_INV_TGTIP,
+		    (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr))
 		return 0;
 
 	/* Look for ifname matches.  */
 	ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
 
-	if (FWINV(ret != 0, ARPT_INV_VIA_IN))
+	if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0))
 		return 0;
 
 	ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
 
-	if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
+	if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0))
 		return 0;
 
 	return 1;
-#undef FWINV
 }
 
 static inline int arp_checkentry(const struct arpt_arp *arp)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 54906e0e8e0c..f0df66f54ce6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -58,32 +58,31 @@ ip_packet_match(const struct iphdr *ip,
 {
 	unsigned long ret;
 
-#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
-
-	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
-		  IPT_INV_SRCIP) ||
-	    FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
-		  IPT_INV_DSTIP))
+	if (NF_INVF(ipinfo, IPT_INV_SRCIP,
+		    (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
+	    NF_INVF(ipinfo, IPT_INV_DSTIP,
+		    (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
 		return false;
 
 	ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
 
-	if (FWINV(ret != 0, IPT_INV_VIA_IN))
+	if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
 		return false;
 
 	ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
 
-	if (FWINV(ret != 0, IPT_INV_VIA_OUT))
+	if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
 		return false;
 
 	/* Check specific protocol */
 	if (ipinfo->proto &&
-	    FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
+	    NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
 		return false;
 
 	/* If we have a fragment rule but the packet is not a fragment
 	 * then we return zero */
-	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
+	if (NF_INVF(ipinfo, IPT_INV_FRAG,
+		    (ipinfo->flags & IPT_F_FRAG) && !isfrag))
 		return false;
 
 	return true;
@@ -122,7 +121,6 @@ static inline bool unconditional(const struct ipt_entry *e)
 
 	return e->target_offset == sizeof(struct ipt_entry) &&
 	       memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
-#undef FWINV
 }
 
 /* for const-correctness */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 57fc97cdac70..aebdb337fd7e 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -87,10 +87,6 @@ iptable_mangle_hook(void *priv,
 {
 	if (state->hook == NF_INET_LOCAL_OUT)
 		return ipt_mangle_out(skb, state);
-	if (state->hook == NF_INET_POST_ROUTING)
-		return ipt_do_table(skb, state,
-				    state->net->ipv4.iptable_mangle);
-	/* PREROUTING/INPUT/FORWARD: */
 	return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
 }
 
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b6ea57ec5e14..fd8220213afc 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -24,6 +24,9 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
 	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
 		return NULL;
 
+	if (ip_hdr(oldskb)->protocol != IPPROTO_TCP)
+		return NULL;
+
 	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
 				 sizeof(struct tcphdr), _oth);
 	if (oth == NULL)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5c7ed147449c..032a96d78c99 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
 		((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
 }
 
+static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
+{
+	struct tcp_repair_window opt;
+
+	if (!tp->repair)
+		return -EPERM;
+
+	if (len != sizeof(opt))
+		return -EINVAL;
+
+	if (copy_from_user(&opt, optbuf, sizeof(opt)))
+		return -EFAULT;
+
+	if (opt.max_window < opt.snd_wnd)
+		return -EINVAL;
+
+	if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
+		return -EINVAL;
+
+	if (after(opt.rcv_wup, tp->rcv_nxt))
+		return -EINVAL;
+
+	tp->snd_wl1	= opt.snd_wl1;
+	tp->snd_wnd	= opt.snd_wnd;
+	tp->max_window	= opt.max_window;
+
+	tp->rcv_wnd	= opt.rcv_wnd;
+	tp->rcv_wup	= opt.rcv_wup;
+
+	return 0;
+}
+
 static int tcp_repair_options_est(struct tcp_sock *tp,
 		struct tcp_repair_opt __user *optbuf, unsigned int len)
 {
@@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			tp->tsoffset = val - tcp_time_stamp;
 		break;
+	case TCP_REPAIR_WINDOW:
+		err = tcp_repair_set_window(tp, optval, optlen);
+		break;
 	case TCP_NOTSENT_LOWAT:
 		tp->notsent_lowat = val;
 		sk->sk_write_space(sk);
@@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EINVAL;
 		break;
 
+	case TCP_REPAIR_WINDOW: {
+		struct tcp_repair_window opt;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		if (len != sizeof(opt))
+			return -EINVAL;
+
+		if (!tp->repair)
+			return -EPERM;
+
+		opt.snd_wl1	= tp->snd_wl1;
+		opt.snd_wnd	= tp->snd_wnd;
+		opt.max_window	= tp->max_window;
+		opt.rcv_wnd	= tp->rcv_wnd;
+		opt.rcv_wup	= tp->rcv_wup;
+
+		if (copy_to_user(optval, &opt, len))
+			return -EFAULT;
+		return 0;
+	}
 	case TCP_QUEUE_SEQ:
 		if (tp->repair_queue == TCP_SEND_QUEUE)
 			val = tp->write_seq;
@@ -2969,8 +3026,18 @@ static void __tcp_alloc_md5sig_pool(void)
 		return;
 
 	for_each_possible_cpu(cpu) {
+		void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
 		struct ahash_request *req;
 
+		if (!scratch) {
+			scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
+					       sizeof(struct tcphdr),
+					       GFP_KERNEL,
+					       cpu_to_node(cpu));
+			if (!scratch)
+				return;
+			per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
+		}
 		if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
 			continue;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3708de2a6683..32b048e524d6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1018,27 +1018,28 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 			      GFP_KERNEL);
 }
 
-static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
-					__be32 daddr, __be32 saddr, int nbytes)
+static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
+				   __be32 daddr, __be32 saddr,
+				   const struct tcphdr *th, int nbytes)
 {
 	struct tcp4_pseudohdr *bp;
 	struct scatterlist sg;
+	struct tcphdr *_th;
 
-	bp = &hp->md5_blk.ip4;
-
-	/*
-	 * 1. the TCP pseudo-header (in the order: source IP address,
-	 * destination IP address, zero-padded protocol number, and
-	 * segment length)
-	 */
+	bp = hp->scratch;
 	bp->saddr = saddr;
 	bp->daddr = daddr;
 	bp->pad = 0;
 	bp->protocol = IPPROTO_TCP;
 	bp->len = cpu_to_be16(nbytes);
 
-	sg_init_one(&sg, bp, sizeof(*bp));
-	ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+	_th = (struct tcphdr *)(bp + 1);
+	memcpy(_th, th, sizeof(*th));
+	_th->check = 0;
+
+	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+				sizeof(*bp) + sizeof(*th));
 	return crypto_ahash_update(hp->md5_req);
 }
 
@@ -1055,9 +1056,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 
 	if (crypto_ahash_init(req))
 		goto clear_hash;
-	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
-		goto clear_hash;
-	if (tcp_md5_hash_header(hp, th))
+	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
 		goto clear_hash;
 	if (tcp_md5_hash_key(hp, key))
 		goto clear_hash;
@@ -1101,9 +1100,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
 	if (crypto_ahash_init(req))
 		goto clear_hash;
 
-	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
-		goto clear_hash;
-	if (tcp_md5_hash_header(hp, th))
+	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
 		goto clear_hash;
 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
 		goto clear_hash;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b1bcba0563f2..b26aa870adc0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2753,7 +2753,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	struct sk_buff *hole = NULL;
-	u32 last_lost;
+	u32 max_segs, last_lost;
 	int mib_idx;
 	int fwd_rexmitting = 0;
 
@@ -2773,6 +2773,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		last_lost = tp->snd_una;
 	}
 
+	max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
 	tcp_for_write_queue_from(skb, sk) {
 		__u8 sacked = TCP_SKB_CB(skb)->sacked;
 		int segs;
@@ -2786,6 +2787,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
 		if (segs <= 0)
 			return;
+		/* In case tcp_shift_skb_data() have aggregated large skbs,
+		 * we need to make sure not sending too bigs TSO packets
+		 */
+		segs = min_t(int, segs, max_segs);
 
 		if (fwd_rexmitting) {
 begin_fwd:
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 0d0171830620..ec35eaa5c029 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/mpls.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
@@ -16,11 +17,14 @@
 
 static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
 static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnelmpls4_handlers __read_mostly;
 static DEFINE_MUTEX(tunnel4_mutex);
 
 static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
 {
-	return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
+	return (family == AF_INET) ? &tunnel4_handlers :
+		(family == AF_INET6) ? &tunnel64_handlers :
+		&tunnelmpls4_handlers;
 }
 
 int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
@@ -125,6 +129,26 @@ drop:
 }
 #endif
 
+#if IS_ENABLED(CONFIG_MPLS)
+static int tunnelmpls4_rcv(struct sk_buff *skb)
+{
+	struct xfrm_tunnel *handler;
+
+	if (!pskb_may_pull(skb, sizeof(struct mpls_label)))
+		goto drop;
+
+	for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+		if (!handler->handler(skb))
+			return 0;
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+#endif
+
 static void tunnel4_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;
@@ -145,6 +169,17 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_MPLS)
+static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+{
+	struct xfrm_tunnel *handler;
+
+	for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+		if (!handler->err_handler(skb, info))
+			break;
+}
+#endif
+
 static const struct net_protocol tunnel4_protocol = {
 	.handler	=	tunnel4_rcv,
 	.err_handler	=	tunnel4_err,
@@ -161,24 +196,47 @@ static const struct net_protocol tunnel64_protocol = {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct net_protocol tunnelmpls4_protocol = {
+	.handler	=	tunnelmpls4_rcv,
+	.err_handler	=	tunnelmpls4_err,
+	.no_policy	=	1,
+	.netns_ok	=	1,
+};
+#endif
+
 static int __init tunnel4_init(void)
 {
-	if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) {
-		pr_err("%s: can't add protocol\n", __func__);
-		return -EAGAIN;
-	}
+	if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP))
+		goto err;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
-		pr_err("tunnel64 init: can't add protocol\n");
 		inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
-		return -EAGAIN;
+		goto err;
+	}
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+	if (inet_add_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) {
+		inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
+#if IS_ENABLED(CONFIG_IPV6)
+		inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6);
+#endif
+		goto err;
 	}
 #endif
 	return 0;
+
+err:
+	pr_err("%s: can't add protocol\n", __func__);
+	return -EAGAIN;
 }
 
 static void __exit tunnel4_fini(void)
 {
+#if IS_ENABLED(CONFIG_MPLS)
+	if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS))
+		pr_err("tunnelmpls4 close: can't remove protocol\n");
+#endif
 #if IS_ENABLED(CONFIG_IPV6)
 	if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
 		pr_err("tunnel64 close: can't remove protocol\n");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0ff31d97d485..ca5e8ea29538 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
 	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-				__be32 saddr, unsigned short hnum, __be16 sport,
-				__be32 daddr, __be16 dport, int dif)
+static int compute_score(struct sock *sk, struct net *net,
+			 __be32 saddr, __be16 sport,
+			 __be32 daddr, unsigned short hnum, int dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-/*
- * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)
- */
-static inline int compute_score2(struct sock *sk, struct net *net,
-				 __be32 saddr, __be16 sport,
-				 __be32 daddr, unsigned int hnum, int dif)
-{
-	int score;
-	struct inet_sock *inet;
-
-	if (!net_eq(sock_net(sk), net) ||
-	    ipv6_only_sock(sk))
-		return -1;
-
-	inet = inet_sk(sk);
-
-	if (inet->inet_rcv_saddr != daddr ||
-	    inet->inet_num != hnum)
-		return -1;
-
-	score = (sk->sk_family == PF_INET) ? 2 : 1;
-
-	if (inet->inet_daddr) {
-		if (inet->inet_daddr != saddr)
-			return -1;
-		score += 4;
-	}
-
-	if (inet->inet_dport) {
-		if (inet->inet_dport != sport)
-			return -1;
-		score += 4;
-	}
-
-	if (sk->sk_bound_dev_if) {
-		if (sk->sk_bound_dev_if != dif)
-			return -1;
-		score += 4;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
-		score++;
-
-	return score;
-}
-
 static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 		       const __u16 lport, const __be32 faddr,
 		       const __be16 fport)
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 			      udp_ehash_secret + net_hash_mix(net));
 }
 
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
 		__be32 saddr, __be16 sport,
 		__be32 daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2,
+		struct udp_hslot *hslot2,
 		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 	result = NULL;
 	badness = 0;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		score = compute_score2(sk, net, saddr, sport,
+		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
@@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 
 		result = udp4_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2, skb);
+					  hslot2, skb);
 		if (!result) {
+			unsigned int old_slot2 = slot2;
 			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
 			slot2 = hash2 & udptable->mask;
+			/* avoid searching the same slot again. */
+			if (unlikely(slot2 == old_slot2))
+				return result;
+
 			hslot2 = &udptable->hash2[slot2];
 			if (hslot->count < hslot2->count)
 				goto begin;
 
 			result = udp4_lib_lookup2(net, saddr, sport,
-						  htonl(INADDR_ANY), hnum, dif,
-						  hslot2, slot2, skb);
+						  daddr, hnum, dif,
+						  hslot2, skb);
 		}
 		return result;
 	}
@@ -572,8 +531,8 @@ begin:
 	result = NULL;
 	badness = 0;
 	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, saddr, hnum, sport,
-				      daddr, dport, dif);
+		score = compute_score(sk, net, saddr, sport,
+				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -1755,8 +1714,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 			return err;
 	}
 
-	return skb_checksum_init_zero_check(skb, proto, uh->check,
-					    inet_compute_pseudo);
+	/* Note, we are only interested in != 0 or == 0, thus the
+	 * force to int.
+	 */
+	return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+							 inet_compute_pseudo);
 }
 
 /*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a1f6b7b31531..24f1b0898e40 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -547,7 +547,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+	skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL);
 	if (!skb)
 		goto errout;
 
@@ -559,7 +559,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL);
 	return;
 errout:
 	rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fd11f5856ce8..bd59c343d35f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (!(type & ICMPV6_INFOMSG_MASK))
 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
-			ping_err(skb, offset, info);
+			ping_err(skb, offset, ntohl(info));
 }
 
 static int icmpv6_rcv(struct sk_buff *skb);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index b2025bf3da4a..c0cbcb259f5a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 	 * we accept a checksum of zero here. When we find the socket
 	 * for the UDP packet we'll check if that socket allows zero checksum
 	 * for IPv6 (set by socket option).
+	 *
+	 * Note, we are only interested in != 0 or == 0, thus the
+	 * force to int.
 	 */
-	return skb_checksum_init_zero_check(skb, proto, uh->check,
-					   ip6_compute_pseudo);
+	return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+							 ip6_compute_pseudo);
 }
 EXPORT_SYMBOL(udp6_csum_init);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bcef2369d64..771be1fa4176 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 		}
 	}
 
+	free_percpu(non_pcpu_rt->rt6i_pcpu);
 	non_pcpu_rt->rt6i_pcpu = NULL;
 }
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index fdc9de276ab1..776d145113e1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb)
 	bool csum_err = false;
 	int hdr_len;
 
-	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6));
+	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0);
 	if (hdr_len < 0)
 		goto drop;
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f2e2013f8346..c7ca0f5d1a3b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (!c)
 		return NULL;
+	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 	c->mfc_un.res.minvif = MAXMIFS;
 	return c;
 }
@@ -1591,14 +1592,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 	if (likely(mrt->mroute6_sk == NULL)) {
 		mrt->mroute6_sk = sk;
 		net->ipv6.devconf_all->mc_forwarding++;
-		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
-					     NETCONFA_IFINDEX_ALL,
-					     net->ipv6.devconf_all);
-	}
-	else
+	} else {
 		err = -EADDRINUSE;
+	}
 	write_unlock_bh(&mrt_lock);
 
+	if (!err)
+		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+					     NETCONFA_IFINDEX_ALL,
+					     net->ipv6.devconf_all);
 	rtnl_unlock();
 
 	return err;
@@ -1616,11 +1618,11 @@ int ip6mr_sk_done(struct sock *sk)
 			write_lock_bh(&mrt_lock);
 			mrt->mroute6_sk = NULL;
 			net->ipv6.devconf_all->mc_forwarding--;
+			write_unlock_bh(&mrt_lock);
 			inet6_netconf_notify_devconf(net,
 						     NETCONFA_MC_FORWARDING,
 						     NETCONFA_IFINDEX_ALL,
 						     net->ipv6.devconf_all);
-			write_unlock_bh(&mrt_lock);
 
 			mroute_clean_tables(mrt, false);
 			err = 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 63e06c3dd319..61ed95054efa 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb,
 	unsigned long ret;
 	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
 
-#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
-
-	if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
-				       &ip6info->src), IP6T_INV_SRCIP) ||
-	    FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
-				       &ip6info->dst), IP6T_INV_DSTIP))
+	if (NF_INVF(ip6info, IP6T_INV_SRCIP,
+		    ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+					 &ip6info->src)) ||
+	    NF_INVF(ip6info, IP6T_INV_DSTIP,
+		    ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+					 &ip6info->dst)))
 		return false;
 
 	ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
 
-	if (FWINV(ret != 0, IP6T_INV_VIA_IN))
+	if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0))
 		return false;
 
 	ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
 
-	if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
+	if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0))
 		return false;
 
 /* ... might want to do something with class and flowlabel here ... */
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index cb2b28883252..2b1a9dcdbcb3 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
 {
 	if (state->hook == NF_INET_LOCAL_OUT)
 		return ip6t_mangle_out(skb, state);
-	if (state->hook == NF_INET_POST_ROUTING)
-		return ip6t_do_table(skb, state,
-				     state->net->ipv6.ip6table_mangle);
-	/* INPUT/FORWARD */
 	return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 08b77f421268..49817555449e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1783,7 +1783,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 	};
 	struct fib6_table *table;
 	struct rt6_info *rt;
-	int flags = 0;
+	int flags = RT6_LOOKUP_F_IFACE;
 
 	table = fib6_get_table(net, cfg->fc_table);
 	if (!table)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cdd714690f95..182b6a9be29d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -526,13 +526,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, IPPROTO_IPV6, 0);
+				 t->parms.link, 0, iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
 	if (type == ICMP_REDIRECT) {
 		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      IPPROTO_IPV6, 0);
+			      iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
@@ -688,12 +688,19 @@ out:
 	return 0;
 }
 
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
 	/* no tunnel info required for ipip. */
 	.proto = htons(ETH_P_IP),
 };
 
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+	/* no tunnel info required for mplsip. */
+	.proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 {
 	const struct iphdr *iph;
 	struct ip_tunnel *tunnel;
@@ -702,15 +709,23 @@ static int ipip_rcv(struct sk_buff *skb)
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 				     iph->saddr, iph->daddr);
 	if (tunnel) {
-		if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+		const struct tnl_ptk_info *tpi;
+
+		if (tunnel->parms.iph.protocol != ipproto &&
 		    tunnel->parms.iph.protocol != 0)
 			goto drop;
 
 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto drop;
-		if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+		if (ipproto == IPPROTO_MPLS)
+			tpi = &mplsip_tpi;
+		else
+#endif
+			tpi = &ipip_tpi;
+		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 	}
 
 	return 1;
@@ -720,6 +735,18 @@ drop:
 	return 0;
 }
 
+static int ipip_rcv(struct sk_buff *skb)
+{
+	return sit_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+	return sit_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
 /*
  * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
  * stores the embedded IPv4 address in v4dst and returns true.
@@ -958,7 +985,8 @@ tx_error:
 	return NETDEV_TX_OK;
 }
 
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
+				     struct net_device *dev, u8 ipproto)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *tiph = &tunnel->parms.iph;
@@ -966,9 +994,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
 		goto tx_error;
 
-	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+	skb_set_inner_ipproto(skb, ipproto);
 
-	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+	ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
 tx_error:
 	kfree_skb(skb);
@@ -981,11 +1009,16 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		ipip_tunnel_xmit(skb, dev);
+		sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
 		break;
 	case htons(ETH_P_IPV6):
 		ipip6_tunnel_xmit(skb, dev);
 		break;
+#if IS_ENABLED(CONFIG_MPLS)
+	case htons(ETH_P_MPLS_UC):
+		sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
+		break;
+#endif
 	default:
 		goto tx_err;
 	}
@@ -1093,6 +1126,16 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
 }
 #endif
 
+bool ipip6_valid_ip_proto(u8 ipproto)
+{
+	return ipproto == IPPROTO_IPV6 ||
+		ipproto == IPPROTO_IPIP ||
+#if IS_ENABLED(CONFIG_MPLS)
+		ipproto == IPPROTO_MPLS ||
+#endif
+		ipproto == 0;
+}
+
 static int
 ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -1152,9 +1195,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			goto done;
 
 		err = -EINVAL;
-		if (p.iph.protocol != IPPROTO_IPV6 &&
-		    p.iph.protocol != IPPROTO_IPIP &&
-		    p.iph.protocol != 0)
+		if (!ipip6_valid_ip_proto(p.iph.protocol))
 			goto done;
 		if (p.iph.version != 4 ||
 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
@@ -1379,9 +1420,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
 		return 0;
 
 	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
-	if (proto != IPPROTO_IPV6 &&
-	    proto != IPPROTO_IPIP &&
-	    proto != 0)
+	if (!ipip6_valid_ip_proto(proto))
 		return -EINVAL;
 
 	return 0;
@@ -1723,6 +1762,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
 	.priority	=	2,
 };
 
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+	.handler	=	mplsip_rcv,
+	.err_handler	=	ipip6_err,
+	.priority	=	2,
+};
+#endif
+
 static void __net_exit sit_destroy_tunnels(struct net *net,
 					   struct list_head *head)
 {
@@ -1818,6 +1865,9 @@ static void __exit sit_cleanup(void)
 	rtnl_link_unregister(&sit_link_ops);
 	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+#if IS_ENABLED(CONFIG_MPLS)
+	xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+#endif
 
 	unregister_pernet_device(&sit_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1827,7 +1877,7 @@ static int __init sit_init(void)
 {
 	int err;
 
-	pr_info("IPv6 over IPv4 tunneling driver\n");
+	pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
 
 	err = register_pernet_device(&sit_net_ops);
 	if (err < 0)
@@ -1842,6 +1892,13 @@ static int __init sit_init(void)
 		pr_info("%s: can't register ip4ip4\n", __func__);
 		goto xfrm_tunnel4_failed;
 	}
+#if IS_ENABLED(CONFIG_MPLS)
+	err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+	if (err < 0) {
+		pr_info("%s: can't register mplsip\n", __func__);
+		goto xfrm_tunnel_mpls_failed;
+	}
+#endif
 	err = rtnl_link_register(&sit_link_ops);
 	if (err < 0)
 		goto rtnl_link_failed;
@@ -1850,6 +1907,10 @@ out:
 	return err;
 
 rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+	xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+xfrm_tunnel_mpls_failed:
+#endif
 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 xfrm_tunnel4_failed:
 	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f36c2d076fce..37cf91323319 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -526,26 +526,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
 			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 }
 
-static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
-					const struct in6_addr *daddr,
-					const struct in6_addr *saddr, int nbytes)
+static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+				   const struct in6_addr *daddr,
+				   const struct in6_addr *saddr,
+				   const struct tcphdr *th, int nbytes)
 {
 	struct tcp6_pseudohdr *bp;
 	struct scatterlist sg;
+	struct tcphdr *_th;
 
-	bp = &hp->md5_blk.ip6;
+	bp = hp->scratch;
 	/* 1. TCP pseudo-header (RFC2460) */
 	bp->saddr = *saddr;
 	bp->daddr = *daddr;
 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
 	bp->len = cpu_to_be32(nbytes);
 
-	sg_init_one(&sg, bp, sizeof(*bp));
-	ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+	_th = (struct tcphdr *)(bp + 1);
+	memcpy(_th, th, sizeof(*th));
+	_th->check = 0;
+
+	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+				sizeof(*bp) + sizeof(*th));
 	return crypto_ahash_update(hp->md5_req);
 }
 
-static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       const struct in6_addr *daddr, struct in6_addr *saddr,
 			       const struct tcphdr *th)
 {
@@ -559,9 +566,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
 
 	if (crypto_ahash_init(req))
 		goto clear_hash;
-	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
-		goto clear_hash;
-	if (tcp_md5_hash_header(hp, th))
+	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
 		goto clear_hash;
 	if (tcp_md5_hash_key(hp, key))
 		goto clear_hash;
@@ -606,9 +611,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
 	if (crypto_ahash_init(req))
 		goto clear_hash;
 
-	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
-		goto clear_hash;
-	if (tcp_md5_hash_header(hp, th))
+	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
 		goto clear_hash;
 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
 		goto clear_hash;
@@ -738,7 +741,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
 				 int oif, struct tcp_md5sig_key *key, int rst,
-				 u8 tclass, u32 label)
+				 u8 tclass, __be32 label)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcphdr *t1;
@@ -911,7 +914,7 @@ out:
 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key, u8 tclass,
-			    u32 label)
+			    __be32 label)
 {
 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
 			     tclass, label);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4bb5c13777f1..0a71a312d0d8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk)
 	udp_lib_rehash(sk, new_hash);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-				unsigned short hnum,
-				const struct in6_addr *saddr, __be16 sport,
-				const struct in6_addr *daddr, __be16 dport,
-				int dif)
+static int compute_score(struct sock *sk, struct net *net,
+			 const struct in6_addr *saddr, __be16 sport,
+			 const struct in6_addr *daddr, unsigned short hnum,
+			 int dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-static inline int compute_score2(struct sock *sk, struct net *net,
-				 const struct in6_addr *saddr, __be16 sport,
-				 const struct in6_addr *daddr,
-				 unsigned short hnum, int dif)
-{
-	int score;
-	struct inet_sock *inet;
-
-	if (!net_eq(sock_net(sk), net) ||
-	    udp_sk(sk)->udp_port_hash != hnum ||
-	    sk->sk_family != PF_INET6)
-		return -1;
-
-	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
-		return -1;
-
-	score = 0;
-	inet = inet_sk(sk);
-
-	if (inet->inet_dport) {
-		if (inet->inet_dport != sport)
-			return -1;
-		score++;
-	}
-
-	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
-		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
-			return -1;
-		score++;
-	}
-
-	if (sk->sk_bound_dev_if) {
-		if (sk->sk_bound_dev_if != dif)
-			return -1;
-		score++;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
-		score++;
-
-	return score;
-}
-
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
 		const struct in6_addr *daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2,
+		struct udp_hslot *hslot2,
 		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
@@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 	result = NULL;
 	badness = -1;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		score = compute_score2(sk, net, saddr, sport,
+		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
@@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net,
 
 		result = udp6_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2, skb);
+					  hslot2, skb);
 		if (!result) {
+			unsigned int old_slot2 = slot2;
 			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
 			slot2 = hash2 & udptable->mask;
+			/* avoid searching the same slot again. */
+			if (unlikely(slot2 == old_slot2))
+				return result;
+
 			hslot2 = &udptable->hash2[slot2];
 			if (hslot->count < hslot2->count)
 				goto begin;
 
 			result = udp6_lib_lookup2(net, saddr, sport,
-						  &in6addr_any, hnum, dif,
-						  hslot2, slot2, skb);
+						  daddr, hnum, dif,
+						  hslot2, skb);
 		}
 		return result;
 	}
@@ -286,7 +247,7 @@ begin:
 	result = NULL;
 	badness = -1;
 	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 738008726cc6..fda7f4715c58 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = {
 	.open		= kcm_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
+	.release	= seq_release_net,
 };
 
 static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b68ba730a06..cb39e05b166c 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1765,18 +1765,12 @@ static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
 	if (!csock)
 		return -ENOENT;
 
-	prog = bpf_prog_get(info->bpf_fd);
+	prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
 	if (IS_ERR(prog)) {
 		err = PTR_ERR(prog);
 		goto out;
 	}
 
-	if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
-		bpf_prog_put(prog);
-		err = -EINVAL;
-		goto out;
-	}
-
 	err = kcm_attach(sock, csock, prog);
 	if (err) {
 		bpf_prog_put(prog);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 3a8f881b22f1..a9aff6079c42 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -306,6 +306,24 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 	mutex_lock(&sta->ampdu_mlme.mtx);
 
 	if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
+		tid_agg_rx = rcu_dereference_protected(
+				sta->ampdu_mlme.tid_rx[tid],
+				lockdep_is_held(&sta->ampdu_mlme.mtx));
+
+		if (tid_agg_rx->dialog_token == dialog_token) {
+			ht_dbg_ratelimited(sta->sdata,
+					   "updated AddBA Req from %pM on tid %u\n",
+					   sta->sta.addr, tid);
+			/* We have no API to update the timeout value in the
+			 * driver so reject the timeout update.
+			 */
+			status = WLAN_STATUS_REQUEST_DECLINED;
+			ieee80211_send_addba_resp(sta->sdata, sta->sta.addr,
+						  tid, dialog_token, status,
+						  1, buf_size, timeout);
+			goto end;
+		}
+
 		ht_dbg_ratelimited(sta->sdata,
 				   "unexpected AddBA Req from %pM on tid %u\n",
 				   sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0c12e4001f19..47e99ab8d97a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -997,6 +997,7 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
 			if (sta->mesh->plink_state != NL80211_PLINK_ESTAB)
 				changed = mesh_plink_inc_estab_count(sdata);
 			sta->mesh->plink_state = params->plink_state;
+			sta->mesh->aid = params->peer_aid;
 
 			ieee80211_mps_sta_status_update(sta);
 			changed |= ieee80211_mps_set_sta_local_pm(sta,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 54edfb6fc1d1..f56d342c31b8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1250,6 +1250,7 @@ struct ieee80211_local {
 	int scan_channel_idx;
 	int scan_ies_len;
 	int hw_scan_ies_bufsize;
+	struct cfg80211_scan_info scan_info;
 
 	struct work_struct sched_scan_stopped_work;
 	struct ieee80211_sub_if_data __rcu *sched_scan_sdata;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 21b1fdf5d01d..c66411df9863 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -148,22 +148,7 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
 void mesh_sta_cleanup(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	u32 changed;
-
-	/*
-	 * maybe userspace handles peer allocation and peering, but in either
-	 * case the beacon is still generated by the kernel and we might need
-	 * an update.
-	 */
-	changed = mesh_accept_plinks_update(sdata);
-	if (!sdata->u.mesh.user_mpm) {
-		changed |= mesh_plink_deactivate(sta);
-		del_timer_sync(&sta->mesh->plink_timer);
-	}
-
-	/* make sure no readers can access nexthop sta from here on */
-	mesh_path_flush_by_nexthop(sta);
-	synchronize_net();
+	u32 changed = mesh_plink_deactivate(sta);
 
 	if (changed)
 		ieee80211_mbss_info_change_notify(sdata, changed);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 79f2a0a13db8..7fcdcf622655 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -370,13 +370,21 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
 
 	spin_lock_bh(&sta->mesh->plink_lock);
 	changed = __mesh_plink_deactivate(sta);
-	sta->mesh->reason = WLAN_REASON_MESH_PEER_CANCELED;
-	mesh_plink_frame_tx(sdata, sta, WLAN_SP_MESH_PEERING_CLOSE,
-			    sta->sta.addr, sta->mesh->llid, sta->mesh->plid,
-			    sta->mesh->reason);
+
+	if (!sdata->u.mesh.user_mpm) {
+		sta->mesh->reason = WLAN_REASON_MESH_PEER_CANCELED;
+		mesh_plink_frame_tx(sdata, sta, WLAN_SP_MESH_PEERING_CLOSE,
+				    sta->sta.addr, sta->mesh->llid,
+				    sta->mesh->plid, sta->mesh->reason);
+	}
 	spin_unlock_bh(&sta->mesh->plink_lock);
+	if (!sdata->u.mesh.user_mpm)
+		del_timer_sync(&sta->mesh->plink_timer);
 	mesh_path_flush_by_nexthop(sta);
 
+	/* make sure no readers can access nexthop sta from here on */
+	synchronize_net();
+
 	return changed;
 }
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9a1eb70cb120..2e8a9024625a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1624,8 +1624,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		if (mmie_keyidx < NUM_DEFAULT_KEYS ||
 		    mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
 			return RX_DROP_MONITOR; /* unexpected BIP keyidx */
-		if (rx->sta)
+		if (rx->sta) {
+			if (ieee80211_is_group_privacy_action(skb) &&
+			    test_sta_flag(rx->sta, WLAN_STA_MFP))
+				return RX_DROP_MONITOR;
+
 			rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
+		}
 		if (!rx->key)
 			rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
 	} else if (!ieee80211_has_protected(fc)) {
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f9648ef9e31f..070b40f15850 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,6 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
+ * Copyright 2016  Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -70,6 +71,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 		.boottime_ns = rx_status->boottime_ns,
 	};
 	bool signal_valid;
+	struct ieee80211_sub_if_data *scan_sdata;
 
 	if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
 		bss_meta.signal = rx_status->signal * 100;
@@ -83,6 +85,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 		bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
 
 	bss_meta.chan = channel;
+
+	rcu_read_lock();
+	scan_sdata = rcu_dereference(local->scan_sdata);
+	if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    scan_sdata->vif.bss_conf.assoc &&
+	    ieee80211_have_rx_timestamp(rx_status)) {
+		bss_meta.parent_tsf =
+			ieee80211_calculate_rx_timestamp(local, rx_status,
+							 len + FCS_LEN, 24);
+		ether_addr_copy(bss_meta.parent_bssid,
+				scan_sdata->vif.bss_conf.bssid);
+	}
+	rcu_read_unlock();
+
 	cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta,
 					      mgmt, len, GFP_ATOMIC);
 	if (!cbss)
@@ -345,6 +361,12 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 
 		if (rc == 0)
 			return;
+
+		/* HW scan failed and is going to be reported as aborted,
+		 * so clear old scan info.
+		 */
+		memset(&local->scan_info, 0, sizeof(local->scan_info));
+		aborted = true;
 	}
 
 	kfree(local->hw_scan_req);
@@ -353,8 +375,10 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	scan_req = rcu_dereference_protected(local->scan_req,
 					     lockdep_is_held(&local->mtx));
 
-	if (scan_req != local->int_scan_req)
-		cfg80211_scan_done(scan_req, aborted);
+	if (scan_req != local->int_scan_req) {
+		local->scan_info.aborted = aborted;
+		cfg80211_scan_done(scan_req, &local->scan_info);
+	}
 	RCU_INIT_POINTER(local->scan_req, NULL);
 
 	scan_sdata = rcu_dereference_protected(local->scan_sdata,
@@ -391,15 +415,19 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 		ieee80211_start_next_roc(local);
 }
 
-void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
+void ieee80211_scan_completed(struct ieee80211_hw *hw,
+			      struct cfg80211_scan_info *info)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	trace_api_scan_completed(local, aborted);
+	trace_api_scan_completed(local, info);
 
 	set_bit(SCAN_COMPLETED, &local->scanning);
-	if (aborted)
+	if (info->aborted)
 		set_bit(SCAN_ABORTED, &local->scanning);
+
+	memcpy(&local->scan_info, info, sizeof(*info));
+
 	ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
 }
 EXPORT_SYMBOL(ieee80211_scan_completed);
@@ -566,6 +594,9 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 		local->hw_scan_req->req.ie = ies;
 		local->hw_scan_req->req.flags = req->flags;
 		eth_broadcast_addr(local->hw_scan_req->req.bssid);
+		local->hw_scan_req->req.duration = req->duration;
+		local->hw_scan_req->req.duration_mandatory =
+			req->duration_mandatory;
 
 		local->hw_scan_band = 0;
 
@@ -1073,6 +1104,7 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
 	 */
 	cancel_delayed_work(&local->scan_work);
 	/* and clean up */
+	memset(&local->scan_info, 0, sizeof(local->scan_info));
 	__ieee80211_scan_completed(&local->hw, true);
 out:
 	mutex_unlock(&local->mtx);
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 2ddc661f0988..97f4c9d6b54c 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -129,42 +129,31 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (wide_bw_chansw_ie) {
-		new_vht_chandef.chan = new_chan;
-		new_vht_chandef.center_freq1 =
-			ieee80211_channel_to_frequency(
+		struct ieee80211_vht_operation vht_oper = {
+			.chan_width =
+				wide_bw_chansw_ie->new_channel_width,
+			.center_freq_seg1_idx =
 				wide_bw_chansw_ie->new_center_freq_seg0,
-				new_band);
-
-		switch (wide_bw_chansw_ie->new_channel_width) {
-		default:
-			/* hmmm, ignore VHT and use HT if present */
-		case IEEE80211_VHT_CHANWIDTH_USE_HT:
+			.center_freq_seg2_idx =
+				wide_bw_chansw_ie->new_center_freq_seg1,
+			/* .basic_mcs_set doesn't matter */
+		};
+
+		/* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
+		 * to the previously parsed chandef
+		 */
+		new_vht_chandef = csa_ie->chandef;
+
+		/* ignore if parsing fails */
+		if (!ieee80211_chandef_vht_oper(&vht_oper, &new_vht_chandef))
 			new_vht_chandef.chan = NULL;
-			break;
-		case IEEE80211_VHT_CHANWIDTH_80MHZ:
-			new_vht_chandef.width = NL80211_CHAN_WIDTH_80;
-			break;
-		case IEEE80211_VHT_CHANWIDTH_160MHZ:
-			new_vht_chandef.width = NL80211_CHAN_WIDTH_160;
-			break;
-		case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
-			/* field is otherwise reserved */
-			new_vht_chandef.center_freq2 =
-				ieee80211_channel_to_frequency(
-					wide_bw_chansw_ie->new_center_freq_seg1,
-					new_band);
-			new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
-			break;
-		}
+
 		if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ &&
 		    new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
 			ieee80211_chandef_downgrade(&new_vht_chandef);
 		if (sta_flags & IEEE80211_STA_DISABLE_160MHZ &&
 		    new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
 			ieee80211_chandef_downgrade(&new_vht_chandef);
-		if (sta_flags & IEEE80211_STA_DISABLE_40MHZ &&
-		    new_vht_chandef.width > NL80211_CHAN_WIDTH_20)
-			ieee80211_chandef_downgrade(&new_vht_chandef);
 	}
 
 	/* if VHT data is there validate & use it */
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 1c7d45a6d93e..b5d28f14b9cf 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1747,6 +1747,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
+	ret = 0;
 call_drv:
 	drv_tdls_recv_channel_switch(sdata->local, sdata, &params);
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 44ec605a5682..91461c415525 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -593,6 +593,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	else if (tx->sta &&
 		 (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
 		tx->key = key;
+	else if (ieee80211_is_group_privacy_action(tx->skb) &&
+		(key = rcu_dereference(tx->sdata->default_multicast_key)))
+		tx->key = key;
 	else if (ieee80211_is_mgmt(hdr->frame_control) &&
 		 is_multicast_ether_addr(hdr->addr1) &&
 		 ieee80211_is_robust_mgmt_frame(tx->skb) &&
@@ -625,7 +628,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		case WLAN_CIPHER_SUITE_GCMP_256:
 			if (!ieee80211_is_data_present(hdr->frame_control) &&
 			    !ieee80211_use_mfp(hdr->frame_control, tx->sta,
-					       tx->skb))
+					       tx->skb) &&
+			    !ieee80211_is_group_privacy_action(tx->skb))
 				tx->key = NULL;
 			else
 				skip_hw = (tx->key->conf.flags &
@@ -1445,7 +1449,9 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
 	local->cvars = kcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
 			       GFP_KERNEL);
 	if (!local->cvars) {
+		spin_lock_bh(&fq->lock);
 		fq_reset(fq, fq_skb_free_func);
+		spin_unlock_bh(&fq->lock);
 		return -ENOMEM;
 	}
 
@@ -1465,7 +1471,9 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
 	kfree(local->cvars);
 	local->cvars = NULL;
 
+	spin_lock_bh(&fq->lock);
 	fq_reset(fq, fq_skb_free_func);
+	spin_unlock_bh(&fq->lock);
 }
 
 struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e9beaa58573c..5c161e7759b5 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1009,10 +1009,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
 	unsigned int flags;
 
 	if (event == NETDEV_REGISTER) {
-		/* For now just support Ethernet and IPGRE devices */
+		/* For now just support Ethernet, IPGRE, SIT and IPIP devices */
 		if (dev->type == ARPHRD_ETHER ||
 		    dev->type == ARPHRD_LOOPBACK ||
-		    dev->type == ARPHRD_IPGRE) {
+		    dev->type == ARPHRD_IPGRE ||
+		    dev->type == ARPHRD_SIT ||
+		    dev->type == ARPHRD_TUNNEL) {
 			mdev = mpls_add_dev(dev);
 			if (IS_ERR(mdev))
 				return notifier_from_errno(PTR_ERR(mdev));
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 95e757c377f9..9266ceebd112 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -609,9 +609,8 @@ config NETFILTER_XT_MARK
 	The target allows you to create rules in the "mangle" table which alter
 	the netfilter mark (nfmark) field associated with the packet.
 
-	Prior to routing, the nfmark can influence the routing method (see
-	"Use netfilter MARK value as routing key") and can also be used by
-	other subsystems to change their behavior.
+	Prior to routing, the nfmark can influence the routing method and can
+	also be used by other subsystems to change their behavior.
 
 config NETFILTER_XT_CONNMARK
 	tristate 'ctmark target and match support'
@@ -753,9 +752,8 @@ config NETFILTER_XT_TARGET_HMARK
 
 	The target allows you to create rules in the "raw" and "mangle" tables
 	which set the skbuff mark by means of hash calculation within a given
-	range. The nfmark can influence the routing method (see "Use netfilter
-	MARK value as routing key") and can also be used by other subsystems to
-	change their behaviour.
+	range. The nfmark can influence the routing method and can also be used
+	by other subsystems to change their behaviour.
 
 	To compile it as a module, choose M here. If unsure, say N.
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db2312eeb2a4..153e33ffeeaa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -327,16 +327,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
 
 	tmpl->status = IPS_TEMPLATE;
 	write_pnet(&tmpl->ct_net, net);
-
-	if (nf_ct_zone_add(tmpl, flags, zone) < 0)
-		goto out_free;
-
+	nf_ct_zone_add(tmpl, zone);
 	atomic_set(&tmpl->ct_general.use, 0);
 
 	return tmpl;
-out_free:
-	kfree(tmpl);
-	return NULL;
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
 
@@ -929,16 +923,13 @@ __nf_conntrack_alloc(struct net *net,
 	       offsetof(struct nf_conn, proto) -
 	       offsetof(struct nf_conn, __nfct_init_offset[0]));
 
-	if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
-		goto out_free;
+	nf_ct_zone_add(ct, zone);
 
 	/* Because we use RCU lookups, we set ct_general.use to zero before
 	 * this is inserted in any list.
 	 */
 	atomic_set(&ct->ct_general.use, 0);
 	return ct;
-out_free:
-	kmem_cache_free(nf_conntrack_cachep, ct);
 out:
 	atomic_dec(&net->ct.count);
 	return ERR_PTR(-ENOMEM);
@@ -1342,14 +1333,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 }
 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
 
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
-	.len	= sizeof(struct nf_conntrack_zone),
-	.align	= __alignof__(struct nf_conntrack_zone),
-	.id	= NF_CT_EXT_ZONE,
-};
-#endif
-
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 
 #include <linux/netfilter/nfnetlink.h>
@@ -1532,9 +1515,6 @@ void nf_conntrack_cleanup_end(void)
 
 	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-	nf_ct_extend_unregister(&nf_ct_zone_extend);
-#endif
 	nf_conntrack_proto_fini();
 	nf_conntrack_seqadj_fini();
 	nf_conntrack_labels_fini();
@@ -1544,6 +1524,8 @@ void nf_conntrack_cleanup_end(void)
 	nf_conntrack_tstamp_fini();
 	nf_conntrack_acct_fini();
 	nf_conntrack_expect_fini();
+
+	kmem_cache_destroy(nf_conntrack_cachep);
 }
 
 /*
@@ -1615,24 +1597,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 }
 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_hash_resize(unsigned int hashsize)
 {
-	int i, bucket, rc;
-	unsigned int hashsize, old_size;
+	int i, bucket;
+	unsigned int old_size;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
-	if (current->nsproxy->net_ns != &init_net)
-		return -EOPNOTSUPP;
-
-	/* On boot, we can set this without any fancy locking. */
-	if (!nf_conntrack_htable_size)
-		return param_set_uint(val, kp);
-
-	rc = kstrtouint(val, 0, &hashsize);
-	if (rc)
-		return rc;
 	if (!hashsize)
 		return -EINVAL;
 
@@ -1640,6 +1612,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hash)
 		return -ENOMEM;
 
+	old_size = nf_conntrack_htable_size;
+	if (old_size == hashsize) {
+		nf_ct_free_hashtable(hash, hashsize);
+		return 0;
+	}
+
 	local_bh_disable();
 	nf_conntrack_all_lock();
 	write_seqcount_begin(&nf_conntrack_generation);
@@ -1675,6 +1653,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	nf_ct_free_hashtable(old_hash, old_size);
 	return 0;
 }
+
+int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+{
+	unsigned int hashsize;
+	int rc;
+
+	if (current->nsproxy->net_ns != &init_net)
+		return -EOPNOTSUPP;
+
+	/* On boot, we can set this without any fancy locking. */
+	if (!nf_conntrack_htable_size)
+		return param_set_uint(val, kp);
+
+	rc = kstrtouint(val, 0, &hashsize);
+	if (rc)
+		return rc;
+
+	return nf_conntrack_hash_resize(hashsize);
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
@@ -1731,7 +1728,7 @@ int nf_conntrack_init_start(void)
 
 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
 						sizeof(struct nf_conn), 0,
-						SLAB_DESTROY_BY_RCU, NULL);
+						SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
 	if (!nf_conntrack_cachep)
 		goto err_cachep;
 
@@ -1771,11 +1768,6 @@ int nf_conntrack_init_start(void)
 	if (ret < 0)
 		goto err_seqadj;
 
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-	ret = nf_ct_extend_register(&nf_ct_zone_extend);
-	if (ret < 0)
-		goto err_extend;
-#endif
 	ret = nf_conntrack_proto_init();
 	if (ret < 0)
 		goto err_proto;
@@ -1791,10 +1783,6 @@ int nf_conntrack_init_start(void)
 	return 0;
 
 err_proto:
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-	nf_ct_extend_unregister(&nf_ct_zone_extend);
-err_extend:
-#endif
 	nf_conntrack_seqadj_fini();
 err_seqadj:
 	nf_conntrack_labels_fini();
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 196cb39649e1..3a1a88b9bafa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -389,11 +389,38 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 					     struct net *net)
 {
 	struct nf_conntrack_tuple_hash *h;
+	const struct hlist_nulls_node *nn;
+	int cpu;
+
+	/* Get rid of expecteds, set helpers to NULL. */
+	for_each_possible_cpu(cpu) {
+		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+		spin_lock_bh(&pcpu->lock);
+		hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+			unhelp(h, me);
+		spin_unlock_bh(&pcpu->lock);
+	}
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	const struct hlist_node *next;
 	const struct hlist_nulls_node *nn;
+	struct net *net;
 	unsigned int i;
-	int cpu;
+
+	mutex_lock(&nf_ct_helper_mutex);
+	hlist_del_rcu(&me->hnode);
+	nf_ct_helper_count--;
+	mutex_unlock(&nf_ct_helper_mutex);
+
+	/* Make sure every nothing is still using the helper unless its a
+	 * connection in the hash.
+	 */
+	synchronize_rcu();
 
 	/* Get rid of expectations */
 	spin_lock_bh(&nf_conntrack_expect_lock);
@@ -413,15 +440,11 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	}
 	spin_unlock_bh(&nf_conntrack_expect_lock);
 
-	/* Get rid of expecteds, set helpers to NULL. */
-	for_each_possible_cpu(cpu) {
-		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+	rtnl_lock();
+	for_each_net(net)
+		__nf_conntrack_helper_unregister(me, net);
+	rtnl_unlock();
 
-		spin_lock_bh(&pcpu->lock);
-		hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
-			unhelp(h, me);
-		spin_unlock_bh(&pcpu->lock);
-	}
 	local_bh_disable();
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
 		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
@@ -433,26 +456,6 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	}
 	local_bh_enable();
 }
-
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
-{
-	struct net *net;
-
-	mutex_lock(&nf_ct_helper_mutex);
-	hlist_del_rcu(&me->hnode);
-	nf_ct_helper_count--;
-	mutex_unlock(&nf_ct_helper_mutex);
-
-	/* Make sure every nothing is still using the helper unless its a
-	 * connection in the hash.
-	 */
-	synchronize_rcu();
-
-	rtnl_lock();
-	for_each_net(net)
-		__nf_conntrack_helper_unregister(me, net);
-	rtnl_unlock();
-}
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
 
 static struct nf_ct_ext_type helper_extend __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index c026c472ea80..2aaa188ee961 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -434,8 +434,29 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
 
 #ifdef CONFIG_SYSCTL
 /* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
+static int log_invalid_proto_min __read_mostly;
+static int log_invalid_proto_max __read_mostly = 255;
+
+/* size the user *wants to set */
+static unsigned int nf_conntrack_htable_size_user __read_mostly;
+
+static int
+nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret < 0 || !write)
+		return ret;
+
+	/* update ret, we might not be able to satisfy request */
+	ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
+
+	/* update it to the actual value used by conntrack */
+	nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+	return ret;
+}
 
 static struct ctl_table_header *nf_ct_netfilter_header;
 
@@ -456,10 +477,10 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	{
 		.procname       = "nf_conntrack_buckets",
-		.data           = &nf_conntrack_htable_size,
+		.data           = &nf_conntrack_htable_size_user,
 		.maxlen         = sizeof(unsigned int),
-		.mode           = 0444,
-		.proc_handler   = proc_dointvec,
+		.mode           = 0644,
+		.proc_handler   = nf_conntrack_hash_sysctl,
 	},
 	{
 		.procname	= "nf_conntrack_checksum",
@@ -515,6 +536,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 	if (net->user_ns != &init_user_ns)
 		table[0].procname = NULL;
 
+	if (!net_eq(&init_net, net))
+		table[2].mode = 0444;
+
 	net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
 	if (!net->ct.sysctl_header)
 		goto out_unregister_netfilter;
@@ -604,6 +628,8 @@ static int __init nf_conntrack_standalone_init(void)
 		ret = -ENOMEM;
 		goto out_sysctl;
 	}
+
+	nf_conntrack_htable_size_user = nf_conntrack_htable_size;
 #endif
 
 	ret = register_pernet_subsys(&nf_conntrack_net_ops);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index a5d41dfa9f05..aa5847a16713 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
 	struct nf_logger *logger;
 	int ret = -ENOENT;
 
+	if (pf == NFPROTO_INET) {
+		ret = nf_logger_find_get(NFPROTO_IPV4, type);
+		if (ret < 0)
+			return ret;
+
+		ret = nf_logger_find_get(NFPROTO_IPV6, type);
+		if (ret < 0) {
+			nf_logger_put(NFPROTO_IPV4, type);
+			return ret;
+		}
+
+		return 0;
+	}
+
 	if (rcu_access_pointer(loggers[pf][type]) == NULL)
 		request_module("nf-logger-%u-%u", pf, type);
 
@@ -167,7 +181,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
 	if (logger == NULL)
 		goto out;
 
-	if (logger && try_module_get(logger->me))
+	if (try_module_get(logger->me))
 		ret = 0;
 out:
 	rcu_read_unlock();
@@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
 {
 	struct nf_logger *logger;
 
+	if (pf == NFPROTO_INET) {
+		nf_logger_put(NFPROTO_IPV4, type);
+		nf_logger_put(NFPROTO_IPV6, type);
+		return;
+	}
+
 	BUG_ON(loggers[pf][type] == NULL);
 
 	rcu_read_lock();
@@ -398,16 +418,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
 {
 	const struct nf_logger *logger;
 	char buf[NFLOGGER_NAME_LEN];
-	size_t size = *lenp;
 	int r = 0;
 	int tindex = (unsigned long)table->extra1;
 	struct net *net = current->nsproxy->net_ns;
 
 	if (write) {
-		if (size > sizeof(buf))
-			size = sizeof(buf);
-		if (copy_from_user(buf, buffer, size))
-			return -EFAULT;
+		struct ctl_table tmp = *table;
+
+		tmp.data = buf;
+		r = proc_dostring(&tmp, write, buffer, lenp, ppos);
+		if (r)
+			return r;
 
 		if (!strcmp(buf, "NONE")) {
 			nf_log_unbind_pf(net, tindex);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7b7aa871a174..18b7f8578ee0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -131,29 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans)
 	kfree(trans);
 }
 
-static int nft_register_basechain(struct nft_base_chain *basechain,
-				  unsigned int hook_nops)
-{
-	struct net *net = read_pnet(&basechain->pnet);
-
-	if (basechain->flags & NFT_BASECHAIN_DISABLED)
-		return 0;
-
-	return nf_register_net_hooks(net, basechain->ops, hook_nops);
-}
-
-static void nft_unregister_basechain(struct nft_base_chain *basechain,
-				     unsigned int hook_nops)
-{
-	struct net *net = read_pnet(&basechain->pnet);
-
-	if (basechain->flags & NFT_BASECHAIN_DISABLED)
-		return;
-
-	nf_unregister_net_hooks(net, basechain->ops, hook_nops);
-}
-
-static int nf_tables_register_hooks(const struct nft_table *table,
+static int nf_tables_register_hooks(struct net *net,
+				    const struct nft_table *table,
 				    struct nft_chain *chain,
 				    unsigned int hook_nops)
 {
@@ -161,10 +140,12 @@ static int nf_tables_register_hooks(const struct nft_table *table,
 	    !(chain->flags & NFT_BASE_CHAIN))
 		return 0;
 
-	return nft_register_basechain(nft_base_chain(chain), hook_nops);
+	return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+				     hook_nops);
 }
 
-static void nf_tables_unregister_hooks(const struct nft_table *table,
+static void nf_tables_unregister_hooks(struct net *net,
+				       const struct nft_table *table,
 				       struct nft_chain *chain,
 				       unsigned int hook_nops)
 {
@@ -172,12 +153,9 @@ static void nf_tables_unregister_hooks(const struct nft_table *table,
 	    !(chain->flags & NFT_BASE_CHAIN))
 		return;
 
-	nft_unregister_basechain(nft_base_chain(chain), hook_nops);
+	nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
 }
 
-/* Internal table flags */
-#define NFT_TABLE_INACTIVE	(1 << 15)
-
 static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
 {
 	struct nft_trans *trans;
@@ -187,7 +165,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
 		return -ENOMEM;
 
 	if (msg_type == NFT_MSG_NEWTABLE)
-		ctx->table->flags |= NFT_TABLE_INACTIVE;
+		nft_activate_next(ctx->net, ctx->table);
 
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -201,7 +179,7 @@ static int nft_deltable(struct nft_ctx *ctx)
 	if (err < 0)
 		return err;
 
-	list_del_rcu(&ctx->table->list);
+	nft_deactivate_next(ctx->net, ctx->table);
 	return err;
 }
 
@@ -214,7 +192,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
 		return -ENOMEM;
 
 	if (msg_type == NFT_MSG_NEWCHAIN)
-		ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+		nft_activate_next(ctx->net, ctx->chain);
 
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -229,47 +207,17 @@ static int nft_delchain(struct nft_ctx *ctx)
 		return err;
 
 	ctx->table->use--;
-	list_del_rcu(&ctx->chain->list);
+	nft_deactivate_next(ctx->net, ctx->chain);
 
 	return err;
 }
 
-static inline bool
-nft_rule_is_active(struct net *net, const struct nft_rule *rule)
-{
-	return (rule->genmask & nft_genmask_cur(net)) == 0;
-}
-
-static inline int
-nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
-{
-	return (rule->genmask & nft_genmask_next(net)) == 0;
-}
-
-static inline void
-nft_rule_activate_next(struct net *net, struct nft_rule *rule)
-{
-	/* Now inactive, will be active in the future */
-	rule->genmask = nft_genmask_cur(net);
-}
-
-static inline void
-nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
-{
-	rule->genmask = nft_genmask_next(net);
-}
-
-static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
-{
-	rule->genmask &= ~nft_genmask_next(net);
-}
-
 static int
 nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
 {
 	/* You cannot delete the same rule twice */
-	if (nft_rule_is_active_next(ctx->net, rule)) {
-		nft_rule_deactivate_next(ctx->net, rule);
+	if (nft_is_active_next(ctx->net, rule)) {
+		nft_deactivate_next(ctx->net, rule);
 		ctx->chain->use--;
 		return 0;
 	}
@@ -322,9 +270,6 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
 	return 0;
 }
 
-/* Internal set flag */
-#define NFT_SET_INACTIVE	(1 << 15)
-
 static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
 			     struct nft_set *set)
 {
@@ -337,7 +282,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
 	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
 		nft_trans_set_id(trans) =
 			ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
-		set->flags |= NFT_SET_INACTIVE;
+		nft_activate_next(ctx->net, set);
 	}
 	nft_trans_set(trans) = set;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -353,7 +298,7 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
 	if (err < 0)
 		return err;
 
-	list_del_rcu(&set->list);
+	nft_deactivate_next(ctx->net, set);
 	ctx->table->use--;
 
 	return err;
@@ -364,26 +309,29 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
  */
 
 static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
-					  const struct nlattr *nla)
+					  const struct nlattr *nla,
+					  u8 genmask)
 {
 	struct nft_table *table;
 
 	list_for_each_entry(table, &afi->tables, list) {
-		if (!nla_strcmp(nla, table->name))
+		if (!nla_strcmp(nla, table->name) &&
+		    nft_active_genmask(table, genmask))
 			return table;
 	}
 	return NULL;
 }
 
 static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
-						const struct nlattr *nla)
+						const struct nlattr *nla,
+						u8 genmask)
 {
 	struct nft_table *table;
 
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
-	table = nft_table_lookup(afi, nla);
+	table = nft_table_lookup(afi, nla, genmask);
 	if (table != NULL)
 		return table;
 
@@ -524,6 +472,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
 			if (idx > s_idx)
 				memset(&cb->args[1], 0,
 				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (!nft_is_active(net, table))
+				continue;
 			if (nf_tables_fill_table_info(skb, net,
 						      NETLINK_CB(cb->skb).portid,
 						      cb->nlh->nlmsg_seq,
@@ -548,6 +498,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_cur(net);
 	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	struct sk_buff *skb2;
@@ -565,11 +516,9 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
-	if (table->flags & NFT_TABLE_INACTIVE)
-		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -588,17 +537,21 @@ err:
 	return err;
 }
 
-static int nf_tables_table_enable(const struct nft_af_info *afi,
+static int nf_tables_table_enable(struct net *net,
+				  const struct nft_af_info *afi,
 				  struct nft_table *table)
 {
 	struct nft_chain *chain;
 	int err, i = 0;
 
 	list_for_each_entry(chain, &table->chains, list) {
+		if (!nft_is_active_next(net, chain))
+			continue;
 		if (!(chain->flags & NFT_BASE_CHAIN))
 			continue;
 
-		err = nft_register_basechain(nft_base_chain(chain), afi->nops);
+		err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+					    afi->nops);
 		if (err < 0)
 			goto err;
 
@@ -607,26 +560,34 @@ static int nf_tables_table_enable(const struct nft_af_info *afi,
 	return 0;
 err:
 	list_for_each_entry(chain, &table->chains, list) {
+		if (!nft_is_active_next(net, chain))
+			continue;
 		if (!(chain->flags & NFT_BASE_CHAIN))
 			continue;
 
 		if (i-- <= 0)
 			break;
 
-		nft_unregister_basechain(nft_base_chain(chain), afi->nops);
+		nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+					afi->nops);
 	}
 	return err;
 }
 
-static void nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(struct net *net,
+				    const struct nft_af_info *afi,
 				    struct nft_table *table)
 {
 	struct nft_chain *chain;
 
 	list_for_each_entry(chain, &table->chains, list) {
-		if (chain->flags & NFT_BASE_CHAIN)
-			nft_unregister_basechain(nft_base_chain(chain),
-						 afi->nops);
+		if (!nft_is_active_next(net, chain))
+			continue;
+		if (!(chain->flags & NFT_BASE_CHAIN))
+			continue;
+
+		nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+					afi->nops);
 	}
 }
 
@@ -656,7 +617,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 		nft_trans_table_enable(trans) = false;
 	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
 		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
-		ret = nf_tables_table_enable(ctx->afi, ctx->table);
+		ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
 		if (ret >= 0) {
 			ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
 			nft_trans_table_enable(trans) = true;
@@ -678,6 +639,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *name;
 	struct nft_af_info *afi;
 	struct nft_table *table;
@@ -691,7 +653,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 		return PTR_ERR(afi);
 
 	name = nla[NFTA_TABLE_NAME];
-	table = nf_tables_table_lookup(afi, name);
+	table = nf_tables_table_lookup(afi, name, genmask);
 	if (IS_ERR(table)) {
 		if (PTR_ERR(table) != -ENOENT)
 			return PTR_ERR(table);
@@ -699,8 +661,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	}
 
 	if (table != NULL) {
-		if (table->flags & NFT_TABLE_INACTIVE)
-			return -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -752,6 +712,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
 	struct nft_set *set, *ns;
 
 	list_for_each_entry(chain, &ctx->table->chains, list) {
+		if (!nft_is_active_next(ctx->net, chain))
+			continue;
+
 		ctx->chain = chain;
 
 		err = nft_delrule_by_chain(ctx);
@@ -760,6 +723,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
 	}
 
 	list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
+		if (!nft_is_active_next(ctx->net, set))
+			continue;
+
 		if (set->flags & NFT_SET_ANONYMOUS &&
 		    !list_empty(&set->bindings))
 			continue;
@@ -770,6 +736,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
 	}
 
 	list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
+		if (!nft_is_active_next(ctx->net, chain))
+			continue;
+
 		ctx->chain = chain;
 
 		err = nft_delchain(ctx);
@@ -795,6 +764,9 @@ static int nft_flush(struct nft_ctx *ctx, int family)
 
 		ctx->afi = afi;
 		list_for_each_entry_safe(table, nt, &afi->tables, list) {
+			if (!nft_is_active_next(ctx->net, table))
+				continue;
+
 			if (nla[NFTA_TABLE_NAME] &&
 			    nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
 				continue;
@@ -815,6 +787,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	int family = nfmsg->nfgen_family;
@@ -828,7 +801,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -875,12 +848,14 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
  */
 
 static struct nft_chain *
-nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
+				u8 genmask)
 {
 	struct nft_chain *chain;
 
 	list_for_each_entry(chain, &table->chains, list) {
-		if (chain->handle == handle)
+		if (chain->handle == handle &&
+		    nft_active_genmask(chain, genmask))
 			return chain;
 	}
 
@@ -888,7 +863,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
 }
 
 static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
-						const struct nlattr *nla)
+						const struct nlattr *nla,
+						u8 genmask)
 {
 	struct nft_chain *chain;
 
@@ -896,7 +872,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
 		return ERR_PTR(-EINVAL);
 
 	list_for_each_entry(chain, &table->chains, list) {
-		if (!nla_strcmp(nla, chain->name))
+		if (!nla_strcmp(nla, chain->name) &&
+		    nft_active_genmask(chain, genmask))
 			return chain;
 	}
 
@@ -1079,6 +1056,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
 				if (idx > s_idx)
 					memset(&cb->args[1], 0,
 					       sizeof(cb->args) - sizeof(cb->args[0]));
+				if (!nft_is_active(net, chain))
+					continue;
 				if (nf_tables_fill_chain_info(skb, net,
 							      NETLINK_CB(cb->skb).portid,
 							      cb->nlh->nlmsg_seq,
@@ -1104,6 +1083,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_cur(net);
 	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	const struct nft_chain *chain;
@@ -1122,17 +1102,13 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
-	if (table->flags & NFT_TABLE_INACTIVE)
-		return -ENOENT;
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
-	if (chain->flags & NFT_CHAIN_INACTIVE)
-		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -1231,6 +1207,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	struct nft_chain *chain;
 	struct nft_base_chain *basechain = NULL;
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
 	struct net_device *dev = NULL;
 	u8 policy = NF_ACCEPT;
@@ -1247,7 +1224,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -1256,11 +1233,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	if (nla[NFTA_CHAIN_HANDLE]) {
 		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
-		chain = nf_tables_chain_lookup_byhandle(table, handle);
+		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 	} else {
-		chain = nf_tables_chain_lookup(table, name);
+		chain = nf_tables_chain_lookup(table, name, genmask);
 		if (IS_ERR(chain)) {
 			if (PTR_ERR(chain) != -ENOENT)
 				return PTR_ERR(chain);
@@ -1291,16 +1268,20 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 		struct nft_stats *stats = NULL;
 		struct nft_trans *trans;
 
-		if (chain->flags & NFT_CHAIN_INACTIVE)
-			return -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
-		if (nla[NFTA_CHAIN_HANDLE] && name &&
-		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
-			return -EEXIST;
+		if (nla[NFTA_CHAIN_HANDLE] && name) {
+			struct nft_chain *chain2;
+
+			chain2 = nf_tables_chain_lookup(table,
+							nla[NFTA_CHAIN_NAME],
+							genmask);
+			if (IS_ERR(chain2))
+				return PTR_ERR(chain2);
+		}
 
 		if (nla[NFTA_CHAIN_COUNTERS]) {
 			if (!(chain->flags & NFT_BASE_CHAIN))
@@ -1455,7 +1436,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	chain->table = table;
 	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
 
-	err = nf_tables_register_hooks(table, chain, afi->nops);
+	err = nf_tables_register_hooks(net, table, chain, afi->nops);
 	if (err < 0)
 		goto err1;
 
@@ -1468,7 +1449,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	list_add_tail_rcu(&chain->list, &table->chains);
 	return 0;
 err2:
-	nf_tables_unregister_hooks(table, chain, afi->nops);
+	nf_tables_unregister_hooks(net, table, chain, afi->nops);
 err1:
 	nf_tables_chain_destroy(chain);
 	return err;
@@ -1479,6 +1460,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -1489,11 +1471,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 	if (chain->use > 0)
@@ -1898,7 +1880,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 		list_for_each_entry_rcu(table, &afi->tables, list) {
 			list_for_each_entry_rcu(chain, &table->chains, list) {
 				list_for_each_entry_rcu(rule, &chain->rules, list) {
-					if (!nft_rule_is_active(net, rule))
+					if (!nft_is_active(net, rule))
 						goto cont;
 					if (idx < s_idx)
 						goto cont;
@@ -1931,6 +1913,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_cur(net);
 	const struct nft_af_info *afi;
 	const struct nft_table *table;
 	const struct nft_chain *chain;
@@ -1950,17 +1933,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
-	if (table->flags & NFT_TABLE_INACTIVE)
-		return -ENOENT;
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
-	if (chain->flags & NFT_CHAIN_INACTIVE)
-		return -ENOENT;
 
 	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 	if (IS_ERR(rule))
@@ -2009,6 +1988,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -2029,11 +2009,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
@@ -2102,7 +2082,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	if (rule == NULL)
 		goto err1;
 
-	nft_rule_activate_next(net, rule);
+	nft_activate_next(net, rule);
 
 	rule->handle = handle;
 	rule->dlen   = size;
@@ -2124,14 +2104,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		if (nft_rule_is_active_next(net, old_rule)) {
+		if (nft_is_active_next(net, old_rule)) {
 			trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
 						   old_rule);
 			if (trans == NULL) {
 				err = -ENOMEM;
 				goto err2;
 			}
-			nft_rule_deactivate_next(net, old_rule);
+			nft_deactivate_next(net, old_rule);
 			chain->use--;
 			list_add_tail_rcu(&rule->list, &old_rule->list);
 		} else {
@@ -2174,6 +2154,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain = NULL;
@@ -2185,12 +2166,13 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	if (nla[NFTA_RULE_CHAIN]) {
-		chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+		chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN],
+					       genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 	}
@@ -2210,6 +2192,9 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 		}
 	} else {
 		list_for_each_entry(chain, &table->chains, list) {
+			if (!nft_is_active_next(net, chain))
+				continue;
+
 			ctx.chain = chain;
 			err = nft_delrule_by_chain(&ctx);
 			if (err < 0)
@@ -2339,7 +2324,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
 static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 				     const struct sk_buff *skb,
 				     const struct nlmsghdr *nlh,
-				     const struct nlattr * const nla[])
+				     const struct nlattr * const nla[],
+				     u8 genmask)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nft_af_info *afi = NULL;
@@ -2355,7 +2341,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 		if (afi == NULL)
 			return -EAFNOSUPPORT;
 
-		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
+					       genmask);
 		if (IS_ERR(table))
 			return PTR_ERR(table);
 	}
@@ -2365,7 +2352,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 }
 
 struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
-				     const struct nlattr *nla)
+				     const struct nlattr *nla, u8 genmask)
 {
 	struct nft_set *set;
 
@@ -2373,22 +2360,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 		return ERR_PTR(-EINVAL);
 
 	list_for_each_entry(set, &table->sets, list) {
-		if (!nla_strcmp(nla, set->name))
+		if (!nla_strcmp(nla, set->name) &&
+		    nft_active_genmask(set, genmask))
 			return set;
 	}
 	return ERR_PTR(-ENOENT);
 }
 
 struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
-					  const struct nlattr *nla)
+					  const struct nlattr *nla,
+					  u8 genmask)
 {
 	struct nft_trans *trans;
 	u32 id = ntohl(nla_get_be32(nla));
 
 	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		struct nft_set *set = nft_trans_set(trans);
+
 		if (trans->msg_type == NFT_MSG_NEWSET &&
-		    id == nft_trans_set_id(trans))
-			return nft_trans_set(trans);
+		    id == nft_trans_set_id(trans) &&
+		    nft_active_genmask(set, genmask))
+			return set;
 	}
 	return ERR_PTR(-ENOENT);
 }
@@ -2413,6 +2405,8 @@ cont:
 		list_for_each_entry(i, &ctx->table->sets, list) {
 			int tmp;
 
+			if (!nft_is_active_next(ctx->net, set))
+				continue;
 			if (!sscanf(i->name, name, &tmp))
 				continue;
 			if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
@@ -2432,6 +2426,8 @@ cont:
 
 	snprintf(set->name, sizeof(set->name), name, min + n);
 	list_for_each_entry(i, &ctx->table->sets, list) {
+		if (!nft_is_active_next(ctx->net, i))
+			continue;
 		if (!strcmp(set->name, i->name))
 			return -ENFILE;
 	}
@@ -2580,6 +2576,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 			list_for_each_entry_rcu(set, &table->sets, list) {
 				if (idx < s_idx)
 					goto cont;
+				if (!nft_is_active(net, set))
+					goto cont;
 
 				ctx_set = *ctx;
 				ctx_set.table = table;
@@ -2616,6 +2614,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
 {
+	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
 	struct nft_ctx ctx;
 	struct sk_buff *skb2;
@@ -2623,7 +2622,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
 	int err;
 
 	/* Verify existence before starting dump */
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
+	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
 	if (err < 0)
 		return err;
 
@@ -2650,11 +2649,9 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
 	if (!nla[NFTA_SET_TABLE])
 		return -EINVAL;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
-	if (set->flags & NFT_SET_INACTIVE)
-		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb2 == NULL)
@@ -2693,6 +2690,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 			    const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	const struct nft_set_ops *ops;
 	struct nft_af_info *afi;
 	struct nft_table *table;
@@ -2790,13 +2788,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
 
-	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
+	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set)) {
 		if (PTR_ERR(set) != -ENOENT)
 			return PTR_ERR(set);
@@ -2895,6 +2893,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 			    const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u8 genmask = nft_genmask_next(net);
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	int err;
@@ -2904,11 +2903,11 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_TABLE] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
+	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 	if (!list_empty(&set->bindings))
@@ -2946,24 +2945,20 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		 * jumps are already validated for that chain.
 		 */
 		list_for_each_entry(i, &set->bindings, list) {
-			if (binding->flags & NFT_SET_MAP &&
+			if (i->flags & NFT_SET_MAP &&
 			    i->chain == binding->chain)
 				goto bind;
 		}
 
+		iter.genmask	= nft_genmask_next(ctx->net);
 		iter.skip 	= 0;
 		iter.count	= 0;
 		iter.err	= 0;
 		iter.fn		= nf_tables_bind_check_setelem;
 
 		set->ops->walk(ctx, set, &iter);
-		if (iter.err < 0) {
-			/* Destroy anonymous sets if binding fails */
-			if (set->flags & NFT_SET_ANONYMOUS)
-				nf_tables_set_destroy(ctx, set);
-
+		if (iter.err < 0)
 			return iter.err;
-		}
 	}
 bind:
 	binding->chain = ctx->chain;
@@ -2977,7 +2972,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 	list_del_rcu(&binding->list);
 
 	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
-	    !(set->flags & NFT_SET_INACTIVE))
+	    nft_is_active(ctx->net, set))
 		nf_tables_set_destroy(ctx, set);
 }
 
@@ -3033,7 +3028,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
 static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
 				      const struct sk_buff *skb,
 				      const struct nlmsghdr *nlh,
-				      const struct nlattr * const nla[])
+				      const struct nlattr * const nla[],
+				      u8 genmask)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nft_af_info *afi;
@@ -3043,7 +3039,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
+	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
+				       genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -3140,6 +3137,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
 static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
+	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
 	struct nft_set_dump_args args;
 	struct nft_ctx ctx;
@@ -3156,17 +3154,14 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 		return err;
 
 	err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
-					 (void *)nla);
+					 (void *)nla, genmask);
 	if (err < 0)
 		return err;
-	if (ctx.table->flags & NFT_TABLE_INACTIVE)
-		return -ENOENT;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+				   genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
-	if (set->flags & NFT_SET_INACTIVE)
-		return -ENOENT;
 
 	event  = NFT_MSG_NEWSETELEM;
 	event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -3192,12 +3187,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	args.cb		= cb;
-	args.skb	= skb;
-	args.iter.skip	= cb->args[0];
-	args.iter.count	= 0;
-	args.iter.err   = 0;
-	args.iter.fn	= nf_tables_dump_setelem;
+	args.cb			= cb;
+	args.skb		= skb;
+	args.iter.genmask	= nft_genmask_cur(ctx.net);
+	args.iter.skip		= cb->args[0];
+	args.iter.count		= 0;
+	args.iter.err		= 0;
+	args.iter.fn		= nf_tables_dump_setelem;
 	set->ops->walk(&ctx, set, &args.iter);
 
 	nla_nest_end(skb, nest);
@@ -3219,21 +3215,19 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
 {
+	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
 	struct nft_ctx ctx;
 	int err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
 	if (err < 0)
 		return err;
-	if (ctx.table->flags & NFT_TABLE_INACTIVE)
-		return -ENOENT;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+				   genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
-	if (set->flags & NFT_SET_INACTIVE)
-		return -ENOENT;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -3551,6 +3545,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
 {
+	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -3559,15 +3554,17 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+				   genmask);
 	if (IS_ERR(set)) {
 		if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
 			set = nf_tables_set_lookup_byid(net,
-					nla[NFTA_SET_ELEM_LIST_SET_ID]);
+					nla[NFTA_SET_ELEM_LIST_SET_ID],
+					genmask);
 		}
 		if (IS_ERR(set))
 			return PTR_ERR(set);
@@ -3673,6 +3670,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
 {
+	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -3681,11 +3679,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+				   genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -3953,36 +3952,40 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
 				if (!nft_trans_table_enable(trans)) {
-					nf_tables_table_disable(trans->ctx.afi,
+					nf_tables_table_disable(net,
+								trans->ctx.afi,
 								trans->ctx.table);
 					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
 				}
 			} else {
-				trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+				nft_clear(net, trans->ctx.table);
 			}
 			nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELTABLE:
+			list_del_rcu(&trans->ctx.table->list);
 			nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
 			break;
 		case NFT_MSG_NEWCHAIN:
 			if (nft_trans_chain_update(trans))
 				nft_chain_commit_update(trans);
 			else
-				trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+				nft_clear(net, trans->ctx.chain);
 
 			nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELCHAIN:
+			list_del_rcu(&trans->ctx.chain->list);
 			nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
-			nf_tables_unregister_hooks(trans->ctx.table,
+			nf_tables_unregister_hooks(trans->ctx.net,
+						   trans->ctx.table,
 						   trans->ctx.chain,
 						   trans->ctx.afi->nops);
 			break;
 		case NFT_MSG_NEWRULE:
-			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+			nft_clear(trans->ctx.net, nft_trans_rule(trans));
 			nf_tables_rule_notify(&trans->ctx,
 					      nft_trans_rule(trans),
 					      NFT_MSG_NEWRULE);
@@ -3995,7 +3998,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 					      NFT_MSG_DELRULE);
 			break;
 		case NFT_MSG_NEWSET:
-			nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+			nft_clear(net, nft_trans_set(trans));
 			/* This avoids hitting -EBUSY when deleting the table
 			 * from the transaction.
 			 */
@@ -4008,6 +4011,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELSET:
+			list_del_rcu(&nft_trans_set(trans)->list);
 			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
 					     NFT_MSG_DELSET, GFP_KERNEL);
 			break;
@@ -4079,7 +4083,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
 				if (nft_trans_table_enable(trans)) {
-					nf_tables_table_disable(trans->ctx.afi,
+					nf_tables_table_disable(net,
+								trans->ctx.afi,
 								trans->ctx.table);
 					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
 				}
@@ -4089,8 +4094,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 			}
 			break;
 		case NFT_MSG_DELTABLE:
-			list_add_tail_rcu(&trans->ctx.table->list,
-					  &trans->ctx.afi->tables);
+			nft_clear(trans->ctx.net, trans->ctx.table);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWCHAIN:
@@ -4101,15 +4105,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 			} else {
 				trans->ctx.table->use--;
 				list_del_rcu(&trans->ctx.chain->list);
-				nf_tables_unregister_hooks(trans->ctx.table,
+				nf_tables_unregister_hooks(trans->ctx.net,
+							   trans->ctx.table,
 							   trans->ctx.chain,
 							   trans->ctx.afi->nops);
 			}
 			break;
 		case NFT_MSG_DELCHAIN:
 			trans->ctx.table->use++;
-			list_add_tail_rcu(&trans->ctx.chain->list,
-					  &trans->ctx.table->chains);
+			nft_clear(trans->ctx.net, trans->ctx.chain);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWRULE:
@@ -4118,7 +4122,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 			break;
 		case NFT_MSG_DELRULE:
 			trans->ctx.chain->use++;
-			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+			nft_clear(trans->ctx.net, nft_trans_rule(trans));
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSET:
@@ -4127,8 +4131,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 			break;
 		case NFT_MSG_DELSET:
 			trans->ctx.table->use++;
-			list_add_tail_rcu(&nft_trans_set(trans)->list,
-					  &trans->ctx.table->sets);
+			nft_clear(trans->ctx.net, nft_trans_set(trans));
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSETELEM:
@@ -4275,6 +4278,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 	}
 
 	list_for_each_entry(set, &ctx->table->sets, list) {
+		if (!nft_is_active_next(ctx->net, set))
+			continue;
 		if (!(set->flags & NFT_SET_MAP) ||
 		    set->dtype != NFT_DATA_VERDICT)
 			continue;
@@ -4284,6 +4289,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 			    binding->chain != chain)
 				continue;
 
+			iter.genmask	= nft_genmask_next(ctx->net);
 			iter.skip 	= 0;
 			iter.count	= 0;
 			iter.err	= 0;
@@ -4432,6 +4438,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
 static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 			    struct nft_data_desc *desc, const struct nlattr *nla)
 {
+	u8 genmask = nft_genmask_next(ctx->net);
 	struct nlattr *tb[NFTA_VERDICT_MAX + 1];
 	struct nft_chain *chain;
 	int err;
@@ -4464,7 +4471,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 		if (!tb[NFTA_VERDICT_CHAIN])
 			return -EINVAL;
 		chain = nf_tables_chain_lookup(ctx->table,
-					       tb[NFTA_VERDICT_CHAIN]);
+					       tb[NFTA_VERDICT_CHAIN], genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 		if (chain->flags & NFT_BASE_CHAIN)
@@ -4642,7 +4649,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
 
 	BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN));
 
-	nf_tables_unregister_hooks(ctx->chain->table, ctx->chain,
+	nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
 				   ctx->afi->nops);
 	list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
 		list_del(&rule->list);
@@ -4671,7 +4678,8 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
 
 	list_for_each_entry_safe(table, nt, &afi->tables, list) {
 		list_for_each_entry(chain, &table->chains, list)
-			nf_tables_unregister_hooks(table, chain, afi->nops);
+			nf_tables_unregister_hooks(net, table, chain,
+						   afi->nops);
 		/* No packets are walking on these chains anymore. */
 		ctx.table = table;
 		list_for_each_entry(chain, &table->chains, list) {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index e9f8dffcc244..fb8b5892b5ff 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -143,7 +143,7 @@ next_rule:
 	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
 
 		/* This rule is not active, skip. */
-		if (unlikely(rule->genmask & (1 << gencursor)))
+		if (unlikely(rule->genmask & gencursor))
 			continue;
 
 		rulenum++;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11f81c8385fc..cbcfdfb586a6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -700,10 +700,13 @@ nfulnl_log_packet(struct net *net,
 		break;
 
 	case NFULNL_COPY_PACKET:
-		if (inst->copy_range > skb->len)
+		data_len = inst->copy_range;
+		if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
+		    (li->u.ulog.copy_len < data_len))
+			data_len = li->u.ulog.copy_len;
+
+		if (data_len > skb->len)
 			data_len = skb->len;
-		else
-			data_len = inst->copy_range;
 
 		size += nla_total_size(data_len);
 		break;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 78d4914fb39c..0af26699bf04 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -103,6 +103,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_dynset *priv = nft_expr_priv(expr);
+	u8 genmask = nft_genmask_next(ctx->net);
 	struct nft_set *set;
 	u64 timeout;
 	int err;
@@ -112,11 +113,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
 		return -EINVAL;
 
-	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
+				   genmask);
 	if (IS_ERR(set)) {
 		if (tb[NFTA_DYNSET_SET_ID])
 			set = nf_tables_set_lookup_byid(ctx->net,
-							tb[NFTA_DYNSET_SET_ID]);
+							tb[NFTA_DYNSET_SET_ID],
+							genmask);
 		if (IS_ERR(set))
 			return PTR_ERR(set);
 	}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 6fa016564f90..ea924816b7b8 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -153,9 +153,10 @@ static void *nft_hash_deactivate(const struct nft_set *set,
 				 const struct nft_set_elem *elem)
 {
 	struct nft_hash *priv = nft_set_priv(set);
+	struct net *net = read_pnet(&set->pnet);
 	struct nft_hash_elem *he;
 	struct nft_hash_cmp_arg arg = {
-		.genmask = nft_genmask_next(read_pnet(&set->pnet)),
+		.genmask = nft_genmask_next(net),
 		.set	 = set,
 		.key	 = elem->key.val.data,
 	};
@@ -163,7 +164,8 @@ static void *nft_hash_deactivate(const struct nft_set *set,
 	rcu_read_lock();
 	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
 	if (he != NULL) {
-		if (!nft_set_elem_mark_busy(&he->ext))
+		if (!nft_set_elem_mark_busy(&he->ext) ||
+		    !nft_is_active(net, &he->ext))
 			nft_set_elem_change_active(set, &he->ext);
 		else
 			he = NULL;
@@ -189,7 +191,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 	struct nft_hash_elem *he;
 	struct rhashtable_iter hti;
 	struct nft_set_elem elem;
-	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int err;
 
 	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
@@ -218,7 +219,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 			goto cont;
 		if (nft_set_elem_expired(&he->ext))
 			goto cont;
-		if (!nft_set_elem_active(&he->ext, genmask))
+		if (!nft_set_elem_active(&he->ext, iter->genmask))
 			goto cont;
 
 		elem.priv = he;
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 319c22b4bca2..713d66837705 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -52,7 +52,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
 	struct nft_log *priv = nft_expr_priv(expr);
 	struct nf_loginfo *li = &priv->loginfo;
 	const struct nlattr *nla;
-	int ret;
 
 	nla = tb[NFTA_LOG_PREFIX];
 	if (nla != NULL) {
@@ -97,19 +96,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
 		break;
 	}
 
-	if (ctx->afi->family == NFPROTO_INET) {
-		ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
-		if (ret < 0)
-			return ret;
-
-		ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
-		if (ret < 0) {
-			nf_logger_put(NFPROTO_IPV4, li->type);
-			return ret;
-		}
-		return 0;
-	}
-
 	return nf_logger_find_get(ctx->afi->family, li->type);
 }
 
@@ -122,12 +108,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
 	if (priv->prefix != nft_log_null_prefix)
 		kfree(priv->prefix);
 
-	if (ctx->afi->family == NFPROTO_INET) {
-		nf_logger_put(NFPROTO_IPV4, li->type);
-		nf_logger_put(NFPROTO_IPV6, li->type);
-	} else {
-		nf_logger_put(ctx->afi->family, li->type);
-	}
+	nf_logger_put(ctx->afi->family, li->type);
 }
 
 static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b3c31ef8015d..b8d18f598569 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -22,6 +22,7 @@ struct nft_lookup {
 	struct nft_set			*set;
 	enum nft_registers		sreg:8;
 	enum nft_registers		dreg:8;
+	bool				invert;
 	struct nft_set_binding		binding;
 };
 
@@ -32,14 +33,20 @@ static void nft_lookup_eval(const struct nft_expr *expr,
 	const struct nft_lookup *priv = nft_expr_priv(expr);
 	const struct nft_set *set = priv->set;
 	const struct nft_set_ext *ext;
+	bool found;
 
-	if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
-		if (set->flags & NFT_SET_MAP)
-			nft_data_copy(&regs->data[priv->dreg],
-				      nft_set_ext_data(ext), set->dlen);
+	found = set->ops->lookup(set, &regs->data[priv->sreg], &ext) ^
+		priv->invert;
+
+	if (!found) {
+		regs->verdict.code = NFT_BREAK;
 		return;
 	}
-	regs->verdict.code = NFT_BREAK;
+
+	if (found && set->flags & NFT_SET_MAP)
+		nft_data_copy(&regs->data[priv->dreg],
+			      nft_set_ext_data(ext), set->dlen);
+
 }
 
 static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
@@ -47,6 +54,7 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
 	[NFTA_LOOKUP_SET_ID]	= { .type = NLA_U32 },
 	[NFTA_LOOKUP_SREG]	= { .type = NLA_U32 },
 	[NFTA_LOOKUP_DREG]	= { .type = NLA_U32 },
+	[NFTA_LOOKUP_FLAGS]	= { .type = NLA_U32 },
 };
 
 static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -54,18 +62,21 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_lookup *priv = nft_expr_priv(expr);
+	u8 genmask = nft_genmask_next(ctx->net);
 	struct nft_set *set;
+	u32 flags;
 	int err;
 
 	if (tb[NFTA_LOOKUP_SET] == NULL ||
 	    tb[NFTA_LOOKUP_SREG] == NULL)
 		return -EINVAL;
 
-	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
 	if (IS_ERR(set)) {
 		if (tb[NFTA_LOOKUP_SET_ID]) {
 			set = nf_tables_set_lookup_byid(ctx->net,
-							tb[NFTA_LOOKUP_SET_ID]);
+							tb[NFTA_LOOKUP_SET_ID],
+							genmask);
 		}
 		if (IS_ERR(set))
 			return PTR_ERR(set);
@@ -79,7 +90,22 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 	if (err < 0)
 		return err;
 
+	if (tb[NFTA_LOOKUP_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
+
+		if (flags & ~NFT_LOOKUP_F_INV)
+			return -EINVAL;
+
+		if (flags & NFT_LOOKUP_F_INV) {
+			if (set->flags & NFT_SET_MAP)
+				return -EINVAL;
+			priv->invert = true;
+		}
+	}
+
 	if (tb[NFTA_LOOKUP_DREG] != NULL) {
+		if (priv->invert)
+			return -EINVAL;
 		if (!(set->flags & NFT_SET_MAP))
 			return -EINVAL;
 
@@ -112,6 +138,7 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx,
 static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_lookup *priv = nft_expr_priv(expr);
+	u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0;
 
 	if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
 		goto nla_put_failure;
@@ -120,6 +147,8 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	if (priv->set->flags & NFT_SET_MAP)
 		if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
 			goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 16c50b0dd426..03e5e33b5c39 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -199,13 +199,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(nft_meta_get_eval);
 
-/* don't change or set _LOOPBACK, _USER, etc. */
-static bool pkt_type_ok(u32 p)
-{
-	return p == PACKET_HOST || p == PACKET_BROADCAST ||
-	       p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
-}
-
 void nft_meta_set_eval(const struct nft_expr *expr,
 		       struct nft_regs *regs,
 		       const struct nft_pktinfo *pkt)
@@ -223,7 +216,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_PKTTYPE:
 		if (skb->pkt_type != value &&
-		    pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
+		    skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type))
 			skb->pkt_type = value;
 		break;
 	case NFT_META_NFTRACE:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index f762094af7c1..c0f638745adc 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -170,7 +170,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
 	const struct nft_rbtree *priv = nft_set_priv(set);
 	const struct rb_node *parent = priv->root.rb_node;
 	struct nft_rbtree_elem *rbe, *this = elem->priv;
-	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+	u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
 	int d;
 
 	while (parent != NULL) {
@@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 	struct nft_rbtree_elem *rbe;
 	struct nft_set_elem elem;
 	struct rb_node *node;
-	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 
 	spin_lock_bh(&nft_rbtree_lock);
 	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
@@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
 		if (iter->count < iter->skip)
 			goto cont;
-		if (!nft_set_elem_active(&rbe->ext, genmask))
+		if (!nft_set_elem_active(&rbe->ext, iter->genmask))
 			goto cont;
 
 		elem.priv = rbe;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2675d580c490..fe0e2db632c7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1460,6 +1460,9 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
 	uint8_t hooknum;
 	struct nf_hook_ops *ops;
 
+	if (!num_hooks)
+		return ERR_PTR(-EINVAL);
+
 	ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
 	if (ops == NULL)
 		return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a1fa2c800cb9..018eed7e1ff1 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -33,6 +33,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
+	if (info->flags & XT_NFLOG_F_COPY_LEN)
+		li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
+
 	nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
 			  par->out, &li, info->prefix);
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index df48967af382..858d189a1303 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -4,12 +4,23 @@
 #include <linux/skbuff.h>
 
 #include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_DESCRIPTION("Xtables: packet flow tracing");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
+static int trace_tg_check(const struct xt_tgchk_param *par)
+{
+	return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void trace_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_logger_put(par->family, NF_LOG_TYPE_LOG);
+}
+
 static unsigned int
 trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static struct xt_target trace_tg_reg __read_mostly = {
-	.name       = "TRACE",
-	.revision   = 0,
-	.family     = NFPROTO_UNSPEC,
-	.table      = "raw",
-	.target     = trace_tg,
-	.me         = THIS_MODULE,
+	.name		= "TRACE",
+	.revision	= 0,
+	.family		= NFPROTO_UNSPEC,
+	.table		= "raw",
+	.target		= trace_tg,
+	.checkentry	= trace_tg_check,
+	.destroy	= trace_tg_destroy,
+	.me		= THIS_MODULE,
 };
 
 static int __init trace_tg_init(void)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 1302b475abcb..a20e731b5b6c 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -21,11 +21,39 @@
 static int owner_check(const struct xt_mtchk_param *par)
 {
 	struct xt_owner_match_info *info = par->matchinfo;
+	struct net *net = par->net;
 
-	/* For now only allow adding matches from the initial user namespace */
+	/* Only allow the common case where the userns of the writer
+	 * matches the userns of the network namespace.
+	 */
 	if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
-	    (current_user_ns() != &init_user_ns))
+	    (current_user_ns() != net->user_ns))
 		return -EINVAL;
+
+	/* Ensure the uids are valid */
+	if (info->match & XT_OWNER_UID) {
+		kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+		kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+
+		if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
+		    (info->uid_max < info->uid_min) ||
+		    uid_lt(uid_max, uid_min)) {
+			return -EINVAL;
+		}
+	}
+
+	/* Ensure the gids are valid */
+	if (info->match & XT_OWNER_GID) {
+		kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+		kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+
+		if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
+		    (info->gid_max < info->gid_min) ||
+		    gid_lt(gid_max, gid_min)) {
+			return -EINVAL;
+		}
+	}
+
 	return 0;
 }
 
@@ -35,6 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
 	struct sock *sk = skb_to_full_sk(skb);
+	struct net *net = par->net;
 
 	if (sk == NULL || sk->sk_socket == NULL)
 		return (info->match ^ info->invert) == 0;
@@ -51,8 +80,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
 
 	if (info->match & XT_OWNER_UID) {
-		kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
-		kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
+		kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+		kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
 		if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
 		     uid_lte(filp->f_cred->fsuid, uid_max)) ^
 		    !(info->invert & XT_OWNER_UID))
@@ -60,8 +89,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	if (info->match & XT_OWNER_GID) {
-		kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
-		kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
+		kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+		kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
 		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
 		     gid_lte(filp->f_cred->fsgid, gid_max)) ^
 		    !(info->invert & XT_OWNER_GID))
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index c14d4645daa3..ade024c90f4f 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -83,8 +83,6 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		return false;
 	}
 
-#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
-
 	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		/* We've been asked to examine this packet, and we
@@ -102,9 +100,8 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			ntohs(th->dest),
 			!!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
 		return false;
-	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
-		      == tcpinfo->flg_cmp,
-		      XT_TCP_INV_FLAGS))
+	if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS,
+		     (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp))
 		return false;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 52f3b9b89e97..b4069a90e375 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -818,8 +818,18 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 		 */
 		state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
 		__ovs_ct_update_key(key, state, &info->zone, exp->master);
-	} else
-		return __ovs_ct_lookup(net, key, info, skb);
+	} else {
+		struct nf_conn *ct;
+		int err;
+
+		err = __ovs_ct_lookup(net, key, info, skb);
+		if (err)
+			return err;
+
+		ct = (struct nf_conn *)skb->nfct;
+		if (ct)
+			nf_ct_deliver_cached_events(ct);
+	}
 
 	return 0;
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d1f3b9e977e5..9d92c4c46871 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
 				      struct sk_buff *skb,
 				      unsigned int num)
 {
-	return reciprocal_scale(skb_get_hash(skb), num);
+	return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
 }
 
 static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1588,13 +1588,9 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
 	if (copy_from_user(&fd, data, len))
 		return -EFAULT;
 
-	new = bpf_prog_get(fd);
+	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
-	if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) {
-		bpf_prog_put(new);
-		return -EINVAL;
-	}
 
 	__fanout_set_data_bpf(po->fanout, new);
 	return 0;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index a4b07c899d89..19a4fee5f4dd 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -253,9 +253,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
 			for (i = 0; i < RDS_MPATH_WORKERS; i++) {
 				cp = &conn->c_path[i];
-				trans->conn_free(cp->cp_transport_data);
-				if (!trans->t_mp_capable)
-					break;
+				/* The ->conn_alloc invocation may have
+				 * allocated resource for all paths, so all
+				 * of them may have to be freed here.
+				 */
+				if (cp->cp_transport_data)
+					trans->conn_free(cp->cp_transport_data);
 			}
 			kmem_cache_free(rds_conn_slab, conn);
 			conn = found;
@@ -326,10 +329,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
 		wait_event(cp->cp_waitq,
 			   !test_bit(RDS_RECV_REFILL, &cp->cp_flags));
 
-		if (!conn->c_trans->t_mp_capable)
-			conn->c_trans->conn_shutdown(conn);
-		else
-			conn->c_trans->conn_path_shutdown(cp);
+		conn->c_trans->conn_path_shutdown(cp);
 		rds_conn_path_reset(cp);
 
 		if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
@@ -355,9 +355,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
 	rcu_read_lock();
 	if (!hlist_unhashed(&conn->c_hash_node)) {
 		rcu_read_unlock();
-		if (conn->c_trans->t_type != RDS_TRANS_TCP ||
-		    cp->cp_outgoing == 1)
-			rds_queue_reconnect(cp);
+		rds_queue_reconnect(cp);
 	} else {
 		rcu_read_unlock();
 	}
@@ -370,6 +368,9 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 {
 	struct rds_message *rm, *rtmp;
 
+	if (!cp->cp_transport_data)
+		return;
+
 	rds_conn_path_drop(cp);
 	flush_work(&cp->cp_down_w);
 
@@ -401,6 +402,8 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 void rds_conn_destroy(struct rds_connection *conn)
 {
 	unsigned long flags;
+	int i;
+	struct rds_conn_path *cp;
 
 	rdsdebug("freeing conn %p for %pI4 -> "
 		 "%pI4\n", conn, &conn->c_laddr,
@@ -413,18 +416,10 @@ void rds_conn_destroy(struct rds_connection *conn)
 	synchronize_rcu();
 
 	/* shut the connection down */
-	if (!conn->c_trans->t_mp_capable) {
-		rds_conn_path_destroy(&conn->c_path[0]);
-		BUG_ON(!list_empty(&conn->c_path[0].cp_retrans));
-	} else {
-		int i;
-		struct rds_conn_path *cp;
-
-		for (i = 0; i < RDS_MPATH_WORKERS; i++) {
-			cp = &conn->c_path[i];
-			rds_conn_path_destroy(cp);
-			BUG_ON(!list_empty(&cp->cp_retrans));
-		}
+	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+		cp = &conn->c_path[i];
+		rds_conn_path_destroy(cp);
+		BUG_ON(!list_empty(&cp->cp_retrans));
 	}
 
 	/*
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 44946a681a8c..7eaf887e46f8 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -381,15 +381,15 @@ void rds_ib_exit(void)
 
 struct rds_transport rds_ib_transport = {
 	.laddr_check		= rds_ib_laddr_check,
-	.xmit_complete		= rds_ib_xmit_complete,
+	.xmit_path_complete	= rds_ib_xmit_path_complete,
 	.xmit			= rds_ib_xmit,
 	.xmit_rdma		= rds_ib_xmit_rdma,
 	.xmit_atomic		= rds_ib_xmit_atomic,
-	.recv			= rds_ib_recv,
+	.recv_path		= rds_ib_recv_path,
 	.conn_alloc		= rds_ib_conn_alloc,
 	.conn_free		= rds_ib_conn_free,
-	.conn_connect		= rds_ib_conn_connect,
-	.conn_shutdown		= rds_ib_conn_shutdown,
+	.conn_path_connect	= rds_ib_conn_path_connect,
+	.conn_path_shutdown	= rds_ib_conn_path_shutdown,
 	.inc_copy_to_user	= rds_ib_inc_copy_to_user,
 	.inc_free		= rds_ib_inc_free,
 	.cm_initiate_connect	= rds_ib_cm_initiate_connect,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 627fb79aee65..046f7508c06b 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -328,8 +328,8 @@ extern struct list_head ib_nodev_conns;
 /* ib_cm.c */
 int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
 void rds_ib_conn_free(void *arg);
-int rds_ib_conn_connect(struct rds_connection *conn);
-void rds_ib_conn_shutdown(struct rds_connection *conn);
+int rds_ib_conn_path_connect(struct rds_conn_path *cp);
+void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
 void rds_ib_state_change(struct sock *sk);
 int rds_ib_listen_init(void);
 void rds_ib_listen_stop(void);
@@ -354,7 +354,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
 /* ib_recv.c */
 int rds_ib_recv_init(void);
 void rds_ib_recv_exit(void);
-int rds_ib_recv(struct rds_connection *conn);
+int rds_ib_recv_path(struct rds_conn_path *conn);
 int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
 void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
 void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
@@ -384,7 +384,7 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
 extern wait_queue_head_t rds_ib_ring_empty_wait;
 
 /* ib_send.c */
-void rds_ib_xmit_complete(struct rds_connection *conn);
+void rds_ib_xmit_path_complete(struct rds_conn_path *cp);
 int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 		unsigned int hdr_off, unsigned int sg, unsigned int off);
 void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 334287602b78..5b2ab95afa07 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -112,7 +112,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 		}
 	}
 
-	if (conn->c_version < RDS_PROTOCOL(3,1)) {
+	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
 		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
 		       " no longer supported\n",
 		       &conn->c_faddr,
@@ -685,8 +685,9 @@ out:
 	return ret;
 }
 
-int rds_ib_conn_connect(struct rds_connection *conn)
+int rds_ib_conn_path_connect(struct rds_conn_path *cp)
 {
+	struct rds_connection *conn = cp->cp_conn;
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct sockaddr_in src, dest;
 	int ret;
@@ -731,8 +732,9 @@ out:
  * so that it can be called at any point during startup.  In fact it
  * can be called multiple times for a given connection.
  */
-void rds_ib_conn_shutdown(struct rds_connection *conn)
+void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
 {
+	struct rds_connection *conn = cp->cp_conn;
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	int err = 0;
 
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 4ea8cb17cc7a..606a11f681d2 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1009,8 +1009,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
 		rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
 }
 
-int rds_ib_recv(struct rds_connection *conn)
+int rds_ib_recv_path(struct rds_conn_path *cp)
 {
+	struct rds_connection *conn = cp->cp_conn;
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	int ret = 0;
 
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6e4110aa5135..84d90c97332f 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -980,8 +980,9 @@ out:
 	return ret;
 }
 
-void rds_ib_xmit_complete(struct rds_connection *conn)
+void rds_ib_xmit_path_complete(struct rds_conn_path *cp)
 {
+	struct rds_connection *conn = cp->cp_conn;
 	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We may have a pending ACK or window update we were unable
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 268f07faaa1a..f2bf78de5688 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -96,12 +96,13 @@ out:
  */
 static void rds_loop_inc_free(struct rds_incoming *inc)
 {
-        struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
-        rds_message_put(rm);
+	struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
+
+	rds_message_put(rm);
 }
 
 /* we need to at least give the thread something to succeed */
-static int rds_loop_recv(struct rds_connection *conn)
+static int rds_loop_recv_path(struct rds_conn_path *cp)
 {
 	return 0;
 }
@@ -149,13 +150,13 @@ static void rds_loop_conn_free(void *arg)
 	kfree(lc);
 }
 
-static int rds_loop_conn_connect(struct rds_connection *conn)
+static int rds_loop_conn_path_connect(struct rds_conn_path *cp)
 {
-	rds_connect_complete(conn);
+	rds_connect_complete(cp->cp_conn);
 	return 0;
 }
 
-static void rds_loop_conn_shutdown(struct rds_connection *conn)
+static void rds_loop_conn_path_shutdown(struct rds_conn_path *cp)
 {
 }
 
@@ -184,11 +185,11 @@ void rds_loop_exit(void)
  */
 struct rds_transport rds_loop_transport = {
 	.xmit			= rds_loop_xmit,
-	.recv			= rds_loop_recv,
+	.recv_path		= rds_loop_recv_path,
 	.conn_alloc		= rds_loop_conn_alloc,
 	.conn_free		= rds_loop_conn_free,
-	.conn_connect		= rds_loop_conn_connect,
-	.conn_shutdown		= rds_loop_conn_shutdown,
+	.conn_path_connect	= rds_loop_conn_path_connect,
+	.conn_path_shutdown	= rds_loop_conn_path_shutdown,
 	.inc_copy_to_user	= rds_message_inc_copy_to_user,
 	.inc_free		= rds_loop_inc_free,
 	.t_name			= "loopback",
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 2e35b738176f..6ef07bd27227 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -454,18 +454,15 @@ struct rds_transport {
 	int (*laddr_check)(struct net *net, __be32 addr);
 	int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
 	void (*conn_free)(void *data);
-	int (*conn_connect)(struct rds_connection *conn);
-	void (*conn_shutdown)(struct rds_connection *conn);
+	int (*conn_path_connect)(struct rds_conn_path *cp);
 	void (*conn_path_shutdown)(struct rds_conn_path *conn);
-	void (*xmit_prepare)(struct rds_connection *conn);
 	void (*xmit_path_prepare)(struct rds_conn_path *cp);
-	void (*xmit_complete)(struct rds_connection *conn);
 	void (*xmit_path_complete)(struct rds_conn_path *cp);
 	int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
 		    unsigned int hdr_off, unsigned int sg, unsigned int off);
 	int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
 	int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
-	int (*recv)(struct rds_connection *conn);
+	int (*recv_path)(struct rds_conn_path *cp);
 	int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to);
 	void (*inc_free)(struct rds_incoming *inc);
 
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b58f50571782..fed53a6c2890 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -226,6 +226,10 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 	cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
 
 	if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
+		if (inc->i_hdr.h_sport == 0) {
+			rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr);
+			goto out;
+		}
 		rds_stats_inc(s_recv_ping);
 		rds_send_pong(cp, inc->i_hdr.h_sport);
 		goto out;
diff --git a/net/rds/send.c b/net/rds/send.c
index ee43d6b2ea8f..5a9caf1da896 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -183,12 +183,8 @@ restart:
 		goto out;
 	}
 
-	if (conn->c_trans->t_mp_capable) {
-		if (conn->c_trans->xmit_path_prepare)
-			conn->c_trans->xmit_path_prepare(cp);
-	} else if (conn->c_trans->xmit_prepare) {
-		conn->c_trans->xmit_prepare(conn);
-	}
+	if (conn->c_trans->xmit_path_prepare)
+		conn->c_trans->xmit_path_prepare(cp);
 
 	/*
 	 * spin trying to push headers and data down the connection until
@@ -403,12 +399,8 @@ restart:
 	}
 
 over_batch:
-	if (conn->c_trans->t_mp_capable) {
-		if (conn->c_trans->xmit_path_complete)
-			conn->c_trans->xmit_path_complete(cp);
-	} else if (conn->c_trans->xmit_complete) {
-		conn->c_trans->xmit_complete(conn);
-	}
+	if (conn->c_trans->xmit_path_complete)
+		conn->c_trans->xmit_path_complete(cp);
 	release_in_xmit(cp);
 
 	/* Nuke any messages we decided not to retransmit. */
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index c173f69e1479..e381bbcd9cc1 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -102,7 +102,8 @@ int rds_sysctl_init(void)
 	rds_sysctl_reconnect_min = msecs_to_jiffies(1);
 	rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
 
-	rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table);
+	rds_sysctl_reg_table =
+		register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table);
 	if (!rds_sysctl_reg_table)
 		return -ENOMEM;
 	return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5217d49ce6d6..d24f6c142d03 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -136,9 +136,9 @@ void rds_tcp_restore_callbacks(struct socket *sock,
  * from being called while it isn't set.
  */
 void rds_tcp_reset_callbacks(struct socket *sock,
-			     struct rds_connection *conn)
+			     struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *osock = tc->t_sock;
 
 	if (!osock)
@@ -148,8 +148,8 @@ void rds_tcp_reset_callbacks(struct socket *sock,
 	 * We have an outstanding SYN to this peer, which may
 	 * potentially have transitioned to the RDS_CONN_UP state,
 	 * so we must quiesce any send threads before resetting
-	 * c_transport_data. We quiesce these threads by setting
-	 * c_state to something other than RDS_CONN_UP, and then
+	 * cp_transport_data. We quiesce these threads by setting
+	 * cp_state to something other than RDS_CONN_UP, and then
 	 * waiting for any existing threads in rds_send_xmit to
 	 * complete release_in_xmit(). (Subsequent threads entering
 	 * rds_send_xmit() will bail on !rds_conn_up().
@@ -164,8 +164,8 @@ void rds_tcp_reset_callbacks(struct socket *sock,
 	 * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
 	 * cannot mark rds_conn_path_up() in the window before lock_sock()
 	 */
-	atomic_set(&conn->c_state, RDS_CONN_RESETTING);
-	wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
+	atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
+	wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
 	lock_sock(osock->sk);
 	/* reset receive side state for rds_tcp_data_recv() for osock  */
 	if (tc->t_tinc) {
@@ -186,11 +186,12 @@ void rds_tcp_reset_callbacks(struct socket *sock,
 	release_sock(osock->sk);
 	sock_release(osock);
 newsock:
-	rds_send_path_reset(&conn->c_path[0]);
+	rds_send_path_reset(cp);
 	lock_sock(sock->sk);
 	write_lock_bh(&sock->sk->sk_callback_lock);
 	tc->t_sock = sock;
-	sock->sk->sk_user_data = conn;
+	tc->t_cpath = cp;
+	sock->sk->sk_user_data = cp;
 	sock->sk->sk_data_ready = rds_tcp_data_ready;
 	sock->sk->sk_write_space = rds_tcp_write_space;
 	sock->sk->sk_state_change = rds_tcp_state_change;
@@ -203,9 +204,9 @@ newsock:
  * above rds_tcp_reset_callbacks for notes about synchronization
  * with data path
  */
-void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
+void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
 	rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
 	write_lock_bh(&sock->sk->sk_callback_lock);
@@ -221,12 +222,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
 		sock->sk->sk_data_ready = sock->sk->sk_user_data;
 
 	tc->t_sock = sock;
-	tc->conn = conn;
+	tc->t_cpath = cp;
 	tc->t_orig_data_ready = sock->sk->sk_data_ready;
 	tc->t_orig_write_space = sock->sk->sk_write_space;
 	tc->t_orig_state_change = sock->sk->sk_state_change;
 
-	sock->sk->sk_user_data = conn;
+	sock->sk->sk_user_data = cp;
 	sock->sk->sk_data_ready = rds_tcp_data_ready;
 	sock->sk->sk_write_space = rds_tcp_write_space;
 	sock->sk->sk_state_change = rds_tcp_state_change;
@@ -284,24 +285,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 {
 	struct rds_tcp_connection *tc;
+	int i;
 
-	tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
-	if (!tc)
-		return -ENOMEM;
+	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
+		if (!tc)
+			return -ENOMEM;
 
-	mutex_init(&tc->t_conn_lock);
-	tc->t_sock = NULL;
-	tc->t_tinc = NULL;
-	tc->t_tinc_hdr_rem = sizeof(struct rds_header);
-	tc->t_tinc_data_rem = 0;
+		mutex_init(&tc->t_conn_path_lock);
+		tc->t_sock = NULL;
+		tc->t_tinc = NULL;
+		tc->t_tinc_hdr_rem = sizeof(struct rds_header);
+		tc->t_tinc_data_rem = 0;
 
-	conn->c_transport_data = tc;
+		conn->c_path[i].cp_transport_data = tc;
+		tc->t_cpath = &conn->c_path[i];
 
-	spin_lock_irq(&rds_tcp_conn_lock);
-	list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
-	spin_unlock_irq(&rds_tcp_conn_lock);
+		spin_lock_irq(&rds_tcp_conn_lock);
+		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
+		spin_unlock_irq(&rds_tcp_conn_lock);
+		rdsdebug("rds_conn_path [%d] tc %p\n", i,
+			 conn->c_path[i].cp_transport_data);
+	}
 
-	rdsdebug("alloced tc %p\n", conn->c_transport_data);
 	return 0;
 }
 
@@ -318,6 +324,17 @@ static void rds_tcp_conn_free(void *arg)
 	kmem_cache_free(rds_tcp_conn_slab, tc);
 }
 
+static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
+{
+	struct rds_tcp_connection *tc, *_tc;
+
+	list_for_each_entry_safe(tc, _tc, list, t_tcp_node) {
+		if (tc->t_cpath->cp_conn == conn)
+			return true;
+	}
+	return false;
+}
+
 static void rds_tcp_destroy_conns(void)
 {
 	struct rds_tcp_connection *tc, *_tc;
@@ -325,29 +342,28 @@ static void rds_tcp_destroy_conns(void)
 
 	/* avoid calling conn_destroy with irqs off */
 	spin_lock_irq(&rds_tcp_conn_lock);
-	list_splice(&rds_tcp_conn_list, &tmp_list);
-	INIT_LIST_HEAD(&rds_tcp_conn_list);
+	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+			list_move_tail(&tc->t_tcp_node, &tmp_list);
+	}
 	spin_unlock_irq(&rds_tcp_conn_lock);
 
-	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
-		if (tc->conn->c_passive)
-			rds_conn_destroy(tc->conn->c_passive);
-		rds_conn_destroy(tc->conn);
-	}
+	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
+		rds_conn_destroy(tc->t_cpath->cp_conn);
 }
 
 static void rds_tcp_exit(void);
 
 struct rds_transport rds_tcp_transport = {
 	.laddr_check		= rds_tcp_laddr_check,
-	.xmit_prepare		= rds_tcp_xmit_prepare,
-	.xmit_complete		= rds_tcp_xmit_complete,
+	.xmit_path_prepare	= rds_tcp_xmit_path_prepare,
+	.xmit_path_complete	= rds_tcp_xmit_path_complete,
 	.xmit			= rds_tcp_xmit,
-	.recv			= rds_tcp_recv,
+	.recv_path		= rds_tcp_recv_path,
 	.conn_alloc		= rds_tcp_conn_alloc,
 	.conn_free		= rds_tcp_conn_free,
-	.conn_connect		= rds_tcp_conn_connect,
-	.conn_shutdown		= rds_tcp_conn_shutdown,
+	.conn_path_connect	= rds_tcp_conn_path_connect,
+	.conn_path_shutdown	= rds_tcp_conn_path_shutdown,
 	.inc_copy_to_user	= rds_tcp_inc_copy_to_user,
 	.inc_free		= rds_tcp_inc_free,
 	.stats_info_copy	= rds_tcp_stats_info_copy,
@@ -489,10 +505,30 @@ static struct pernet_operations rds_tcp_net_ops = {
 	.size = sizeof(struct rds_tcp_net),
 };
 
+/* explicitly send a RST on each socket, thereby releasing any socket refcnts
+ * that may otherwise hold up netns deletion.
+ */
+static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
+{
+	struct rds_conn_path *cp;
+	struct rds_tcp_connection *tc;
+	int i;
+	struct sock *sk;
+
+	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+		cp = &conn->c_path[i];
+		tc = cp->cp_transport_data;
+		if (!tc->t_sock)
+			continue;
+		sk = tc->t_sock->sk;
+		sk->sk_prot->disconnect(sk, 0);
+		tcp_done(sk);
+	}
+}
+
 static void rds_tcp_kill_sock(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
-	struct sock *sk;
 	LIST_HEAD(tmp_list);
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 
@@ -501,20 +537,17 @@ static void rds_tcp_kill_sock(struct net *net)
 	flush_work(&rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->conn->c_net);
+		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
 		if (net != c_net || !tc->t_sock)
 			continue;
-		list_move_tail(&tc->t_tcp_node, &tmp_list);
+		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+			list_move_tail(&tc->t_tcp_node, &tmp_list);
 	}
 	spin_unlock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
-		sk = tc->t_sock->sk;
-		sk->sk_prot->disconnect(sk, 0);
-		tcp_done(sk);
-		if (tc->conn->c_passive)
-			rds_conn_destroy(tc->conn->c_passive);
-		rds_conn_destroy(tc->conn);
+		rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+		rds_conn_destroy(tc->t_cpath->cp_conn);
 	}
 }
 
@@ -552,12 +585,13 @@ static void rds_tcp_sysctl_reset(struct net *net)
 
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = read_pnet(&tc->conn->c_net);
+		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
 		if (net != c_net || !tc->t_sock)
 			continue;
 
-		rds_conn_drop(tc->conn); /* reconnect with new parameters */
+		/* reconnect with new parameters */
+		rds_conn_path_drop(tc->t_cpath);
 	}
 	spin_unlock_irq(&rds_tcp_conn_lock);
 }
@@ -617,7 +651,7 @@ static int rds_tcp_init(void)
 
 	ret = rds_tcp_recv_init();
 	if (ret)
-		goto out_slab;
+		goto out_pernet;
 
 	ret = rds_trans_register(&rds_tcp_transport);
 	if (ret)
@@ -629,8 +663,9 @@ static int rds_tcp_init(void)
 
 out_recv:
 	rds_tcp_recv_exit();
-out_slab:
+out_pernet:
 	unregister_pernet_subsys(&rds_tcp_net_ops);
+out_slab:
 	kmem_cache_destroy(rds_tcp_conn_slab);
 out:
 	return ret;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index ec0602b0dc24..1c3160faa963 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -11,11 +11,11 @@ struct rds_tcp_incoming {
 struct rds_tcp_connection {
 
 	struct list_head	t_tcp_node;
-	struct rds_connection   *conn;
-	/* t_conn_lock synchronizes the connection establishment between
-	 * rds_tcp_accept_one and rds_tcp_conn_connect
+	struct rds_conn_path	*t_cpath;
+	/* t_conn_path_lock synchronizes the connection establishment between
+	 * rds_tcp_accept_one and rds_tcp_conn_path_connect
 	 */
-	struct mutex		t_conn_lock;
+	struct mutex		t_conn_path_lock;
 	struct socket		*t_sock;
 	void			*t_orig_write_space;
 	void			*t_orig_data_ready;
@@ -49,8 +49,8 @@ struct rds_tcp_statistics {
 /* tcp.c */
 void rds_tcp_tune(struct socket *sock);
 void rds_tcp_nonagle(struct socket *sock);
-void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
-void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn);
+void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
+void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_restore_callbacks(struct socket *sock,
 			       struct rds_tcp_connection *tc);
 u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
@@ -60,8 +60,8 @@ extern struct rds_transport rds_tcp_transport;
 void rds_tcp_accept_work(struct sock *sk);
 
 /* tcp_connect.c */
-int rds_tcp_conn_connect(struct rds_connection *conn);
-void rds_tcp_conn_shutdown(struct rds_connection *conn);
+int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
+void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
 void rds_tcp_state_change(struct sock *sk);
 
 /* tcp_listen.c */
@@ -75,15 +75,15 @@ int rds_tcp_keepalive(struct socket *sock);
 int rds_tcp_recv_init(void);
 void rds_tcp_recv_exit(void);
 void rds_tcp_data_ready(struct sock *sk);
-int rds_tcp_recv(struct rds_connection *conn);
+int rds_tcp_recv_path(struct rds_conn_path *cp);
 void rds_tcp_inc_free(struct rds_incoming *inc);
 int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
 
 /* tcp_send.c */
-void rds_tcp_xmit_prepare(struct rds_connection *conn);
-void rds_tcp_xmit_complete(struct rds_connection *conn);
+void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp);
+void rds_tcp_xmit_path_complete(struct rds_conn_path *cp);
 int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
-	         unsigned int hdr_off, unsigned int sg, unsigned int off);
+		 unsigned int hdr_off, unsigned int sg, unsigned int off);
 void rds_tcp_write_space(struct sock *sk);
 
 /* tcp_stats.c */
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index ba9ec67f4e41..c916715fbe61 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -41,51 +41,51 @@
 void rds_tcp_state_change(struct sock *sk)
 {
 	void (*state_change)(struct sock *sk);
-	struct rds_connection *conn;
+	struct rds_conn_path *cp;
 	struct rds_tcp_connection *tc;
 
 	read_lock_bh(&sk->sk_callback_lock);
-	conn = sk->sk_user_data;
-	if (!conn) {
+	cp = sk->sk_user_data;
+	if (!cp) {
 		state_change = sk->sk_state_change;
 		goto out;
 	}
-	tc = conn->c_transport_data;
+	tc = cp->cp_transport_data;
 	state_change = tc->t_orig_state_change;
 
 	rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state);
 
-	switch(sk->sk_state) {
-		/* ignore connecting sockets as they make progress */
-		case TCP_SYN_SENT:
-		case TCP_SYN_RECV:
-			break;
-		case TCP_ESTABLISHED:
-			rds_connect_path_complete(&conn->c_path[0],
-						  RDS_CONN_CONNECTING);
-			break;
-		case TCP_CLOSE_WAIT:
-		case TCP_CLOSE:
-			rds_conn_drop(conn);
-		default:
-			break;
+	switch (sk->sk_state) {
+	/* ignore connecting sockets as they make progress */
+	case TCP_SYN_SENT:
+	case TCP_SYN_RECV:
+		break;
+	case TCP_ESTABLISHED:
+		rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+		break;
+	case TCP_CLOSE_WAIT:
+	case TCP_CLOSE:
+		rds_conn_path_drop(cp);
+	default:
+		break;
 	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	state_change(sk);
 }
 
-int rds_tcp_conn_connect(struct rds_connection *conn)
+int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 {
 	struct socket *sock = NULL;
 	struct sockaddr_in src, dest;
 	int ret;
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_connection *conn = cp->cp_conn;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
-	mutex_lock(&tc->t_conn_lock);
+	mutex_lock(&tc->t_conn_path_lock);
 
-	if (rds_conn_up(conn)) {
-		mutex_unlock(&tc->t_conn_lock);
+	if (rds_conn_path_up(cp)) {
+		mutex_unlock(&tc->t_conn_path_lock);
 		return 0;
 	}
 	ret = sock_create_kern(rds_conn_net(conn), PF_INET,
@@ -114,10 +114,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
 	 * once we call connect() we can start getting callbacks and they
 	 * own the socket
 	 */
-	rds_tcp_set_callbacks(sock, conn);
+	rds_tcp_set_callbacks(sock, cp);
 	ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
 				 O_NONBLOCK);
 
+	cp->cp_outgoing = 1;
 	rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
 	if (ret == -EINPROGRESS)
 		ret = 0;
@@ -125,11 +126,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
 		rds_tcp_keepalive(sock);
 		sock = NULL;
 	} else {
-		rds_tcp_restore_callbacks(sock, conn->c_transport_data);
+		rds_tcp_restore_callbacks(sock, cp->cp_transport_data);
 	}
 
 out:
-	mutex_unlock(&tc->t_conn_lock);
+	mutex_unlock(&tc->t_conn_path_lock);
 	if (sock)
 		sock_release(sock);
 	return ret;
@@ -144,12 +145,13 @@ out:
  * callbacks to those set by TCP.  Our callbacks won't execute again once we
  * hold the sock lock.
  */
-void rds_tcp_conn_shutdown(struct rds_connection *conn)
+void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *sock = tc->t_sock;
 
-	rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock);
+	rdsdebug("shutting down conn %p tc %p sock %p\n",
+		 cp->cp_conn, tc, sock);
 
 	if (sock) {
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 22d9bb15f731..ca975a217a49 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -79,6 +79,7 @@ int rds_tcp_accept_one(struct socket *sock)
 	struct inet_sock *inet;
 	struct rds_tcp_connection *rs_tcp = NULL;
 	int conn_state;
+	struct rds_conn_path *cp;
 
 	if (!sock) /* module unload or netns delete in progress */
 		return -ENETUNREACH;
@@ -120,8 +121,9 @@ int rds_tcp_accept_one(struct socket *sock)
 	 * rds_tcp_state_change() will do that cleanup
 	 */
 	rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
+	cp = &conn->c_path[0];
 	rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
-	mutex_lock(&rs_tcp->t_conn_lock);
+	mutex_lock(&rs_tcp->t_conn_path_lock);
 	conn_state = rds_conn_state(conn);
 	if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
 		goto rst_nsk;
@@ -136,16 +138,14 @@ int rds_tcp_accept_one(struct socket *sock)
 		    !conn->c_path[0].cp_outgoing) {
 			goto rst_nsk;
 		} else {
-			rds_tcp_reset_callbacks(new_sock, conn);
+			rds_tcp_reset_callbacks(new_sock, cp);
 			conn->c_path[0].cp_outgoing = 0;
 			/* rds_connect_path_complete() marks RDS_CONN_UP */
-			rds_connect_path_complete(&conn->c_path[0],
-						  RDS_CONN_DISCONNECTING);
+			rds_connect_path_complete(cp, RDS_CONN_RESETTING);
 		}
 	} else {
-		rds_tcp_set_callbacks(new_sock, conn);
-		rds_connect_path_complete(&conn->c_path[0],
-					  RDS_CONN_CONNECTING);
+		rds_tcp_set_callbacks(new_sock, cp);
+		rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
 	}
 	new_sock = NULL;
 	ret = 0;
@@ -156,7 +156,7 @@ rst_nsk:
 	ret = 0;
 out:
 	if (rs_tcp)
-		mutex_unlock(&rs_tcp->t_conn_lock);
+		mutex_unlock(&rs_tcp->t_conn_path_lock);
 	if (new_sock)
 		sock_release(new_sock);
 	return ret;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 3f8fb38996c7..ad4892e97f91 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -34,7 +34,6 @@
 #include <linux/slab.h>
 #include <net/tcp.h>
 
-#include "rds_single_path.h"
 #include "rds.h"
 #include "tcp.h"
 
@@ -148,7 +147,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn,
 }
 
 struct rds_tcp_desc_arg {
-	struct rds_connection *conn;
+	struct rds_conn_path *conn_path;
 	gfp_t gfp;
 };
 
@@ -156,8 +155,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 			     unsigned int offset, size_t len)
 {
 	struct rds_tcp_desc_arg *arg = desc->arg.data;
-	struct rds_connection *conn = arg->conn;
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_conn_path *cp = arg->conn_path;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct rds_tcp_incoming *tinc = tc->t_tinc;
 	struct sk_buff *clone;
 	size_t left = len, to_copy;
@@ -172,14 +171,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	while (left) {
 		if (!tinc) {
 			tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
-					        arg->gfp);
+						arg->gfp);
 			if (!tinc) {
 				desc->error = -ENOMEM;
 				goto out;
 			}
 			tc->t_tinc = tinc;
 			rdsdebug("alloced tinc %p\n", tinc);
-			rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
+			rds_inc_path_init(&tinc->ti_inc, cp,
+					  cp->cp_conn->c_faddr);
 			/*
 			 * XXX * we might be able to use the __ variants when
 			 * we've already serialized at a higher level.
@@ -229,6 +229,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 		}
 
 		if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) {
+			struct rds_connection *conn = cp->cp_conn;
+
 			if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
 				rds_tcp_cong_recv(conn, tinc);
 			else
@@ -251,15 +253,15 @@ out:
 }
 
 /* the caller has to hold the sock lock */
-static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
+static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *sock = tc->t_sock;
 	read_descriptor_t desc;
 	struct rds_tcp_desc_arg arg;
 
 	/* It's like glib in the kernel! */
-	arg.conn = conn;
+	arg.conn_path = cp;
 	arg.gfp = gfp;
 	desc.arg.data = &arg;
 	desc.error = 0;
@@ -279,16 +281,17 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
  * if we fail to allocate we're in trouble.. blindly wait some time before
  * trying again to see if the VM can free up something for us.
  */
-int rds_tcp_recv(struct rds_connection *conn)
+int rds_tcp_recv_path(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *sock = tc->t_sock;
 	int ret = 0;
 
-	rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock);
+	rdsdebug("recv worker path [%d] tc %p sock %p\n",
+		 cp->cp_index, tc, sock);
 
 	lock_sock(sock->sk);
-	ret = rds_tcp_read_sock(conn, GFP_KERNEL);
+	ret = rds_tcp_read_sock(cp, GFP_KERNEL);
 	release_sock(sock->sk);
 
 	return ret;
@@ -297,24 +300,24 @@ int rds_tcp_recv(struct rds_connection *conn)
 void rds_tcp_data_ready(struct sock *sk)
 {
 	void (*ready)(struct sock *sk);
-	struct rds_connection *conn;
+	struct rds_conn_path *cp;
 	struct rds_tcp_connection *tc;
 
 	rdsdebug("data ready sk %p\n", sk);
 
 	read_lock_bh(&sk->sk_callback_lock);
-	conn = sk->sk_user_data;
-	if (!conn) { /* check for teardown race */
+	cp = sk->sk_user_data;
+	if (!cp) { /* check for teardown race */
 		ready = sk->sk_data_ready;
 		goto out;
 	}
 
-	tc = conn->c_transport_data;
+	tc = cp->cp_transport_data;
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM)
-		queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
+		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 2b3414f3c45c..57e0f5826406 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -49,16 +49,16 @@ static void rds_tcp_cork(struct socket *sock, int val)
 	set_fs(oldfs);
 }
 
-void rds_tcp_xmit_prepare(struct rds_connection *conn)
+void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
 	rds_tcp_cork(tc->t_sock, 1);
 }
 
-void rds_tcp_xmit_complete(struct rds_connection *conn)
+void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
 	rds_tcp_cork(tc->t_sock, 0);
 }
@@ -67,19 +67,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn)
 static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
 {
 	struct kvec vec = {
-                .iov_base = data,
-                .iov_len = len,
+		.iov_base = data,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
 	};
-        struct msghdr msg = {
-                .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
-        };
 
 	return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len);
 }
 
 /* the core send_sem serializes this with other xmit and shutdown */
 int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
-	         unsigned int hdr_off, unsigned int sg, unsigned int off)
+		 unsigned int hdr_off, unsigned int sg, unsigned int off)
 {
 	struct rds_tcp_connection *tc = conn->c_transport_data;
 	int done = 0;
@@ -178,27 +178,27 @@ static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack)
 void rds_tcp_write_space(struct sock *sk)
 {
 	void (*write_space)(struct sock *sk);
-	struct rds_connection *conn;
+	struct rds_conn_path *cp;
 	struct rds_tcp_connection *tc;
 
 	read_lock_bh(&sk->sk_callback_lock);
-	conn = sk->sk_user_data;
-	if (!conn) {
+	cp = sk->sk_user_data;
+	if (!cp) {
 		write_space = sk->sk_write_space;
 		goto out;
 	}
 
-	tc = conn->c_transport_data;
+	tc = cp->cp_transport_data;
 	rdsdebug("write_space for tc %p\n", tc);
 	write_space = tc->t_orig_write_space;
 	rds_tcp_stats_inc(s_tcp_write_space_calls);
 
 	rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc));
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
-	rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
+	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
-		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 9fbe95bb14a9..bc97d67f29cc 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -125,6 +125,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	  conn, &conn->c_laddr, &conn->c_faddr,
 	  cp->cp_reconnect_jiffies);
 
+	/* let peer with smaller addr initiate reconnect, to avoid duels */
+	if (conn->c_trans->t_type == RDS_TRANS_TCP &&
+	    conn->c_laddr > conn->c_faddr)
+		return;
+
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	if (cp->cp_reconnect_jiffies == 0) {
 		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
@@ -152,8 +157,9 @@ void rds_connect_worker(struct work_struct *work)
 	int ret;
 
 	clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
-	if (rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
-		ret = conn->c_trans->conn_connect(conn);
+	ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+	if (ret) {
+		ret = conn->c_trans->conn_path_connect(cp);
 		rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
 			conn, &conn->c_laddr, &conn->c_faddr, ret);
 
@@ -203,7 +209,7 @@ void rds_recv_worker(struct work_struct *work)
 	int ret;
 
 	if (rds_conn_path_state(cp) == RDS_CONN_UP) {
-		ret = cp->cp_conn->c_trans->recv(cp->cp_conn);
+		ret = cp->cp_conn->c_trans->recv_path(cp);
 		rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
 		switch (ret) {
 		case -EAGAIN:
diff --git a/net/rds/transport.c b/net/rds/transport.c
index f3afd1d60d3c..2ffd3e30c643 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
 	rds_info_iter_unmap(iter);
 	down_read(&rds_trans_sem);
 
-	for (i = 0; i < RDS_TRANS_COUNT; i++)
-	{
+	for (i = 0; i < RDS_TRANS_COUNT; i++) {
 		trans = transports[i];
 		if (!trans || !trans->stats_info_copy)
 			continue;
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6522e50fb750..10f3f48a16a8 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ af-rxrpc-y := \
 	conn_client.o \
 	conn_event.o \
 	conn_object.o \
+	conn_service.o \
 	input.o \
 	insecure.o \
 	key.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 5d3e795a7c48..88effadd4b16 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -766,9 +766,9 @@ error_key_type:
 error_sock:
 	proto_unregister(&rxrpc_proto);
 error_proto:
-	destroy_workqueue(rxrpc_workqueue);
-error_security:
 	rxrpc_exit_security();
+error_security:
+	destroy_workqueue(rxrpc_workqueue);
 error_work_queue:
 	kmem_cache_destroy(rxrpc_call_jar);
 error_call_jar:
@@ -788,27 +788,7 @@ static void __exit af_rxrpc_exit(void)
 	proto_unregister(&rxrpc_proto);
 	rxrpc_destroy_all_calls();
 	rxrpc_destroy_all_connections();
-
 	ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
-
-	/* We need to flush the scheduled work twice because the local endpoint
-	 * records involve a work item in their destruction as they can only be
-	 * destroyed from process context.  However, a connection may have a
-	 * work item outstanding - and this will pin the local endpoint record
-	 * until the connection goes away.
-	 *
-	 * Peers don't pin locals and calls pin sockets - which prevents the
-	 * module from being unloaded - so we should only need two flushes.
-	 */
-	_debug("flush scheduled work");
-	flush_workqueue(rxrpc_workqueue);
-	_debug("flush scheduled work 2");
-	flush_workqueue(rxrpc_workqueue);
-	_debug("synchronise RCU");
-	rcu_barrier();
-	_debug("destroy locals");
-	ASSERT(idr_is_empty(&rxrpc_client_conn_ids));
-	idr_destroy(&rxrpc_client_conn_ids);
 	rxrpc_destroy_all_locals();
 
 	remove_proc_entry("rxrpc_conns", init_net.proc_net);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 702db72196fb..1bb9e7ac9e14 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -10,6 +10,7 @@
  */
 
 #include <linux/atomic.h>
+#include <linux/seqlock.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <rxrpc/packet.h>
@@ -35,7 +36,6 @@ struct rxrpc_crypt {
 	queue_delayed_work(rxrpc_workqueue, (WS), (D))
 
 #define rxrpc_queue_call(CALL)	rxrpc_queue_work(&(CALL)->processor)
-#define rxrpc_queue_conn(CONN)	rxrpc_queue_work(&(CONN)->processor)
 
 struct rxrpc_connection;
 
@@ -141,17 +141,16 @@ struct rxrpc_security {
 	int (*init_connection_security)(struct rxrpc_connection *);
 
 	/* prime a connection's packet security */
-	void (*prime_packet_security)(struct rxrpc_connection *);
+	int (*prime_packet_security)(struct rxrpc_connection *);
 
 	/* impose security on a packet */
-	int (*secure_packet)(const struct rxrpc_call *,
+	int (*secure_packet)(struct rxrpc_call *,
 			     struct sk_buff *,
 			     size_t,
 			     void *);
 
 	/* verify the security on a received packet */
-	int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
-			     u32 *);
+	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
 
 	/* issue a challenge */
 	int (*issue_challenge)(struct rxrpc_connection *);
@@ -208,7 +207,7 @@ struct rxrpc_peer {
 	struct hlist_head	error_targets;	/* targets for net error distribution */
 	struct work_struct	error_distributor;
 	struct rb_root		service_conns;	/* Service connections */
-	rwlock_t		conn_lock;
+	seqlock_t		service_conn_lock;
 	spinlock_t		lock;		/* access lock */
 	unsigned int		if_mtu;		/* interface MTU for this peer */
 	unsigned int		mtu;		/* network MTU for this peer */
@@ -231,18 +230,12 @@ struct rxrpc_peer {
  * Keys for matching a connection.
  */
 struct rxrpc_conn_proto {
-	unsigned long		hash_key;
-	struct rxrpc_local	*local;		/* Representation of local endpoint */
-	u32			epoch;		/* epoch of this connection */
-	u32			cid;		/* connection ID */
-	u8			in_clientflag;	/* RXRPC_CLIENT_INITIATED if we are server */
-	u8			addr_size;	/* Size of the address */
-	sa_family_t		family;		/* Transport protocol */
-	__be16			port;		/* Peer UDP/UDP6 port */
-	union {					/* Peer address */
-		struct in_addr	ipv4_addr;
-		struct in6_addr	ipv6_addr;
-		u32		raw_addr[0];
+	union {
+		struct {
+			u32	epoch;		/* epoch of this connection */
+			u32	cid;		/* connection ID */
+		};
+		u64		index_key;
 	};
 };
 
@@ -256,6 +249,37 @@ struct rxrpc_conn_parameters {
 };
 
 /*
+ * Bits in the connection flags.
+ */
+enum rxrpc_conn_flag {
+	RXRPC_CONN_HAS_IDR,		/* Has a client conn ID assigned */
+	RXRPC_CONN_IN_SERVICE_CONNS,	/* Conn is in peer->service_conns */
+	RXRPC_CONN_IN_CLIENT_CONNS,	/* Conn is in local->client_conns */
+};
+
+/*
+ * Events that can be raised upon a connection.
+ */
+enum rxrpc_conn_event {
+	RXRPC_CONN_EV_CHALLENGE,	/* Send challenge packet */
+};
+
+/*
+ * The connection protocol state.
+ */
+enum rxrpc_conn_proto_state {
+	RXRPC_CONN_UNUSED,		/* Connection not yet attempted */
+	RXRPC_CONN_CLIENT,		/* Client connection */
+	RXRPC_CONN_SERVICE_UNSECURED,	/* Service unsecured connection */
+	RXRPC_CONN_SERVICE_CHALLENGING,	/* Service challenging for security */
+	RXRPC_CONN_SERVICE,		/* Service secured connection */
+	RXRPC_CONN_REMOTELY_ABORTED,	/* Conn aborted by peer */
+	RXRPC_CONN_LOCALLY_ABORTED,	/* Conn aborted locally */
+	RXRPC_CONN_NETWORK_ERROR,	/* Conn terminated by network error */
+	RXRPC_CONN__NR_STATES
+};
+
+/*
  * RxRPC connection definition
  * - matched by { local, peer, epoch, conn_id, direction }
  * - each connection can only handle four simultaneous calls
@@ -265,44 +289,38 @@ struct rxrpc_connection {
 	struct rxrpc_conn_parameters params;
 
 	spinlock_t		channel_lock;
-	struct rxrpc_call	*channels[RXRPC_MAXCALLS]; /* active calls */
+
+	struct rxrpc_channel {
+		struct rxrpc_call __rcu	*call;		/* Active call */
+		u32			call_id;	/* ID of current call */
+		u32			call_counter;	/* Call ID counter */
+		u32			last_call;	/* ID of last call */
+		u32			last_result;	/* Result of last call (0/abort) */
+	} channels[RXRPC_MAXCALLS];
 	wait_queue_head_t	channel_wq;	/* queue to wait for channel to become available */
 
+	struct rcu_head		rcu;
 	struct work_struct	processor;	/* connection event processor */
 	union {
 		struct rb_node	client_node;	/* Node in local->client_conns */
 		struct rb_node	service_node;	/* Node in peer->service_conns */
 	};
 	struct list_head	link;		/* link in master connection list */
-	struct rb_root		calls;		/* calls on this connection */
 	struct sk_buff_head	rx_queue;	/* received conn-level packets */
 	const struct rxrpc_security *security;	/* applied security module */
 	struct key		*server_key;	/* security for this service */
 	struct crypto_skcipher	*cipher;	/* encryption handle */
 	struct rxrpc_crypt	csum_iv;	/* packet checksum base */
 	unsigned long		flags;
-#define RXRPC_CONN_HAS_IDR	0		/* - Has a client conn ID assigned */
 	unsigned long		events;
-#define RXRPC_CONN_CHALLENGE	0		/* send challenge packet */
 	unsigned long		put_time;	/* Time at which last put */
-	rwlock_t		lock;		/* access lock */
 	spinlock_t		state_lock;	/* state-change lock */
 	atomic_t		usage;
-	enum {					/* current state of connection */
-		RXRPC_CONN_UNUSED,		/* - connection not yet attempted */
-		RXRPC_CONN_CLIENT,		/* - client connection */
-		RXRPC_CONN_SERVER_UNSECURED,	/* - server unsecured connection */
-		RXRPC_CONN_SERVER_CHALLENGING,	/* - server challenging for security */
-		RXRPC_CONN_SERVER,		/* - server secured connection */
-		RXRPC_CONN_REMOTELY_ABORTED,	/* - conn aborted by peer */
-		RXRPC_CONN_LOCALLY_ABORTED,	/* - conn aborted locally */
-		RXRPC_CONN_NETWORK_ERROR,	/* - conn terminated by network error */
-	} state;
+	enum rxrpc_conn_proto_state state : 8;	/* current state of connection */
 	u32			local_abort;	/* local abort code */
 	u32			remote_abort;	/* remote abort code */
 	int			error;		/* local error incurred */
 	int			debug_id;	/* debug ID for printks */
-	unsigned int		call_counter;	/* call ID counter */
 	atomic_t		serial;		/* packet serial number counter */
 	atomic_t		hi_serial;	/* highest serial number received */
 	atomic_t		avail_chans;	/* number of channels available */
@@ -382,6 +400,7 @@ enum rxrpc_call_state {
  * - matched by { connection, call_id }
  */
 struct rxrpc_call {
+	struct rcu_head		rcu;
 	struct rxrpc_connection	*conn;		/* connection carrying call */
 	struct rxrpc_sock	*socket;	/* socket responsible */
 	struct timer_list	lifetimer;	/* lifetime remaining on call */
@@ -394,11 +413,11 @@ struct rxrpc_call {
 	struct hlist_node	error_link;	/* link in error distribution list */
 	struct list_head	accept_link;	/* calls awaiting acceptance */
 	struct rb_node		sock_node;	/* node in socket call tree */
-	struct rb_node		conn_node;	/* node in connection call tree */
 	struct sk_buff_head	rx_queue;	/* received packets */
 	struct sk_buff_head	rx_oos_queue;	/* packets received out of sequence */
 	struct sk_buff		*tx_pending;	/* Tx socket buffer being filled */
 	wait_queue_head_t	tx_waitq;	/* wait for Tx window space to become available */
+	__be32			crypto_buf[2];	/* Temporary packet crypto buffer */
 	unsigned long		user_call_ID;	/* user-defined call ID */
 	unsigned long		creation_jif;	/* time of call creation */
 	unsigned long		flags;
@@ -442,19 +461,12 @@ struct rxrpc_call {
 #define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
 	unsigned long		ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
 
-	struct hlist_node	hash_node;
-	unsigned long		hash_key;	/* Full hash key */
-	u8			in_clientflag;	/* Copy of conn->in_clientflag for hashing */
-	struct rxrpc_local	*local;		/* Local endpoint. Used for hashing. */
-	sa_family_t		family;		/* Frame protocol */
+	u8			in_clientflag;	/* Copy of conn->in_clientflag */
+	struct rxrpc_local	*local;		/* Local endpoint. */
 	u32			call_id;	/* call ID on connection  */
 	u32			cid;		/* connection ID plus channel index */
 	u32			epoch;		/* epoch of this connection */
 	u16			service_id;	/* service ID */
-	union {					/* Peer IP address for hashing */
-		__be32	ipv4_addr;
-		__u8	ipv6_addr[16];		/* Anticipates eventual IPv6 support */
-	} peer_ip;
 };
 
 /*
@@ -502,8 +514,6 @@ extern struct kmem_cache *rxrpc_call_jar;
 extern struct list_head rxrpc_calls;
 extern rwlock_t rxrpc_call_lock;
 
-struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
-					void *, sa_family_t, const void *);
 struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
@@ -522,8 +532,10 @@ void __exit rxrpc_destroy_all_calls(void);
  */
 extern struct idr rxrpc_client_conn_ids;
 
-int rxrpc_get_client_connection_id(struct rxrpc_connection *, gfp_t);
-void rxrpc_put_client_connection_id(struct rxrpc_connection *);
+void rxrpc_destroy_client_conn_ids(void);
+int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
+		       struct sockaddr_rxrpc *, gfp_t);
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
 
 /*
  * conn_event.c
@@ -539,17 +551,14 @@ extern unsigned int rxrpc_connection_expiry;
 extern struct list_head rxrpc_connections;
 extern rwlock_t rxrpc_connection_lock;
 
-int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
-		       struct sockaddr_rxrpc *, gfp_t);
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *,
-					       struct rxrpc_peer *,
-					       struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
+						   struct sk_buff *);
+void __rxrpc_disconnect_call(struct rxrpc_call *);
 void rxrpc_disconnect_call(struct rxrpc_call *);
 void rxrpc_put_connection(struct rxrpc_connection *);
 void __exit rxrpc_destroy_all_connections(void);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
-						   struct rxrpc_peer *,
-						   struct sk_buff *);
 
 static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
 {
@@ -558,7 +567,7 @@ static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
 
 static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
 {
-	return conn->proto.in_clientflag;
+	return !rxrpc_conn_is_client(conn);
 }
 
 static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
@@ -566,6 +575,31 @@ static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
 	atomic_inc(&conn->usage);
 }
 
+static inline
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+{
+	return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
+}
+
+static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+{
+	if (!rxrpc_get_connection_maybe(conn))
+		return false;
+	if (!rxrpc_queue_work(&conn->processor))
+		rxrpc_put_connection(conn);
+	return true;
+}
+
+/*
+ * conn_service.c
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
+						     struct sk_buff *);
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
+						   struct sockaddr_rxrpc *,
+						   struct sk_buff *);
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
+
 /*
  * input.c
  */
@@ -618,6 +652,11 @@ static inline void rxrpc_put_local(struct rxrpc_local *local)
 		__rxrpc_put_local(local);
 }
 
+static inline void rxrpc_queue_local(struct rxrpc_local *local)
+{
+	rxrpc_queue_work(&local->processor);
+}
+
 /*
  * misc.c
  */
@@ -722,8 +761,7 @@ static inline void rxrpc_sysctl_exit(void) {}
 /*
  * utils.c
  */
-void rxrpc_get_addr_from_skb(struct rxrpc_local *, const struct sk_buff *,
-			     struct sockaddr_rxrpc *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 
 /*
  * debug tracing
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 202e053a3c6d..0b2832141bd0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -75,7 +75,6 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
 {
 	struct rxrpc_connection *conn;
 	struct rxrpc_skb_priv *sp, *nsp;
-	struct rxrpc_peer *peer;
 	struct rxrpc_call *call;
 	struct sk_buff *notification;
 	int ret;
@@ -94,15 +93,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
 	rxrpc_new_skb(notification);
 	notification->mark = RXRPC_SKB_MARK_NEW_CALL;
 
-	peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
-	if (!peer) {
-		_debug("no peer");
-		ret = -EBUSY;
-		goto error;
-	}
-
-	conn = rxrpc_incoming_connection(local, peer, skb);
-	rxrpc_put_peer(peer);
+	conn = rxrpc_incoming_connection(local, srx, skb);
 	if (IS_ERR(conn)) {
 		_debug("no conn");
 		ret = PTR_ERR(conn);
@@ -128,12 +119,11 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
 
 		spin_lock(&call->conn->state_lock);
 		if (sp->hdr.securityIndex > 0 &&
-		    call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+		    call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
 			_debug("await conn sec");
 			list_add_tail(&call->accept_link, &rx->secureq);
-			call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
-			rxrpc_get_connection(call->conn);
-			set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+			call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+			set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
 			rxrpc_queue_conn(call->conn);
 		} else {
 			_debug("conn ready");
@@ -227,20 +217,8 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
 	whdr._rsvd	= 0;
 	whdr.serviceId	= htons(sp->hdr.serviceId);
 
-	/* determine the remote address */
-	memset(&srx, 0, sizeof(srx));
-	srx.srx_family = AF_RXRPC;
-	srx.transport.family = local->srx.transport.family;
-	srx.transport_type = local->srx.transport_type;
-	switch (srx.transport.family) {
-	case AF_INET:
-		srx.transport_len = sizeof(struct sockaddr_in);
-		srx.transport.sin.sin_port = udp_hdr(skb)->source;
-		srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
-		break;
-	default:
-		goto busy;
-	}
+	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+		goto drop;
 
 	/* get the socket providing the service */
 	read_lock_bh(&local->services_lock);
@@ -286,6 +264,10 @@ busy:
 	rxrpc_free_skb(skb);
 	return;
 
+drop:
+	rxrpc_free_skb(skb);
+	return;
+
 invalid_service:
 	skb->priority = RX_INVALID_OPERATION;
 	rxrpc_reject_packet(local, skb);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0ba84295f913..fc32aa5764a2 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -858,11 +858,6 @@ void rxrpc_process_call(struct work_struct *work)
 	iov[0].iov_len	= sizeof(whdr);
 
 	/* deal with events of a final nature */
-	if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-		rxrpc_release_call(call);
-		clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
-	}
-
 	if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
 		enum rxrpc_skb_mark mark;
 		int error;
@@ -1094,7 +1089,7 @@ void rxrpc_process_call(struct work_struct *work)
 
 		if (call->state == RXRPC_CALL_SERVER_SECURING) {
 			_debug("securing");
-			write_lock(&call->conn->lock);
+			write_lock(&call->socket->call_lock);
 			if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
 			    !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
 				_debug("not released");
@@ -1102,7 +1097,7 @@ void rxrpc_process_call(struct work_struct *work)
 				list_move_tail(&call->accept_link,
 					       &call->socket->acceptq);
 			}
-			write_unlock(&call->conn->lock);
+			write_unlock(&call->socket->call_lock);
 			read_lock(&call->state_lock);
 			if (call->state < RXRPC_CALL_COMPLETE)
 				set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
@@ -1144,6 +1139,11 @@ void rxrpc_process_call(struct work_struct *work)
 		goto maybe_reschedule;
 	}
 
+	if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
+		rxrpc_release_call(call);
+		clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
+	}
+
 	/* other events may have been raised since we started checking */
 	goto maybe_reschedule;
 
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index ad933daae13b..91287c9d01bb 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -14,7 +14,6 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/circ_buf.h>
-#include <linux/hashtable.h>
 #include <linux/spinlock_types.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
@@ -61,142 +60,6 @@ static void rxrpc_dead_call_expired(unsigned long _call);
 static void rxrpc_ack_time_expired(unsigned long _call);
 static void rxrpc_resend_time_expired(unsigned long _call);
 
-static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
-static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
-
-/*
- * Hash function for rxrpc_call_hash
- */
-static unsigned long rxrpc_call_hashfunc(
-	u8		in_clientflag,
-	u32		cid,
-	u32		call_id,
-	u32		epoch,
-	u16		service_id,
-	sa_family_t	family,
-	void		*localptr,
-	unsigned int	addr_size,
-	const u8	*peer_addr)
-{
-	const u16 *p;
-	unsigned int i;
-	unsigned long key;
-
-	_enter("");
-
-	key = (unsigned long)localptr;
-	/* We just want to add up the __be32 values, so forcing the
-	 * cast should be okay.
-	 */
-	key += epoch;
-	key += service_id;
-	key += call_id;
-	key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
-	key += cid & RXRPC_CHANNELMASK;
-	key += in_clientflag;
-	key += family;
-	/* Step through the peer address in 16-bit portions for speed */
-	for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
-		key += *p;
-	_leave(" key = 0x%lx", key);
-	return key;
-}
-
-/*
- * Add a call to the hashtable
- */
-static void rxrpc_call_hash_add(struct rxrpc_call *call)
-{
-	unsigned long key;
-	unsigned int addr_size = 0;
-
-	_enter("");
-	switch (call->family) {
-	case AF_INET:
-		addr_size = sizeof(call->peer_ip.ipv4_addr);
-		break;
-	case AF_INET6:
-		addr_size = sizeof(call->peer_ip.ipv6_addr);
-		break;
-	default:
-		break;
-	}
-	key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
-				  call->call_id, call->epoch,
-				  call->service_id, call->family,
-				  call->conn->params.local, addr_size,
-				  call->peer_ip.ipv6_addr);
-	/* Store the full key in the call */
-	call->hash_key = key;
-	spin_lock(&rxrpc_call_hash_lock);
-	hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
-	spin_unlock(&rxrpc_call_hash_lock);
-	_leave("");
-}
-
-/*
- * Remove a call from the hashtable
- */
-static void rxrpc_call_hash_del(struct rxrpc_call *call)
-{
-	_enter("");
-	spin_lock(&rxrpc_call_hash_lock);
-	hash_del_rcu(&call->hash_node);
-	spin_unlock(&rxrpc_call_hash_lock);
-	_leave("");
-}
-
-/*
- * Find a call in the hashtable and return it, or NULL if it
- * isn't there.
- */
-struct rxrpc_call *rxrpc_find_call_hash(
-	struct rxrpc_host_header *hdr,
-	void		*localptr,
-	sa_family_t	family,
-	const void	*peer_addr)
-{
-	unsigned long key;
-	unsigned int addr_size = 0;
-	struct rxrpc_call *call = NULL;
-	struct rxrpc_call *ret = NULL;
-	u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
-
-	_enter("");
-	switch (family) {
-	case AF_INET:
-		addr_size = sizeof(call->peer_ip.ipv4_addr);
-		break;
-	case AF_INET6:
-		addr_size = sizeof(call->peer_ip.ipv6_addr);
-		break;
-	default:
-		break;
-	}
-
-	key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
-				  hdr->epoch, hdr->serviceId,
-				  family, localptr, addr_size,
-				  peer_addr);
-	hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
-		if (call->hash_key == key &&
-		    call->call_id == hdr->callNumber &&
-		    call->cid == hdr->cid &&
-		    call->in_clientflag == in_clientflag &&
-		    call->service_id == hdr->serviceId &&
-		    call->family == family &&
-		    call->local == localptr &&
-		    memcmp(call->peer_ip.ipv6_addr, peer_addr,
-			   addr_size) == 0 &&
-		    call->epoch == hdr->epoch) {
-			ret = call;
-			break;
-		}
-	}
-	_leave(" = %p", ret);
-	return ret;
-}
-
 /*
  * find an extant server call
  * - called in process context with IRQs enabled
@@ -305,20 +168,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
 	call->socket = rx;
 	call->rx_data_post = 1;
 
-	/* Record copies of information for hashtable lookup */
-	call->family = rx->family;
 	call->local = rx->local;
-	switch (call->family) {
-	case AF_INET:
-		call->peer_ip.ipv4_addr = srx->transport.sin.sin_addr.s_addr;
-		break;
-	case AF_INET6:
-		memcpy(call->peer_ip.ipv6_addr,
-		       srx->transport.sin6.sin6_addr.in6_u.u6_addr8,
-		       sizeof(call->peer_ip.ipv6_addr));
-		break;
-	}
-
 	call->service_id = srx->srx_service;
 	call->in_clientflag = 0;
 
@@ -345,9 +195,6 @@ static int rxrpc_begin_client_call(struct rxrpc_call *call,
 
 	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
 
-	/* Add the new call to the hashtable */
-	rxrpc_call_hash_add(call);
-
 	spin_lock(&call->conn->params.peer->lock);
 	hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
 	spin_unlock(&call->conn->params.peer->lock);
@@ -425,9 +272,10 @@ error:
 	rxrpc_put_call(call);
 
 	write_lock_bh(&rxrpc_call_lock);
-	list_del(&call->link);
+	list_del_init(&call->link);
 	write_unlock_bh(&rxrpc_call_lock);
 
+	call->state = RXRPC_CALL_DEAD;
 	rxrpc_put_call(call);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
@@ -439,6 +287,7 @@ error:
 	 */
 found_user_ID_now_present:
 	write_unlock(&rx->call_lock);
+	call->state = RXRPC_CALL_DEAD;
 	rxrpc_put_call(call);
 	_leave(" = -EEXIST [%p]", call);
 	return ERR_PTR(-EEXIST);
@@ -454,8 +303,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_call *call, *candidate;
-	struct rb_node **p, *parent;
-	u32 call_id;
+	u32 call_id, chan;
 
 	_enter(",%d", conn->debug_id);
 
@@ -465,20 +313,23 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 	if (!candidate)
 		return ERR_PTR(-EBUSY);
 
+	chan = sp->hdr.cid & RXRPC_CHANNELMASK;
 	candidate->socket	= rx;
 	candidate->conn		= conn;
 	candidate->cid		= sp->hdr.cid;
 	candidate->call_id	= sp->hdr.callNumber;
-	candidate->channel	= sp->hdr.cid & RXRPC_CHANNELMASK;
+	candidate->channel	= chan;
 	candidate->rx_data_post	= 0;
 	candidate->state	= RXRPC_CALL_SERVER_ACCEPTING;
 	if (conn->security_ix > 0)
 		candidate->state = RXRPC_CALL_SERVER_SECURING;
 
-	write_lock_bh(&conn->lock);
+	spin_lock(&conn->channel_lock);
 
 	/* set the channel for this call */
-	call = conn->channels[candidate->channel];
+	call = rcu_dereference_protected(conn->channels[chan].call,
+					 lockdep_is_held(&conn->channel_lock));
+
 	_debug("channel[%u] is %p", candidate->channel, call);
 	if (call && call->call_id == sp->hdr.callNumber) {
 		/* already set; must've been a duplicate packet */
@@ -507,9 +358,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 		       call->debug_id, rxrpc_call_states[call->state]);
 
 		if (call->state >= RXRPC_CALL_COMPLETE) {
-			conn->channels[call->channel] = NULL;
+			__rxrpc_disconnect_call(call);
 		} else {
-			write_unlock_bh(&conn->lock);
+			spin_unlock(&conn->channel_lock);
 			kmem_cache_free(rxrpc_call_jar, candidate);
 			_leave(" = -EBUSY");
 			return ERR_PTR(-EBUSY);
@@ -519,33 +370,22 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 	/* check the call number isn't duplicate */
 	_debug("check dup");
 	call_id = sp->hdr.callNumber;
-	p = &conn->calls.rb_node;
-	parent = NULL;
-	while (*p) {
-		parent = *p;
-		call = rb_entry(parent, struct rxrpc_call, conn_node);
-
-		/* The tree is sorted in order of the __be32 value without
-		 * turning it into host order.
-		 */
-		if (call_id < call->call_id)
-			p = &(*p)->rb_left;
-		else if (call_id > call->call_id)
-			p = &(*p)->rb_right;
-		else
-			goto old_call;
-	}
+
+	/* We just ignore calls prior to the current call ID.  Terminated calls
+	 * are handled via the connection.
+	 */
+	if (call_id <= conn->channels[chan].call_counter)
+		goto old_call; /* TODO: Just drop packet */
 
 	/* make the call available */
 	_debug("new call");
 	call = candidate;
 	candidate = NULL;
-	rb_link_node(&call->conn_node, parent, p);
-	rb_insert_color(&call->conn_node, &conn->calls);
-	conn->channels[call->channel] = call;
+	conn->channels[chan].call_counter = call_id;
+	rcu_assign_pointer(conn->channels[chan].call, call);
 	sock_hold(&rx->sk);
 	rxrpc_get_connection(conn);
-	write_unlock_bh(&conn->lock);
+	spin_unlock(&conn->channel_lock);
 
 	spin_lock(&conn->params.peer->lock);
 	hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
@@ -555,27 +395,10 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 	list_add_tail(&call->link, &rxrpc_calls);
 	write_unlock_bh(&rxrpc_call_lock);
 
-	/* Record copies of information for hashtable lookup */
-	call->family = rx->family;
 	call->local = conn->params.local;
-	switch (call->family) {
-	case AF_INET:
-		call->peer_ip.ipv4_addr =
-			conn->params.peer->srx.transport.sin.sin_addr.s_addr;
-		break;
-	case AF_INET6:
-		memcpy(call->peer_ip.ipv6_addr,
-		       conn->params.peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
-		       sizeof(call->peer_ip.ipv6_addr));
-		break;
-	default:
-		break;
-	}
 	call->epoch = conn->proto.epoch;
 	call->service_id = conn->params.service_id;
-	call->in_clientflag = conn->proto.in_clientflag;
-	/* Add the new call to the hashtable */
-	rxrpc_call_hash_add(call);
+	call->in_clientflag = RXRPC_CLIENT_INITIATED;
 
 	_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
 
@@ -585,19 +408,19 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 	return call;
 
 extant_call:
-	write_unlock_bh(&conn->lock);
+	spin_unlock(&conn->channel_lock);
 	kmem_cache_free(rxrpc_call_jar, candidate);
 	_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
 	return call;
 
 aborted_call:
-	write_unlock_bh(&conn->lock);
+	spin_unlock(&conn->channel_lock);
 	kmem_cache_free(rxrpc_call_jar, candidate);
 	_leave(" = -ECONNABORTED");
 	return ERR_PTR(-ECONNABORTED);
 
 old_call:
-	write_unlock_bh(&conn->lock);
+	spin_unlock(&conn->channel_lock);
 	kmem_cache_free(rxrpc_call_jar, candidate);
 	_leave(" = -ECONNRESET [old]");
 	return ERR_PTR(-ECONNRESET);
@@ -626,6 +449,10 @@ void rxrpc_release_call(struct rxrpc_call *call)
 	 */
 	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
+	spin_lock(&conn->params.peer->lock);
+	hlist_del_init(&call->error_link);
+	spin_unlock(&conn->params.peer->lock);
+
 	write_lock_bh(&rx->call_lock);
 	if (!list_empty(&call->accept_link)) {
 		_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -641,24 +468,17 @@ void rxrpc_release_call(struct rxrpc_call *call)
 	write_unlock_bh(&rx->call_lock);
 
 	/* free up the channel for reuse */
-	spin_lock(&conn->channel_lock);
-	write_lock_bh(&conn->lock);
-	write_lock(&call->state_lock);
-
-	rxrpc_disconnect_call(call);
-
-	spin_unlock(&conn->channel_lock);
+	write_lock_bh(&call->state_lock);
 
 	if (call->state < RXRPC_CALL_COMPLETE &&
 	    call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
 		_debug("+++ ABORTING STATE %d +++\n", call->state);
 		call->state = RXRPC_CALL_LOCALLY_ABORTED;
 		call->local_abort = RX_CALL_DEAD;
-		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-		rxrpc_queue_call(call);
 	}
-	write_unlock(&call->state_lock);
-	write_unlock_bh(&conn->lock);
+	write_unlock_bh(&call->state_lock);
+
+	rxrpc_disconnect_call(call);
 
 	/* clean up the Rx queue */
 	if (!skb_queue_empty(&call->rx_queue) ||
@@ -792,6 +612,17 @@ void __rxrpc_put_call(struct rxrpc_call *call)
 }
 
 /*
+ * Final call destruction under RCU.
+ */
+static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+{
+	struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+
+	rxrpc_purge_queue(&call->rx_queue);
+	kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
  * clean up a call
  */
 static void rxrpc_cleanup_call(struct rxrpc_call *call)
@@ -815,19 +646,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
 		return;
 	}
 
-	if (call->conn) {
-		spin_lock(&call->conn->params.peer->lock);
-		hlist_del_init(&call->error_link);
-		spin_unlock(&call->conn->params.peer->lock);
-
-		write_lock_bh(&call->conn->lock);
-		rb_erase(&call->conn_node, &call->conn->calls);
-		write_unlock_bh(&call->conn->lock);
-		rxrpc_put_connection(call->conn);
-	}
-
-	/* Remove the call from the hash */
-	rxrpc_call_hash_del(call);
+	ASSERTCMP(call->conn, ==, NULL);
 
 	if (call->acks_window) {
 		_debug("kill Tx window %d",
@@ -855,7 +674,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
 	rxrpc_purge_queue(&call->rx_queue);
 	ASSERT(skb_queue_empty(&call->rx_oos_queue));
 	sock_put(&call->socket->sk);
-	kmem_cache_free(rxrpc_call_jar, call);
+	call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
 }
 
 /*
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 82488d6adb83..9e91f27b0d0f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -33,7 +33,8 @@ static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
  * client conns away from the current allocation point to try and keep the IDs
  * concentrated.  We will also need to retire connections from an old epoch.
  */
-int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp)
+static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
+					  gfp_t gfp)
 {
 	u32 epoch;
 	int id;
@@ -83,7 +84,7 @@ error:
 /*
  * Release a connection ID for a client connection from the global pool.
  */
-void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
 {
 	if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
 		spin_lock(&rxrpc_conn_id_lock);
@@ -92,3 +93,280 @@ void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
 		spin_unlock(&rxrpc_conn_id_lock);
 	}
 }
+
+/*
+ * Destroy the client connection ID tree.
+ */
+void rxrpc_destroy_client_conn_ids(void)
+{
+	struct rxrpc_connection *conn;
+	int id;
+
+	if (!idr_is_empty(&rxrpc_client_conn_ids)) {
+		idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+			pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
+			       conn, atomic_read(&conn->usage));
+		}
+		BUG();
+	}
+
+	idr_destroy(&rxrpc_client_conn_ids);
+}
+
+/*
+ * Allocate a client connection.  The caller must take care to clear any
+ * padding bytes in *cp.
+ */
+static struct rxrpc_connection *
+rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+	int ret;
+
+	_enter("");
+
+	conn = rxrpc_alloc_connection(gfp);
+	if (!conn) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	conn->params		= *cp;
+	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
+	conn->state		= RXRPC_CONN_CLIENT;
+
+	ret = rxrpc_get_client_connection_id(conn, gfp);
+	if (ret < 0)
+		goto error_0;
+
+	ret = rxrpc_init_client_conn_security(conn);
+	if (ret < 0)
+		goto error_1;
+
+	ret = conn->security->prime_packet_security(conn);
+	if (ret < 0)
+		goto error_2;
+
+	write_lock(&rxrpc_connection_lock);
+	list_add_tail(&conn->link, &rxrpc_connections);
+	write_unlock(&rxrpc_connection_lock);
+
+	/* We steal the caller's peer ref. */
+	cp->peer = NULL;
+	rxrpc_get_local(conn->params.local);
+	key_get(conn->params.key);
+
+	_leave(" = %p", conn);
+	return conn;
+
+error_2:
+	conn->security->clear(conn);
+error_1:
+	rxrpc_put_client_connection_id(conn);
+error_0:
+	kfree(conn);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+		       struct rxrpc_conn_parameters *cp,
+		       struct sockaddr_rxrpc *srx,
+		       gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate = NULL;
+	struct rxrpc_local *local = cp->local;
+	struct rb_node *p, **pp, *parent;
+	long diff;
+	int chan;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+	cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
+	if (!cp->peer)
+		return -ENOMEM;
+
+	if (!cp->exclusive) {
+		/* Search for a existing client connection unless this is going
+		 * to be a connection that's used exclusively for a single call.
+		 */
+		_debug("search 1");
+		spin_lock(&local->client_conns_lock);
+		p = local->client_conns.rb_node;
+		while (p) {
+			conn = rb_entry(p, struct rxrpc_connection, client_node);
+
+#define cmp(X) ((long)conn->params.X - (long)cp->X)
+			diff = (cmp(peer) ?:
+				cmp(key) ?:
+				cmp(security_level));
+			if (diff < 0)
+				p = p->rb_left;
+			else if (diff > 0)
+				p = p->rb_right;
+			else
+				goto found_extant_conn;
+		}
+		spin_unlock(&local->client_conns_lock);
+	}
+
+	/* We didn't find a connection or we want an exclusive one. */
+	_debug("get new conn");
+	candidate = rxrpc_alloc_client_connection(cp, gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	if (cp->exclusive) {
+		/* Assign the call on an exclusive connection to channel 0 and
+		 * don't add the connection to the endpoint's shareable conn
+		 * lookup tree.
+		 */
+		_debug("exclusive chan 0");
+		conn = candidate;
+		atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+		spin_lock(&conn->channel_lock);
+		chan = 0;
+		goto found_channel;
+	}
+
+	/* We need to redo the search before attempting to add a new connection
+	 * lest we race with someone else adding a conflicting instance.
+	 */
+	_debug("search 2");
+	spin_lock(&local->client_conns_lock);
+
+	pp = &local->client_conns.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		conn = rb_entry(parent, struct rxrpc_connection, client_node);
+
+		diff = (cmp(peer) ?:
+			cmp(key) ?:
+			cmp(security_level));
+		if (diff < 0)
+			pp = &(*pp)->rb_left;
+		else if (diff > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_conn;
+	}
+
+	/* The second search also failed; simply add the new connection with
+	 * the new call in channel 0.  Note that we need to take the channel
+	 * lock before dropping the client conn lock.
+	 */
+	_debug("new conn");
+	set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+	rb_link_node(&candidate->client_node, parent, pp);
+	rb_insert_color(&candidate->client_node, &local->client_conns);
+attached:
+	conn = candidate;
+	candidate = NULL;
+
+	atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+	spin_lock(&conn->channel_lock);
+	spin_unlock(&local->client_conns_lock);
+	chan = 0;
+
+found_channel:
+	_debug("found chan");
+	call->conn	= conn;
+	call->channel	= chan;
+	call->epoch	= conn->proto.epoch;
+	call->cid	= conn->proto.cid | chan;
+	call->call_id	= ++conn->channels[chan].call_counter;
+	conn->channels[chan].call_id = call->call_id;
+	rcu_assign_pointer(conn->channels[chan].call, call);
+
+	_net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+
+	spin_unlock(&conn->channel_lock);
+	rxrpc_put_peer(cp->peer);
+	cp->peer = NULL;
+	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+	return 0;
+
+	/* We found a potentially suitable connection already in existence.  If
+	 * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
+	 * reaper), discard any candidate we may have allocated, and try to get
+	 * a channel on this one, otherwise we have to replace it.
+	 */
+found_extant_conn:
+	_debug("found conn");
+	if (!rxrpc_get_connection_maybe(conn)) {
+		set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+		rb_replace_node(&conn->client_node,
+				&candidate->client_node,
+				&local->client_conns);
+		clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+		goto attached;
+	}
+
+	spin_unlock(&local->client_conns_lock);
+
+	rxrpc_put_connection(candidate);
+
+	if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
+		if (!gfpflags_allow_blocking(gfp)) {
+			rxrpc_put_connection(conn);
+			_leave(" = -EAGAIN");
+			return -EAGAIN;
+		}
+
+		add_wait_queue(&conn->channel_wq, &myself);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (atomic_add_unless(&conn->avail_chans, -1, 0))
+				break;
+			if (signal_pending(current))
+				goto interrupted;
+			schedule();
+		}
+		remove_wait_queue(&conn->channel_wq, &myself);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/* The connection allegedly now has a free channel and we can now
+	 * attach the call to it.
+	 */
+	spin_lock(&conn->channel_lock);
+
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan].call)
+			goto found_channel;
+	BUG();
+
+interrupted:
+	remove_wait_queue(&conn->channel_wq, &myself);
+	__set_current_state(TASK_RUNNING);
+	rxrpc_put_connection(conn);
+	rxrpc_put_peer(cp->peer);
+	cp->peer = NULL;
+	_leave(" = -ERESTARTSYS");
+	return -ERESTARTSYS;
+}
+
+/*
+ * Remove a client connection from the local endpoint's tree, thereby removing
+ * it as a target for reuse for new client calls.
+ */
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+{
+	struct rxrpc_local *local = conn->params.local;
+
+	spin_lock(&local->client_conns_lock);
+	if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
+		rb_erase(&conn->client_node, &local->client_conns);
+	spin_unlock(&local->client_conns_lock);
+
+	rxrpc_put_client_connection_id(conn);
+}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index bf6971555eac..cee0f35bc1cf 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -31,15 +31,17 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
 			      u32 abort_code)
 {
 	struct rxrpc_call *call;
-	struct rb_node *p;
+	int i;
 
 	_enter("{%d},%x", conn->debug_id, abort_code);
 
-	read_lock_bh(&conn->lock);
+	spin_lock(&conn->channel_lock);
 
-	for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
-		call = rb_entry(p, struct rxrpc_call, conn_node);
-		write_lock(&call->state_lock);
+	for (i = 0; i < RXRPC_MAXCALLS; i++) {
+		call = rcu_dereference_protected(
+			conn->channels[i].call,
+			lockdep_is_held(&conn->channel_lock));
+		write_lock_bh(&call->state_lock);
 		if (call->state <= RXRPC_CALL_COMPLETE) {
 			call->state = state;
 			if (state == RXRPC_CALL_LOCALLY_ABORTED) {
@@ -51,10 +53,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
 			}
 			rxrpc_queue_call(call);
 		}
-		write_unlock(&call->state_lock);
+		write_unlock_bh(&call->state_lock);
 	}
 
-	read_unlock_bh(&conn->lock);
+	spin_unlock(&conn->channel_lock);
 	_leave("");
 }
 
@@ -188,18 +190,24 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 		if (ret < 0)
 			return ret;
 
-		conn->security->prime_packet_security(conn);
-		read_lock_bh(&conn->lock);
+		ret = conn->security->prime_packet_security(conn);
+		if (ret < 0)
+			return ret;
+
+		spin_lock(&conn->channel_lock);
 		spin_lock(&conn->state_lock);
 
-		if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
-			conn->state = RXRPC_CONN_SERVER;
+		if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+			conn->state = RXRPC_CONN_SERVICE;
 			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
-				rxrpc_call_is_secure(conn->channels[loop]);
+				rxrpc_call_is_secure(
+					rcu_dereference_protected(
+						conn->channels[loop].call,
+						lockdep_is_held(&conn->channel_lock)));
 		}
 
 		spin_unlock(&conn->state_lock);
-		read_unlock_bh(&conn->lock);
+		spin_unlock(&conn->channel_lock);
 		return 0;
 
 	default:
@@ -263,12 +271,8 @@ void rxrpc_process_connection(struct work_struct *work)
 
 	_enter("{%d}", conn->debug_id);
 
-	rxrpc_get_connection(conn);
-
-	if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
 		rxrpc_secure_connection(conn);
-		rxrpc_put_connection(conn);
-	}
 
 	/* go through the conn-level event packets, releasing the ref on this
 	 * connection that each one has when we've finished with it */
@@ -283,7 +287,6 @@ void rxrpc_process_connection(struct work_struct *work)
 			goto requeue_and_leave;
 		case -ECONNABORTED:
 		default:
-			rxrpc_put_connection(conn);
 			rxrpc_free_skb(skb);
 			break;
 		}
@@ -301,7 +304,6 @@ requeue_and_leave:
 protocol_error:
 	if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
 		goto requeue_and_leave;
-	rxrpc_put_connection(conn);
 	rxrpc_free_skb(skb);
 	_leave(" [EPROTO]");
 	goto out;
@@ -315,7 +317,7 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
 	CHECK_SLAB_OKAY(&local->usage);
 
 	skb_queue_tail(&local->reject_queue, skb);
-	rxrpc_queue_work(&local->processor);
+	rxrpc_queue_local(local);
 }
 
 /*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4bfad7cf96cb..896d84493a05 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
-#include <linux/crypto.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
@@ -34,7 +33,7 @@ static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
 /*
  * allocate a new connection
  */
-static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
 {
 	struct rxrpc_connection *conn;
 
@@ -46,12 +45,13 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
 		init_waitqueue_head(&conn->channel_wq);
 		INIT_WORK(&conn->processor, &rxrpc_process_connection);
 		INIT_LIST_HEAD(&conn->link);
-		conn->calls = RB_ROOT;
 		skb_queue_head_init(&conn->rx_queue);
 		conn->security = &rxrpc_no_security;
-		rwlock_init(&conn->lock);
 		spin_lock_init(&conn->state_lock);
-		atomic_set(&conn->usage, 1);
+		/* We maintain an extra ref on the connection whilst it is
+		 * on the rxrpc_connections list.
+		 */
+		atomic_set(&conn->usage, 2);
 		conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
 		atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
 		conn->size_align = 4;
@@ -63,465 +63,118 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
 }
 
 /*
- * add a call to a connection's call-by-ID tree
- */
-static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
-				      struct rxrpc_call *call)
-{
-	struct rxrpc_call *xcall;
-	struct rb_node *parent, **p;
-	__be32 call_id;
-
-	write_lock_bh(&conn->lock);
-
-	call_id = call->call_id;
-	p = &conn->calls.rb_node;
-	parent = NULL;
-	while (*p) {
-		parent = *p;
-		xcall = rb_entry(parent, struct rxrpc_call, conn_node);
-
-		if (call_id < xcall->call_id)
-			p = &(*p)->rb_left;
-		else if (call_id > xcall->call_id)
-			p = &(*p)->rb_right;
-		else
-			BUG();
-	}
-
-	rb_link_node(&call->conn_node, parent, p);
-	rb_insert_color(&call->conn_node, &conn->calls);
-
-	write_unlock_bh(&conn->lock);
-}
-
-/*
- * Allocate a client connection.  The caller must take care to clear any
- * padding bytes in *cp.
+ * Look up a connection in the cache by protocol parameters.
+ *
+ * If successful, a pointer to the connection is returned, but no ref is taken.
+ * NULL is returned if there is no match.
+ *
+ * The caller must be holding the RCU read lock.
  */
-static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
+						   struct sk_buff *skb)
 {
 	struct rxrpc_connection *conn;
-	int ret;
-
-	_enter("");
-
-	conn = rxrpc_alloc_connection(gfp);
-	if (!conn) {
-		_leave(" = -ENOMEM");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	conn->params		= *cp;
-	conn->proto.local	= cp->local;
-	conn->proto.epoch	= rxrpc_epoch;
-	conn->proto.cid		= 0;
-	conn->proto.in_clientflag = 0;
-	conn->proto.family	= cp->peer->srx.transport.family;
-	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
-	conn->state		= RXRPC_CONN_CLIENT;
-
-	switch (conn->proto.family) {
-	case AF_INET:
-		conn->proto.addr_size = sizeof(conn->proto.ipv4_addr);
-		conn->proto.ipv4_addr = cp->peer->srx.transport.sin.sin_addr;
-		conn->proto.port = cp->peer->srx.transport.sin.sin_port;
-		break;
-	}
-
-	ret = rxrpc_get_client_connection_id(conn, gfp);
-	if (ret < 0)
-		goto error_0;
-
-	ret = rxrpc_init_client_conn_security(conn);
-	if (ret < 0)
-		goto error_1;
-
-	conn->security->prime_packet_security(conn);
-
-	write_lock(&rxrpc_connection_lock);
-	list_add_tail(&conn->link, &rxrpc_connections);
-	write_unlock(&rxrpc_connection_lock);
-
-	/* We steal the caller's peer ref. */
-	cp->peer = NULL;
-	rxrpc_get_local(conn->params.local);
-	key_get(conn->params.key);
-
-	_leave(" = %p", conn);
-	return conn;
-
-error_1:
-	rxrpc_put_client_connection_id(conn);
-error_0:
-	kfree(conn);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
- */
-int rxrpc_connect_call(struct rxrpc_call *call,
-		       struct rxrpc_conn_parameters *cp,
-		       struct sockaddr_rxrpc *srx,
-		       gfp_t gfp)
-{
-	struct rxrpc_connection *conn, *candidate = NULL;
-	struct rxrpc_local *local = cp->local;
-	struct rb_node *p, **pp, *parent;
-	long diff;
-	int chan;
+	struct rxrpc_conn_proto k;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_peer *peer;
 
-	DECLARE_WAITQUEUE(myself, current);
+	_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
 
-	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+		goto not_found;
 
-	cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
-	if (!cp->peer)
-		return -ENOMEM;
+	k.epoch	= sp->hdr.epoch;
+	k.cid	= sp->hdr.cid & RXRPC_CIDMASK;
 
-	if (!cp->exclusive) {
-		/* Search for a existing client connection unless this is going
-		 * to be a connection that's used exclusively for a single call.
-		 */
-		_debug("search 1");
-		spin_lock(&local->client_conns_lock);
-		p = local->client_conns.rb_node;
-		while (p) {
-			conn = rb_entry(p, struct rxrpc_connection, client_node);
-
-#define cmp(X) ((long)conn->params.X - (long)cp->X)
-			diff = (cmp(peer) ?:
-				cmp(key) ?:
-				cmp(security_level));
-			if (diff < 0)
-				p = p->rb_left;
-			else if (diff > 0)
-				p = p->rb_right;
-			else
-				goto found_extant_conn;
-		}
-		spin_unlock(&local->client_conns_lock);
+	/* We may have to handle mixing IPv4 and IPv6 */
+	if (srx.transport.family != local->srx.transport.family) {
+		pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+				    srx.transport.family,
+				    local->srx.transport.family);
+		goto not_found;
 	}
 
-	/* We didn't find a connection or we want an exclusive one. */
-	_debug("get new conn");
-	candidate = rxrpc_alloc_client_connection(cp, gfp);
-	if (!candidate) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
+	k.epoch	= sp->hdr.epoch;
+	k.cid	= sp->hdr.cid & RXRPC_CIDMASK;
 
-	if (cp->exclusive) {
-		/* Assign the call on an exclusive connection to channel 0 and
-		 * don't add the connection to the endpoint's shareable conn
-		 * lookup tree.
+	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
+		/* We need to look up service connections by the full protocol
+		 * parameter set.  We look up the peer first as an intermediate
+		 * step and then the connection from the peer's tree.
 		 */
-		_debug("exclusive chan 0");
-		conn = candidate;
-		atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
-		spin_lock(&conn->channel_lock);
-		chan = 0;
-		goto found_channel;
-	}
-
-	/* We need to redo the search before attempting to add a new connection
-	 * lest we race with someone else adding a conflicting instance.
-	 */
-	_debug("search 2");
-	spin_lock(&local->client_conns_lock);
-
-	pp = &local->client_conns.rb_node;
-	parent = NULL;
-	while (*pp) {
-		parent = *pp;
-		conn = rb_entry(parent, struct rxrpc_connection, client_node);
-
-		diff = (cmp(peer) ?:
-			cmp(key) ?:
-			cmp(security_level));
-		if (diff < 0)
-			pp = &(*pp)->rb_left;
-		else if (diff > 0)
-			pp = &(*pp)->rb_right;
-		else
-			goto found_extant_conn;
-	}
-
-	/* The second search also failed; simply add the new connection with
-	 * the new call in channel 0.  Note that we need to take the channel
-	 * lock before dropping the client conn lock.
-	 */
-	_debug("new conn");
-	conn = candidate;
-	candidate = NULL;
-
-	rb_link_node(&conn->client_node, parent, pp);
-	rb_insert_color(&conn->client_node, &local->client_conns);
-
-	atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
-	spin_lock(&conn->channel_lock);
-	spin_unlock(&local->client_conns_lock);
-	chan = 0;
-
-found_channel:
-	_debug("found chan");
-	call->conn	= conn;
-	call->channel	= chan;
-	call->epoch	= conn->proto.epoch;
-	call->cid	= conn->proto.cid | chan;
-	call->call_id	= ++conn->call_counter;
-	rcu_assign_pointer(conn->channels[chan], call);
-
-	_net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
-
-	rxrpc_add_call_ID_to_conn(conn, call);
-	spin_unlock(&conn->channel_lock);
-	rxrpc_put_peer(cp->peer);
-	cp->peer = NULL;
-	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
-	return 0;
-
-	/* We found a suitable connection already in existence.  Discard any
-	 * candidate we may have allocated, and try to get a channel on this
-	 * one.
-	 */
-found_extant_conn:
-	_debug("found conn");
-	rxrpc_get_connection(conn);
-	spin_unlock(&local->client_conns_lock);
-
-	rxrpc_put_connection(candidate);
-
-	if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
-		if (!gfpflags_allow_blocking(gfp)) {
-			rxrpc_put_connection(conn);
-			_leave(" = -EAGAIN");
-			return -EAGAIN;
+		peer = rxrpc_lookup_peer_rcu(local, &srx);
+		if (!peer)
+			goto not_found;
+		conn = rxrpc_find_service_conn_rcu(peer, skb);
+		if (!conn || atomic_read(&conn->usage) == 0)
+			goto not_found;
+		_leave(" = %p", conn);
+		return conn;
+	} else {
+		/* Look up client connections by connection ID alone as their
+		 * IDs are unique for this machine.
+		 */
+		conn = idr_find(&rxrpc_client_conn_ids,
+				sp->hdr.cid >> RXRPC_CIDSHIFT);
+		if (!conn || atomic_read(&conn->usage) == 0) {
+			_debug("no conn");
+			goto not_found;
 		}
 
-		add_wait_queue(&conn->channel_wq, &myself);
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (atomic_add_unless(&conn->avail_chans, -1, 0))
-				break;
-			if (signal_pending(current))
-				goto interrupted;
-			schedule();
+		if (conn->proto.epoch != k.epoch ||
+		    conn->params.local != local)
+			goto not_found;
+
+		peer = conn->params.peer;
+		switch (srx.transport.family) {
+		case AF_INET:
+			if (peer->srx.transport.sin.sin_port !=
+			    srx.transport.sin.sin_port ||
+			    peer->srx.transport.sin.sin_addr.s_addr !=
+			    srx.transport.sin.sin_addr.s_addr)
+				goto not_found;
+			break;
+		default:
+			BUG();
 		}
-		remove_wait_queue(&conn->channel_wq, &myself);
-		__set_current_state(TASK_RUNNING);
-	}
-
-	/* The connection allegedly now has a free channel and we can now
-	 * attach the call to it.
-	 */
-	spin_lock(&conn->channel_lock);
-
-	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
-		if (!conn->channels[chan])
-			goto found_channel;
-	BUG();
-
-interrupted:
-	remove_wait_queue(&conn->channel_wq, &myself);
-	__set_current_state(TASK_RUNNING);
-	rxrpc_put_connection(conn);
-	rxrpc_put_peer(cp->peer);
-	cp->peer = NULL;
-	_leave(" = -ERESTARTSYS");
-	return -ERESTARTSYS;
-}
-
-/*
- * get a record of an incoming connection
- */
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
-						   struct rxrpc_peer *peer,
-						   struct sk_buff *skb)
-{
-	struct rxrpc_connection *conn, *candidate = NULL;
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rb_node *p, **pp;
-	const char *new = "old";
-	__be32 epoch;
-	u32 cid;
-
-	_enter("");
-
-	ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
-	epoch = sp->hdr.epoch;
-	cid = sp->hdr.cid & RXRPC_CIDMASK;
-
-	/* search the connection list first */
-	read_lock_bh(&peer->conn_lock);
-
-	p = peer->service_conns.rb_node;
-	while (p) {
-		conn = rb_entry(p, struct rxrpc_connection, service_node);
 
-		_debug("maybe %x", conn->proto.cid);
-
-		if (epoch < conn->proto.epoch)
-			p = p->rb_left;
-		else if (epoch > conn->proto.epoch)
-			p = p->rb_right;
-		else if (cid < conn->proto.cid)
-			p = p->rb_left;
-		else if (cid > conn->proto.cid)
-			p = p->rb_right;
-		else
-			goto found_extant_connection;
-	}
-	read_unlock_bh(&peer->conn_lock);
-
-	/* not yet present - create a candidate for a new record and then
-	 * redo the search */
-	candidate = rxrpc_alloc_connection(GFP_NOIO);
-	if (!candidate) {
-		_leave(" = -ENOMEM");
-		return ERR_PTR(-ENOMEM);
+		_leave(" = %p", conn);
+		return conn;
 	}
 
-	candidate->proto.local		= local;
-	candidate->proto.epoch		= sp->hdr.epoch;
-	candidate->proto.cid		= sp->hdr.cid & RXRPC_CIDMASK;
-	candidate->proto.in_clientflag	= RXRPC_CLIENT_INITIATED;
-	candidate->params.local		= local;
-	candidate->params.peer		= peer;
-	candidate->params.service_id	= sp->hdr.serviceId;
-	candidate->security_ix		= sp->hdr.securityIndex;
-	candidate->out_clientflag	= 0;
-	candidate->state		= RXRPC_CONN_SERVER;
-	if (candidate->params.service_id)
-		candidate->state	= RXRPC_CONN_SERVER_UNSECURED;
-
-	write_lock_bh(&peer->conn_lock);
-
-	pp = &peer->service_conns.rb_node;
-	p = NULL;
-	while (*pp) {
-		p = *pp;
-		conn = rb_entry(p, struct rxrpc_connection, service_node);
-
-		if (epoch < conn->proto.epoch)
-			pp = &(*pp)->rb_left;
-		else if (epoch > conn->proto.epoch)
-			pp = &(*pp)->rb_right;
-		else if (cid < conn->proto.cid)
-			pp = &(*pp)->rb_left;
-		else if (cid > conn->proto.cid)
-			pp = &(*pp)->rb_right;
-		else
-			goto found_extant_second;
-	}
-
-	/* we can now add the new candidate to the list */
-	conn = candidate;
-	candidate = NULL;
-	rb_link_node(&conn->service_node, p, pp);
-	rb_insert_color(&conn->service_node, &peer->service_conns);
-	rxrpc_get_peer(peer);
-	rxrpc_get_local(local);
-
-	write_unlock_bh(&peer->conn_lock);
-
-	write_lock(&rxrpc_connection_lock);
-	list_add_tail(&conn->link, &rxrpc_connections);
-	write_unlock(&rxrpc_connection_lock);
-
-	new = "new";
-
-success:
-	_net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
-
-	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
-	return conn;
-
-	/* we found the connection in the list immediately */
-found_extant_connection:
-	if (sp->hdr.securityIndex != conn->security_ix) {
-		read_unlock_bh(&peer->conn_lock);
-		goto security_mismatch;
-	}
-	rxrpc_get_connection(conn);
-	read_unlock_bh(&peer->conn_lock);
-	goto success;
-
-	/* we found the connection on the second time through the list */
-found_extant_second:
-	if (sp->hdr.securityIndex != conn->security_ix) {
-		write_unlock_bh(&peer->conn_lock);
-		goto security_mismatch;
-	}
-	rxrpc_get_connection(conn);
-	write_unlock_bh(&peer->conn_lock);
-	kfree(candidate);
-	goto success;
-
-security_mismatch:
-	kfree(candidate);
-	_leave(" = -EKEYREJECTED");
-	return ERR_PTR(-EKEYREJECTED);
+not_found:
+	_leave(" = NULL");
+	return NULL;
 }
 
 /*
- * find a connection based on transport and RxRPC connection ID for an incoming
- * packet
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates.  The caller must hold the channel_lock and must release the
+ * call's ref on the connection.
  */
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *local,
-					       struct rxrpc_peer *peer,
-					       struct sk_buff *skb)
+void __rxrpc_disconnect_call(struct rxrpc_call *call)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rb_node *p;
-	u32 epoch, cid;
-
-	_enter(",{%x,%x}", sp->hdr.cid, sp->hdr.flags);
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_channel *chan = &conn->channels[call->channel];
 
-	read_lock_bh(&peer->conn_lock);
+	_enter("%d,%d", conn->debug_id, call->channel);
 
-	cid	= sp->hdr.cid & RXRPC_CIDMASK;
-	epoch	= sp->hdr.epoch;
+	if (rcu_access_pointer(chan->call) == call) {
+		/* Save the result of the call so that we can repeat it if necessary
+		 * through the channel, whilst disposing of the actual call record.
+		 */
+		chan->last_result = call->local_abort;
+		smp_wmb();
+		chan->last_call = chan->call_id;
+		chan->call_id = chan->call_counter;
 
-	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
-		p = peer->service_conns.rb_node;
-		while (p) {
-			conn = rb_entry(p, struct rxrpc_connection, service_node);
-
-			_debug("maybe %x", conn->proto.cid);
-
-			if (epoch < conn->proto.epoch)
-				p = p->rb_left;
-			else if (epoch > conn->proto.epoch)
-				p = p->rb_right;
-			else if (cid < conn->proto.cid)
-				p = p->rb_left;
-			else if (cid > conn->proto.cid)
-				p = p->rb_right;
-			else
-				goto found;
-		}
-	} else {
-		conn = idr_find(&rxrpc_client_conn_ids, cid >> RXRPC_CIDSHIFT);
-		if (conn && conn->proto.epoch == epoch)
-			goto found;
+		rcu_assign_pointer(chan->call, NULL);
+		atomic_inc(&conn->avail_chans);
+		wake_up(&conn->channel_wq);
 	}
 
-	read_unlock_bh(&peer->conn_lock);
-	_leave(" = NULL");
-	return NULL;
-
-found:
-	rxrpc_get_connection(conn);
-	read_unlock_bh(&peer->conn_lock);
-	_leave(" = %p", conn);
-	return conn;
+	_leave("");
 }
 
 /*
@@ -531,15 +184,13 @@ found:
 void rxrpc_disconnect_call(struct rxrpc_call *call)
 {
 	struct rxrpc_connection *conn = call->conn;
-	unsigned chan = call->channel;
 
-	_enter("%d,%d", conn->debug_id, call->channel);
+	spin_lock(&conn->channel_lock);
+	__rxrpc_disconnect_call(call);
+	spin_unlock(&conn->channel_lock);
 
-	if (conn->channels[chan] == call) {
-		rcu_assign_pointer(conn->channels[chan], NULL);
-		atomic_inc(&conn->avail_chans);
-		wake_up(&conn->channel_wq);
-	}
+	call->conn = NULL;
+	rxrpc_put_connection(conn);
 }
 
 /*
@@ -553,10 +204,10 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
 	_enter("%p{u=%d,d=%d}",
 	       conn, atomic_read(&conn->usage), conn->debug_id);
 
-	ASSERTCMP(atomic_read(&conn->usage), >, 0);
+	ASSERTCMP(atomic_read(&conn->usage), >, 1);
 
 	conn->put_time = ktime_get_seconds();
-	if (atomic_dec_and_test(&conn->usage)) {
+	if (atomic_dec_return(&conn->usage) == 1) {
 		_debug("zombie");
 		rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
 	}
@@ -567,15 +218,17 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
 /*
  * destroy a virtual connection
  */
-static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+static void rxrpc_destroy_connection(struct rcu_head *rcu)
 {
-	_enter("%p{%d}", conn, atomic_read(&conn->usage));
+	struct rxrpc_connection *conn =
+		container_of(rcu, struct rxrpc_connection, rcu);
+
+	_enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
 
 	ASSERTCMP(atomic_read(&conn->usage), ==, 0);
 
 	_net("DESTROY CONN %d", conn->debug_id);
 
-	ASSERT(RB_EMPTY_ROOT(&conn->calls));
 	rxrpc_purge_queue(&conn->rx_queue);
 
 	conn->security->clear(conn);
@@ -594,59 +247,41 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
 static void rxrpc_connection_reaper(struct work_struct *work)
 {
 	struct rxrpc_connection *conn, *_p;
-	struct rxrpc_peer *peer;
-	unsigned long now, earliest, reap_time;
+	unsigned long reap_older_than, earliest, put_time, now;
 
 	LIST_HEAD(graveyard);
 
 	_enter("");
 
 	now = ktime_get_seconds();
+	reap_older_than =  now - rxrpc_connection_expiry;
 	earliest = ULONG_MAX;
 
 	write_lock(&rxrpc_connection_lock);
 	list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
-		_debug("reap CONN %d { u=%d,t=%ld }",
-		       conn->debug_id, atomic_read(&conn->usage),
-		       (long) now - (long) conn->put_time);
-
-		if (likely(atomic_read(&conn->usage) > 0))
+		ASSERTCMP(atomic_read(&conn->usage), >, 0);
+		if (likely(atomic_read(&conn->usage) > 1))
 			continue;
 
-		if (rxrpc_conn_is_client(conn)) {
-			struct rxrpc_local *local = conn->params.local;
-			spin_lock(&local->client_conns_lock);
-			reap_time = conn->put_time + rxrpc_connection_expiry;
-
-			if (atomic_read(&conn->usage) > 0) {
-				;
-			} else if (reap_time <= now) {
-				list_move_tail(&conn->link, &graveyard);
-				rxrpc_put_client_connection_id(conn);
-				rb_erase(&conn->client_node,
-					 &local->client_conns);
-			} else if (reap_time < earliest) {
-				earliest = reap_time;
-			}
-
-			spin_unlock(&local->client_conns_lock);
-		} else {
-			peer = conn->params.peer;
-			write_lock_bh(&peer->conn_lock);
-			reap_time = conn->put_time + rxrpc_connection_expiry;
-
-			if (atomic_read(&conn->usage) > 0) {
-				;
-			} else if (reap_time <= now) {
-				list_move_tail(&conn->link, &graveyard);
-				rb_erase(&conn->service_node,
-					 &peer->service_conns);
-			} else if (reap_time < earliest) {
-				earliest = reap_time;
-			}
-
-			write_unlock_bh(&peer->conn_lock);
+		put_time = READ_ONCE(conn->put_time);
+		if (time_after(put_time, reap_older_than)) {
+			if (time_before(put_time, earliest))
+				earliest = put_time;
+			continue;
 		}
+
+		/* The usage count sits at 1 whilst the object is unused on the
+		 * list; we reduce that to 0 to make the object unavailable.
+		 */
+		if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+			continue;
+
+		if (rxrpc_conn_is_client(conn))
+			rxrpc_unpublish_client_conn(conn);
+		else
+			rxrpc_unpublish_service_conn(conn);
+
+		list_move_tail(&conn->link, &graveyard);
 	}
 	write_unlock(&rxrpc_connection_lock);
 
@@ -657,14 +292,14 @@ static void rxrpc_connection_reaper(struct work_struct *work)
 					 (earliest - now) * HZ);
 	}
 
-	/* then destroy all those pulled out */
 	while (!list_empty(&graveyard)) {
 		conn = list_entry(graveyard.next, struct rxrpc_connection,
 				  link);
 		list_del_init(&conn->link);
 
 		ASSERTCMP(atomic_read(&conn->usage), ==, 0);
-		rxrpc_destroy_connection(conn);
+		skb_queue_purge(&conn->rx_queue);
+		call_rcu(&conn->rcu, rxrpc_destroy_connection);
 	}
 
 	_leave("");
@@ -676,11 +311,30 @@ static void rxrpc_connection_reaper(struct work_struct *work)
  */
 void __exit rxrpc_destroy_all_connections(void)
 {
+	struct rxrpc_connection *conn, *_p;
+	bool leak = false;
+
 	_enter("");
 
 	rxrpc_connection_expiry = 0;
 	cancel_delayed_work(&rxrpc_connection_reap);
 	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+	flush_workqueue(rxrpc_workqueue);
+
+	write_lock(&rxrpc_connection_lock);
+	list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+		pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
+		       conn, atomic_read(&conn->usage));
+		leak = true;
+	}
+	write_unlock(&rxrpc_connection_lock);
+	BUG_ON(leak);
+
+	/* Make sure the local and peer records pinned by any dying connections
+	 * are released.
+	 */
+	rcu_barrier();
+	rxrpc_destroy_client_conn_ids();
 
 	_leave("");
 }
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
new file mode 100644
index 000000000000..7cbd612be0d7
--- /dev/null
+++ b/net/rxrpc/conn_service.c
@@ -0,0 +1,230 @@
+/* Service connection management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+/*
+ * Find a service connection under RCU conditions.
+ *
+ * We could use a hash table, but that is subject to bucket stuffing by an
+ * attacker as the client gets to pick the epoch and cid values and would know
+ * the hash function.  So, instead, we use a hash table for the peer and from
+ * that an rbtree to find the service connection.  Under ordinary circumstances
+ * it might be slower than a large hash table, but it is at least limited in
+ * depth.
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
+						     struct sk_buff *skb)
+{
+	struct rxrpc_connection *conn = NULL;
+	struct rxrpc_conn_proto k;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rb_node *p;
+	unsigned int seq = 0;
+
+	k.epoch	= sp->hdr.epoch;
+	k.cid	= sp->hdr.cid & RXRPC_CIDMASK;
+
+	do {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 */
+		read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
+
+		p = rcu_dereference_raw(peer->service_conns.rb_node);
+		while (p) {
+			conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+			if (conn->proto.index_key < k.index_key)
+				p = rcu_dereference_raw(p->rb_left);
+			else if (conn->proto.index_key > k.index_key)
+				p = rcu_dereference_raw(p->rb_right);
+			else
+				goto done;
+			conn = NULL;
+		}
+	} while (need_seqretry(&peer->service_conn_lock, seq));
+
+done:
+	done_seqretry(&peer->service_conn_lock, seq);
+	_leave(" = %d", conn ? conn->debug_id : -1);
+	return conn;
+}
+
+/*
+ * Insert a service connection into a peer's tree, thereby making it a target
+ * for incoming packets.
+ */
+static struct rxrpc_connection *
+rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+			   struct rxrpc_connection *conn)
+{
+	struct rxrpc_connection *cursor = NULL;
+	struct rxrpc_conn_proto k = conn->proto;
+	struct rb_node **pp, *parent;
+
+	write_seqlock_bh(&peer->service_conn_lock);
+
+	pp = &peer->service_conns.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		cursor = rb_entry(parent,
+				  struct rxrpc_connection, service_node);
+
+		if (cursor->proto.index_key < k.index_key)
+			pp = &(*pp)->rb_left;
+		else if (cursor->proto.index_key > k.index_key)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_conn;
+	}
+
+	rb_link_node_rcu(&conn->service_node, parent, pp);
+	rb_insert_color(&conn->service_node, &peer->service_conns);
+conn_published:
+	set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
+	write_sequnlock_bh(&peer->service_conn_lock);
+	_leave(" = %d [new]", conn->debug_id);
+	return conn;
+
+found_extant_conn:
+	if (atomic_read(&cursor->usage) == 0)
+		goto replace_old_connection;
+	write_sequnlock_bh(&peer->service_conn_lock);
+	/* We should not be able to get here.  rxrpc_incoming_connection() is
+	 * called in a non-reentrant context, so there can't be a race to
+	 * insert a new connection.
+	 */
+	BUG();
+
+replace_old_connection:
+	/* The old connection is from an outdated epoch. */
+	_debug("replace conn");
+	rb_replace_node_rcu(&cursor->service_node,
+			    &conn->service_node,
+			    &peer->service_conns);
+	clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags);
+	goto conn_published;
+}
+
+/*
+ * get a record of an incoming connection
+ */
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
+						   struct sockaddr_rxrpc *srx,
+						   struct sk_buff *skb)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_peer *peer;
+	const char *new = "old";
+
+	_enter("");
+
+	peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+	if (!peer) {
+		_debug("no peer");
+		return ERR_PTR(-EBUSY);
+	}
+
+	ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
+
+	rcu_read_lock();
+	peer = rxrpc_lookup_peer_rcu(local, srx);
+	if (peer) {
+		conn = rxrpc_find_service_conn_rcu(peer, skb);
+		if (conn) {
+			if (sp->hdr.securityIndex != conn->security_ix)
+				goto security_mismatch_rcu;
+			if (rxrpc_get_connection_maybe(conn))
+				goto found_extant_connection_rcu;
+
+			/* The conn has expired but we can't remove it without
+			 * the appropriate lock, so we attempt to replace it
+			 * when we have a new candidate.
+			 */
+		}
+
+		if (!rxrpc_get_peer_maybe(peer))
+			peer = NULL;
+	}
+	rcu_read_unlock();
+
+	if (!peer) {
+		peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+		if (IS_ERR(peer))
+			goto enomem;
+	}
+
+	/* We don't have a matching record yet. */
+	conn = rxrpc_alloc_connection(GFP_NOIO);
+	if (!conn)
+		goto enomem_peer;
+
+	conn->proto.epoch	= sp->hdr.epoch;
+	conn->proto.cid		= sp->hdr.cid & RXRPC_CIDMASK;
+	conn->params.local	= local;
+	conn->params.peer	= peer;
+	conn->params.service_id	= sp->hdr.serviceId;
+	conn->security_ix	= sp->hdr.securityIndex;
+	conn->out_clientflag	= 0;
+	conn->state		= RXRPC_CONN_SERVICE;
+	if (conn->params.service_id)
+		conn->state	= RXRPC_CONN_SERVICE_UNSECURED;
+
+	rxrpc_get_local(local);
+
+	write_lock(&rxrpc_connection_lock);
+	list_add_tail(&conn->link, &rxrpc_connections);
+	write_unlock(&rxrpc_connection_lock);
+
+	/* Make the connection a target for incoming packets. */
+	rxrpc_publish_service_conn(peer, conn);
+
+	new = "new";
+
+success:
+	_net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
+	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+	return conn;
+
+found_extant_connection_rcu:
+	rcu_read_unlock();
+	goto success;
+
+security_mismatch_rcu:
+	rcu_read_unlock();
+	_leave(" = -EKEYREJECTED");
+	return ERR_PTR(-EKEYREJECTED);
+
+enomem_peer:
+	rxrpc_put_peer(peer);
+enomem:
+	_leave(" = -ENOMEM");
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Remove the service connection from the peer's tree, thereby removing it as a
+ * target for incoming packets.
+ */
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
+{
+	struct rxrpc_peer *peer = conn->params.peer;
+
+	write_seqlock_bh(&peer->service_conn_lock);
+	if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
+		rb_erase(&conn->service_node, &peer->service_conns);
+	write_sequnlock_bh(&peer->service_conn_lock);
+}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index f4bd57b77b93..991a20d25093 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -476,7 +476,7 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
 		sp->hdr.seq	+= 1;
 		sp->hdr.serial	+= 1;
 		sp->hdr.flags	= jhdr.flags;
-		sp->hdr._rsvd	= jhdr._rsvd;
+		sp->hdr._rsvd	= ntohs(jhdr._rsvd);
 
 		_proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
 
@@ -575,14 +575,13 @@ done:
  * post connection-level events to the connection
  * - this includes challenges, responses and some aborts
  */
-static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 				      struct sk_buff *skb)
 {
 	_enter("%p,%p", conn, skb);
 
-	rxrpc_get_connection(conn);
 	skb_queue_tail(&conn->rx_queue, skb);
-	rxrpc_queue_conn(conn);
+	return rxrpc_queue_conn(conn);
 }
 
 /*
@@ -595,7 +594,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
 	_enter("%p,%p", local, skb);
 
 	skb_queue_tail(&local->event_queue, skb);
-	rxrpc_queue_work(&local->processor);
+	rxrpc_queue_local(local);
 }
 
 /*
@@ -627,32 +626,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
 	return 0;
 }
 
-static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
-						      struct sk_buff *skb)
-{
-	struct rxrpc_peer *peer;
-	struct rxrpc_connection *conn;
-	struct sockaddr_rxrpc srx;
-
-	rxrpc_get_addr_from_skb(local, skb, &srx);
-	rcu_read_lock();
-	peer = rxrpc_lookup_peer_rcu(local, &srx);
-	if (!peer)
-		goto cant_find_peer;
-
-	conn = rxrpc_find_connection(local, peer, skb);
-	rcu_read_unlock();
-	if (!conn)
-		goto cant_find_conn;
-
-	return conn;
-
-cant_find_peer:
-	rcu_read_unlock();
-cant_find_conn:
-	return NULL;
-}
-
 /*
  * handle data received on the local endpoint
  * - may be called in interrupt context
@@ -663,6 +636,7 @@ cant_find_conn:
  */
 void rxrpc_data_ready(struct sock *sk)
 {
+	struct rxrpc_connection *conn;
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_local *local = sk->sk_user_data;
 	struct sk_buff *skb;
@@ -726,34 +700,37 @@ void rxrpc_data_ready(struct sock *sk)
 	    (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
 		goto bad_message;
 
-	if (sp->hdr.callNumber == 0) {
-		/* This is a connection-level packet. These should be
-		 * fairly rare, so the extra overhead of looking them up the
-		 * old-fashioned way doesn't really hurt */
-		struct rxrpc_connection *conn;
+	rcu_read_lock();
 
-		conn = rxrpc_conn_from_local(local, skb);
-		if (!conn)
-			goto cant_route_call;
+retry_find_conn:
+	conn = rxrpc_find_connection_rcu(local, skb);
+	if (!conn)
+		goto cant_route_call;
 
+	if (sp->hdr.callNumber == 0) {
+		/* Connection-level packet */
 		_debug("CONN %p {%d}", conn, conn->debug_id);
-		rxrpc_post_packet_to_conn(conn, skb);
-		rxrpc_put_connection(conn);
+		if (!rxrpc_post_packet_to_conn(conn, skb))
+			goto retry_find_conn;
 	} else {
-		struct rxrpc_call *call;
+		/* Call-bound packets are routed by connection channel. */
+		unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+		struct rxrpc_channel *chan = &conn->channels[channel];
+		struct rxrpc_call *call = rcu_dereference(chan->call);
 
-		call = rxrpc_find_call_hash(&sp->hdr, local,
-					    AF_INET, &ip_hdr(skb)->saddr);
-		if (call)
-			rxrpc_post_packet_to_call(call, skb);
-		else
+		if (!call || atomic_read(&call->usage) == 0)
 			goto cant_route_call;
+
+		rxrpc_post_packet_to_call(call, skb);
 	}
 
+	rcu_read_unlock();
 out:
 	return;
 
 cant_route_call:
+	rcu_read_unlock();
+
 	_debug("can't route call");
 	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
 	    sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index e571403613c1..c21ad213b337 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -17,11 +17,12 @@ static int none_init_connection_security(struct rxrpc_connection *conn)
 	return 0;
 }
 
-static void none_prime_packet_security(struct rxrpc_connection *conn)
+static int none_prime_packet_security(struct rxrpc_connection *conn)
 {
+	return 0;
 }
 
-static int none_secure_packet(const struct rxrpc_call *call,
+static int none_secure_packet(struct rxrpc_call *call,
 			       struct sk_buff *skb,
 			       size_t data_size,
 			       void *sechdr)
@@ -29,7 +30,7 @@ static int none_secure_packet(const struct rxrpc_call *call,
 	return 0;
 }
 
-static int none_verify_packet(const struct rxrpc_call *call,
+static int none_verify_packet(struct rxrpc_call *call,
 			       struct sk_buff *skb,
 			       u32 *_abort_code)
 {
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 3ab7764f7cd8..a753796fbe8f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -374,14 +374,17 @@ void __exit rxrpc_destroy_all_locals(void)
 
 	_enter("");
 
-	if (list_empty(&rxrpc_local_endpoints))
-		return;
+	flush_workqueue(rxrpc_workqueue);
 
-	mutex_lock(&rxrpc_local_mutex);
-	list_for_each_entry(local, &rxrpc_local_endpoints, link) {
-		pr_err("AF_RXRPC: Leaked local %p {%d}\n",
-		       local, atomic_read(&local->usage));
+	if (!list_empty(&rxrpc_local_endpoints)) {
+		mutex_lock(&rxrpc_local_mutex);
+		list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+			pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+			       local, atomic_read(&local->usage));
+		}
+		mutex_unlock(&rxrpc_local_mutex);
+		BUG();
 	}
-	mutex_unlock(&rxrpc_local_mutex);
-	BUG();
+
+	rcu_barrier();
 }
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 01d4930a11f7..538e9831c699 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -189,7 +189,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
 		INIT_WORK(&peer->error_distributor,
 			  &rxrpc_peer_error_distributor);
 		peer->service_conns = RB_ROOT;
-		rwlock_init(&peer->conn_lock);
+		seqlock_init(&peer->service_conn_lock);
 		spin_lock_init(&peer->lock);
 		peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
 	}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 500cdcdc843c..ced5f07444e5 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -14,15 +14,15 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-static const char *const rxrpc_conn_states[] = {
-	[RXRPC_CONN_UNUSED]		= "Unused  ",
-	[RXRPC_CONN_CLIENT]		= "Client  ",
-	[RXRPC_CONN_SERVER_UNSECURED]	= "SvUnsec ",
-	[RXRPC_CONN_SERVER_CHALLENGING]	= "SvChall ",
-	[RXRPC_CONN_SERVER]		= "SvSecure",
-	[RXRPC_CONN_REMOTELY_ABORTED]	= "RmtAbort",
-	[RXRPC_CONN_LOCALLY_ABORTED]	= "LocAbort",
-	[RXRPC_CONN_NETWORK_ERROR]	= "NetError",
+static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
+	[RXRPC_CONN_UNUSED]			= "Unused  ",
+	[RXRPC_CONN_CLIENT]			= "Client  ",
+	[RXRPC_CONN_SERVICE_UNSECURED]		= "SvUnsec ",
+	[RXRPC_CONN_SERVICE_CHALLENGING]	= "SvChall ",
+	[RXRPC_CONN_SERVICE]			= "SvSecure",
+	[RXRPC_CONN_REMOTELY_ABORTED]		= "RmtAbort",
+	[RXRPC_CONN_LOCALLY_ABORTED]		= "LocAbort",
+	[RXRPC_CONN_NETWORK_ERROR]		= "NetError",
 };
 
 /*
@@ -137,7 +137,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 	if (v == &rxrpc_connections) {
 		seq_puts(seq,
 			 "Proto Local                  Remote                "
-			 " SvID ConnID   Calls    End Use State    Key     "
+			 " SvID ConnID   End Use State    Key     "
 			 " Serial   ISerial\n"
 			 );
 		return 0;
@@ -154,13 +154,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 		ntohs(conn->params.peer->srx.transport.sin.sin_port));
 
 	seq_printf(seq,
-		   "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+		   "UDP   %-22.22s %-22.22s %4x %08x %s %3u"
 		   " %s %08x %08x %08x\n",
 		   lbuff,
 		   rbuff,
 		   conn->params.service_id,
 		   conn->proto.cid,
-		   conn->call_counter,
 		   rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
 		   atomic_read(&conn->usage),
 		   rxrpc_conn_states[conn->state],
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 23c05ec6fa28..63afa9e9cc08 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -103,43 +103,43 @@ error:
  * prime the encryption state with the invariant parts of a connection's
  * description
  */
-static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 {
 	struct rxrpc_key_token *token;
 	SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
-	struct scatterlist sg[2];
+	struct scatterlist sg;
 	struct rxrpc_crypt iv;
-	struct {
-		__be32 x[4];
-	} tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+	__be32 *tmpbuf;
+	size_t tmpsize = 4 * sizeof(__be32);
 
 	_enter("");
 
 	if (!conn->params.key)
-		return;
+		return 0;
+
+	tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
+	if (!tmpbuf)
+		return -ENOMEM;
 
 	token = conn->params.key->payload.data[0];
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
-	tmpbuf.x[0] = htonl(conn->proto.epoch);
-	tmpbuf.x[1] = htonl(conn->proto.cid);
-	tmpbuf.x[2] = 0;
-	tmpbuf.x[3] = htonl(conn->security_ix);
-
-	sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
-	sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	tmpbuf[0] = htonl(conn->proto.epoch);
+	tmpbuf[1] = htonl(conn->proto.cid);
+	tmpbuf[2] = 0;
+	tmpbuf[3] = htonl(conn->security_ix);
 
+	sg_init_one(&sg, tmpbuf, tmpsize);
 	skcipher_request_set_tfm(req, conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+	skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
 	crypto_skcipher_encrypt(req);
 	skcipher_request_zero(req);
 
-	memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
-	ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
-
-	_leave("");
+	memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
+	kfree(tmpbuf);
+	_leave(" = 0");
+	return 0;
 }
 
 /*
@@ -152,12 +152,9 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
 {
 	struct rxrpc_skb_priv *sp;
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	struct rxkad_level1_hdr hdr;
 	struct rxrpc_crypt iv;
-	struct scatterlist sg[2];
-	struct {
-		struct rxkad_level1_hdr hdr;
-		__be32	first;	/* first four bytes of data and padding */
-	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	struct scatterlist sg;
 	u16 check;
 
 	sp = rxrpc_skb(skb);
@@ -167,24 +164,19 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
 	check = sp->hdr.seq ^ sp->hdr.callNumber;
 	data_size |= (u32)check << 16;
 
-	tmpbuf.hdr.data_size = htonl(data_size);
-	memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+	hdr.data_size = htonl(data_size);
+	memcpy(sechdr, &hdr, sizeof(hdr));
 
 	/* start the encryption afresh */
 	memset(&iv, 0, sizeof(iv));
 
-	sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
-	sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
-
+	sg_init_one(&sg, sechdr, 8);
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
 	skcipher_request_zero(req);
 
-	memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
-
 	_leave(" = 0");
 	return 0;
 }
@@ -198,8 +190,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 				       void *sechdr)
 {
 	const struct rxrpc_key_token *token;
-	struct rxkad_level2_hdr rxkhdr
-		__attribute__((aligned(8))); /* must be all on one page */
+	struct rxkad_level2_hdr rxkhdr;
 	struct rxrpc_skb_priv *sp;
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_crypt iv;
@@ -218,18 +209,16 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 
 	rxkhdr.data_size = htonl(data_size | (u32)check << 16);
 	rxkhdr.checksum = 0;
+	memcpy(sechdr, &rxkhdr, sizeof(rxkhdr));
 
 	/* encrypt from the session key */
 	token = call->conn->params.key->payload.data[0];
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
 	sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
-	sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
-
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
-
+	skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
 	crypto_skcipher_encrypt(req);
 
 	/* we want to encrypt the skbuff in-place */
@@ -243,9 +232,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 
 	sg_init_table(sg, nsg);
 	skb_to_sgvec(skb, sg, 0, len);
-
 	skcipher_request_set_crypt(req, sg, sg, len, iv.x);
-
 	crypto_skcipher_encrypt(req);
 
 	_leave(" = 0");
@@ -259,7 +246,7 @@ out:
 /*
  * checksum an RxRPC packet header
  */
-static int rxkad_secure_packet(const struct rxrpc_call *call,
+static int rxkad_secure_packet(struct rxrpc_call *call,
 			       struct sk_buff *skb,
 			       size_t data_size,
 			       void *sechdr)
@@ -267,10 +254,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
 	struct rxrpc_skb_priv *sp;
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_crypt iv;
-	struct scatterlist sg[2];
-	struct {
-		__be32 x[2];
-	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	struct scatterlist sg;
 	u32 x, y;
 	int ret;
 
@@ -293,20 +277,17 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
 	/* calculate the security checksum */
 	x = call->channel << (32 - RXRPC_CIDSHIFT);
 	x |= sp->hdr.seq & 0x3fffffff;
-	tmpbuf.x[0] = htonl(sp->hdr.callNumber);
-	tmpbuf.x[1] = htonl(x);
-
-	sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
-	sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+	call->crypto_buf[1] = htonl(x);
 
+	sg_init_one(&sg, call->crypto_buf, 8);
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
 	skcipher_request_zero(req);
 
-	y = ntohl(tmpbuf.x[1]);
+	y = ntohl(call->crypto_buf[1]);
 	y = (y >> 16) & 0xffff;
 	if (y == 0)
 		y = 1; /* zero checksums are not permitted */
@@ -367,7 +348,6 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
-
 	crypto_skcipher_decrypt(req);
 	skcipher_request_zero(req);
 
@@ -452,7 +432,6 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
-
 	crypto_skcipher_decrypt(req);
 	skcipher_request_zero(req);
 	if (sg != _sg)
@@ -498,17 +477,14 @@ nomem:
 /*
  * verify the security on a received packet
  */
-static int rxkad_verify_packet(const struct rxrpc_call *call,
+static int rxkad_verify_packet(struct rxrpc_call *call,
 			       struct sk_buff *skb,
 			       u32 *_abort_code)
 {
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_crypt iv;
-	struct scatterlist sg[2];
-	struct {
-		__be32 x[2];
-	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	struct scatterlist sg;
 	u16 cksum;
 	u32 x, y;
 	int ret;
@@ -533,20 +509,17 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
 	/* validate the security checksum */
 	x = call->channel << (32 - RXRPC_CIDSHIFT);
 	x |= sp->hdr.seq & 0x3fffffff;
-	tmpbuf.x[0] = htonl(call->call_id);
-	tmpbuf.x[1] = htonl(x);
-
-	sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
-	sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	call->crypto_buf[0] = htonl(call->call_id);
+	call->crypto_buf[1] = htonl(x);
 
+	sg_init_one(&sg, call->crypto_buf, 8);
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+	skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
 	crypto_skcipher_encrypt(req);
 	skcipher_request_zero(req);
 
-	y = ntohl(tmpbuf.x[1]);
+	y = ntohl(call->crypto_buf[1]);
 	cksum = (y >> 16) & 0xffff;
 	if (cksum == 0)
 		cksum = 1; /* zero checksums are not permitted */
@@ -710,29 +683,6 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
 }
 
 /*
- * load a scatterlist with a potentially split-page buffer
- */
-static void rxkad_sg_set_buf2(struct scatterlist sg[2],
-			      void *buf, size_t buflen)
-{
-	int nsg = 1;
-
-	sg_init_table(sg, 2);
-
-	sg_set_buf(&sg[0], buf, buflen);
-	if (sg[0].offset + buflen > PAGE_SIZE) {
-		/* the buffer was split over two pages */
-		sg[0].length = PAGE_SIZE - sg[0].offset;
-		sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
-		nsg++;
-	}
-
-	sg_mark_end(&sg[nsg - 1]);
-
-	ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
-}
-
-/*
  * encrypt the response packet
  */
 static void rxkad_encrypt_response(struct rxrpc_connection *conn,
@@ -741,17 +691,16 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
 {
 	SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
 	struct rxrpc_crypt iv;
-	struct scatterlist sg[2];
+	struct scatterlist sg[1];
 
 	/* continue encrypting from where we left off */
 	memcpy(&iv, s2->session_key, sizeof(iv));
 
-	rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
 	skcipher_request_set_tfm(req, conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
 	crypto_skcipher_encrypt(req);
 	skcipher_request_zero(req);
 }
@@ -818,14 +767,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 	resp.kvno			= htonl(token->kad->kvno);
 	resp.ticket_len			= htonl(token->kad->ticket_len);
 
-	resp.encrypted.call_id[0] =
-		htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
-	resp.encrypted.call_id[1] =
-		htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
-	resp.encrypted.call_id[2] =
-		htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
-	resp.encrypted.call_id[3] =
-		htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
+	resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+	resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+	resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+	resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
 
 	/* calculate the response checksum and then do the encryption */
 	rxkad_calc_response_checksum(&resp);
@@ -887,10 +832,8 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 	}
 
 	sg_init_one(&sg[0], ticket, ticket_len);
-
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
-
 	crypto_skcipher_decrypt(req);
 	skcipher_request_free(req);
 
@@ -1001,7 +944,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
 				   const struct rxrpc_crypt *session_key)
 {
 	SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
-	struct scatterlist sg[2];
+	struct scatterlist sg[1];
 	struct rxrpc_crypt iv;
 
 	_enter(",,%08x%08x",
@@ -1016,12 +959,11 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
 
 	memcpy(&iv, session_key, sizeof(iv));
 
-	rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
 	skcipher_request_set_tfm(req, rxkad_ci);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
 	crypto_skcipher_decrypt(req);
 	skcipher_request_zero(req);
 
@@ -1045,7 +987,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	void *ticket;
 	u32 abort_code, version, kvno, ticket_len, level;
 	__be32 csum;
-	int ret;
+	int ret, i;
 
 	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
 
@@ -1108,11 +1050,26 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	if (response.encrypted.checksum != csum)
 		goto protocol_error_free;
 
-	if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
-	    ntohl(response.encrypted.call_id[1]) > INT_MAX ||
-	    ntohl(response.encrypted.call_id[2]) > INT_MAX ||
-	    ntohl(response.encrypted.call_id[3]) > INT_MAX)
-		goto protocol_error_free;
+	spin_lock(&conn->channel_lock);
+	for (i = 0; i < RXRPC_MAXCALLS; i++) {
+		struct rxrpc_call *call;
+		u32 call_id = ntohl(response.encrypted.call_id[i]);
+
+		if (call_id > INT_MAX)
+			goto protocol_error_unlock;
+
+		if (call_id < conn->channels[i].call_counter)
+			goto protocol_error_unlock;
+		if (call_id > conn->channels[i].call_counter) {
+			call = rcu_dereference_protected(
+				conn->channels[i].call,
+				lockdep_is_held(&conn->channel_lock));
+			if (call && call->state < RXRPC_CALL_COMPLETE)
+				goto protocol_error_unlock;
+			conn->channels[i].call_counter = call_id;
+		}
+	}
+	spin_unlock(&conn->channel_lock);
 
 	abort_code = RXKADOUTOFSEQUENCE;
 	if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
@@ -1137,6 +1094,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	_leave(" = 0");
 	return 0;
 
+protocol_error_unlock:
+	spin_unlock(&conn->channel_lock);
 protocol_error_free:
 	kfree(ticket);
 protocol_error:
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index f28122a15a24..b88914d53ca5 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -10,32 +10,37 @@
  */
 
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/udp.h>
 #include "ar-internal.h"
 
 /*
- * Set up an RxRPC address from a socket buffer.
+ * Fill out a peer address from a socket buffer containing a packet.
  */
-void rxrpc_get_addr_from_skb(struct rxrpc_local *local,
-			     const struct sk_buff *skb,
-			     struct sockaddr_rxrpc *srx)
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
 {
 	memset(srx, 0, sizeof(*srx));
-	srx->transport_type = local->srx.transport_type;
-	srx->transport.family = local->srx.transport.family;
 
-	/* Can we see an ipv4 UDP packet on an ipv6 UDP socket?  and vice
-	 * versa?
-	 */
-	switch (srx->transport.family) {
-	case AF_INET:
+	switch (ntohs(skb->protocol)) {
+	case ETH_P_IP:
+		srx->transport_type = SOCK_DGRAM;
+		srx->transport_len = sizeof(srx->transport.sin);
+		srx->transport.sin.sin_family = AF_INET;
 		srx->transport.sin.sin_port = udp_hdr(skb)->source;
-		srx->transport_len = sizeof(struct sockaddr_in);
-		memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
-		       sizeof(struct in_addr));
-		break;
+		srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+		return 0;
+
+	case ETH_P_IPV6:
+		srx->transport_type = SOCK_DGRAM;
+		srx->transport_len = sizeof(srx->transport.sin6);
+		srx->transport.sin6.sin6_family = AF_INET6;
+		srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+		srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+		return 0;
 
 	default:
-		BUG();
+		pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
+				    ntohs(skb->protocol));
+		return -EAFNOSUPPORT;
 	}
 }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f8c61d2a7963..47ec2305f920 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1122,7 +1122,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		nla_nest_end(skb, nest);
 		ret = skb->len;
 	} else
-		nla_nest_cancel(skb, nest);
+		nlmsg_trim(skb, b);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	if (NETLINK_CB(cb->skb).portid && ret)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index f7b6cf49ea6f..ef74bffa6101 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -223,15 +223,10 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 
 	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
 
-	fp = bpf_prog_get(bpf_fd);
+	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT);
 	if (IS_ERR(fp))
 		return PTR_ERR(fp);
 
-	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
-		bpf_prog_put(fp);
-		return -EINVAL;
-	}
-
 	if (tb[TCA_ACT_BPF_NAME]) {
 		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
 			       nla_len(tb[TCA_ACT_BPF_NAME]),
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index b7fa96926c90..845ab5119c05 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi)
 }
 EXPORT_SYMBOL_GPL(ife_get_meta_u16);
 
-int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
+int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp)
 {
-	mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL);
+	mi->metaval = kmemdup(metaval, sizeof(u32), gfp);
 	if (!mi->metaval)
 		return -ENOMEM;
 
@@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
 }
 EXPORT_SYMBOL_GPL(ife_alloc_meta_u32);
 
-int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval)
+int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp)
 {
-	mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL);
+	mi->metaval = kmemdup(metaval, sizeof(u16), gfp);
 	if (!mi->metaval)
 		return -ENOMEM;
 
@@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
+ * under ife->tcf_lock for existing action
 */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
-				void *val, int len)
+				void *val, int len, bool exists)
 {
 	struct tcf_meta_ops *ops = find_ife_oplist(metaid);
 	int ret = 0;
@@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 	if (!ops) {
 		ret = -ENOENT;
 #ifdef CONFIG_MODULES
-		spin_unlock_bh(&ife->tcf_lock);
+		if (exists)
+			spin_unlock_bh(&ife->tcf_lock);
 		rtnl_unlock();
 		request_module("ifemeta%u", metaid);
 		rtnl_lock();
-		spin_lock_bh(&ife->tcf_lock);
+		if (exists)
+			spin_lock_bh(&ife->tcf_lock);
 		ops = find_ife_oplist(metaid);
 #endif
 	}
@@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
+ * under ife->tcf_lock for existing action
 */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
-			int len)
+			int len, bool atomic)
 {
 	struct tcf_meta_info *mi = NULL;
 	struct tcf_meta_ops *ops = find_ife_oplist(metaid);
@@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	if (!ops)
 		return -ENOENT;
 
-	mi = kzalloc(sizeof(*mi), GFP_KERNEL);
+	mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL);
 	if (!mi) {
 		/*put back what find_ife_oplist took */
 		module_put(ops->owner);
@@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	mi->metaid = metaid;
 	mi->ops = ops;
 	if (len > 0) {
-		ret = ops->alloc(mi, metaval);
+		ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL);
 		if (ret != 0) {
 			kfree(mi);
 			module_put(ops->owner);
@@ -313,11 +315,13 @@ static int use_all_metadata(struct tcf_ife_info *ife)
 	int rc = 0;
 	int installed = 0;
 
+	read_lock(&ife_mod_lock);
 	list_for_each_entry(o, &ifeoplist, list) {
-		rc = add_metainfo(ife, o->metaid, NULL, 0);
+		rc = add_metainfo(ife, o->metaid, NULL, 0, true);
 		if (rc == 0)
 			installed += 1;
 	}
+	read_unlock(&ife_mod_lock);
 
 	if (installed)
 		return 0;
@@ -385,8 +389,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind)
 	spin_unlock_bh(&ife->tcf_lock);
 }
 
-/* under ife->tcf_lock */
-static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
+/* under ife->tcf_lock for existing action */
+static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
+			     bool exists)
 {
 	int len = 0;
 	int rc = 0;
@@ -398,11 +403,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
 			val = nla_data(tb[i]);
 			len = nla_len(tb[i]);
 
-			rc = load_metaops_and_vet(ife, i, val, len);
+			rc = load_metaops_and_vet(ife, i, val, len, exists);
 			if (rc != 0)
 				return rc;
 
-			rc = add_metainfo(ife, i, val, len);
+			rc = add_metainfo(ife, i, val, len, exists);
 			if (rc)
 				return rc;
 		}
@@ -475,7 +480,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			saddr = nla_data(tb[TCA_IFE_SMAC]);
 	}
 
-	spin_lock_bh(&ife->tcf_lock);
+	if (exists)
+		spin_lock_bh(&ife->tcf_lock);
 	ife->tcf_action = parm->action;
 
 	if (parm->flags & IFE_ENCODE) {
@@ -505,11 +511,12 @@ metadata_parse_err:
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(a, bind);
 
-			spin_unlock_bh(&ife->tcf_lock);
+			if (exists)
+				spin_unlock_bh(&ife->tcf_lock);
 			return err;
 		}
 
-		err = populate_metalist(ife, tb2);
+		err = populate_metalist(ife, tb2, exists);
 		if (err)
 			goto metadata_parse_err;
 
@@ -524,12 +531,14 @@ metadata_parse_err:
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(a, bind);
 
-			spin_unlock_bh(&ife->tcf_lock);
+			if (exists)
+				spin_unlock_bh(&ife->tcf_lock);
 			return err;
 		}
 	}
 
-	spin_unlock_bh(&ife->tcf_lock);
+	if (exists)
+		spin_unlock_bh(&ife->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
 		tcf_hash_insert(tn, a);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6148e323ed93..b8c50600697a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -123,10 +123,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
 	}
 
 	td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
-	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
+	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
+		if (exists)
+			tcf_hash_release(a, bind);
 		return -EINVAL;
+	}
 
-	if (!tcf_hash_check(tn, index, a, bind)) {
+	if (!exists) {
 		ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
 				      false);
 		if (ret)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b135d357e1e..70cfbbf96af2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
-			skb_push(skb2, skb->mac_len);
+			skb_push_rcsum(skb2, skb->mac_len);
 	}
 
 	/* mirror is always swallowed */
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 53d1486cddf7..8e573c0f8742 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -47,6 +47,8 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 		skb_set_queue_mapping(skb, d->queue_mapping);
 	if (d->flags & SKBEDIT_F_MARK)
 		skb->mark = d->mark;
+	if (d->flags & SKBEDIT_F_PTYPE)
+		skb->pkt_type = d->ptype;
 
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
@@ -57,6 +59,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
 	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -68,7 +71,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
 	u32 flags = 0, *priority = NULL, *mark = NULL;
-	u16 *queue_mapping = NULL;
+	u16 *queue_mapping = NULL, *ptype = NULL;
 	bool exists = false;
 	int ret = 0, err;
 
@@ -92,6 +95,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
 	}
 
+	if (tb[TCA_SKBEDIT_PTYPE] != NULL) {
+		ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]);
+		if (!skb_pkt_type_ok(*ptype))
+			return -EINVAL;
+		flags |= SKBEDIT_F_PTYPE;
+	}
+
 	if (tb[TCA_SKBEDIT_MARK] != NULL) {
 		flags |= SKBEDIT_F_MARK;
 		mark = nla_data(tb[TCA_SKBEDIT_MARK]);
@@ -132,6 +142,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		d->queue_mapping = *queue_mapping;
 	if (flags & SKBEDIT_F_MARK)
 		d->mark = *mark;
+	if (flags & SKBEDIT_F_PTYPE)
+		d->ptype = *ptype;
 
 	d->tcf_action = parm->action;
 
@@ -158,16 +170,16 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if ((d->flags & SKBEDIT_F_PRIORITY) &&
-	    nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
-		    &d->priority))
+	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
 		goto nla_put_failure;
 	if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
-	    nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING,
-		    sizeof(d->queue_mapping), &d->queue_mapping))
+	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
 		goto nla_put_failure;
 	if ((d->flags & SKBEDIT_F_MARK) &&
-	    nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
-		    &d->mark))
+	    nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+		goto nla_put_failure;
+	if ((d->flags & SKBEDIT_F_PTYPE) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &d->tcf_tm);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 7b342c779da7..c3002c2c68bb 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -272,15 +272,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 
 	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
 
-	fp = bpf_prog_get(bpf_fd);
+	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
 	if (IS_ERR(fp))
 		return PTR_ERR(fp);
 
-	if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
-		bpf_prog_put(fp);
-		return -EINVAL;
-	}
-
 	if (tb[TCA_BPF_NAME]) {
 		name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
 			       nla_len(tb[TCA_BPF_NAME]),
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 6ea0db427f91..baeed6a78d28 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -40,14 +40,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			      struct sk_buff **to_free)
 {
+	unsigned int prev_backlog;
+
 	if (likely(skb_queue_len(&sch->q) < sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
+	prev_backlog = sch->qstats.backlog;
 	/* queue full, remove one skb to fulfill the limit */
 	__qdisc_queue_drop_head(sch, &sch->q, to_free);
 	qdisc_qstats_drop(sch);
 	qdisc_enqueue_tail(skb, sch);
 
+	qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog);
 	return NET_XMIT_CN;
 }
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 8cb5eff7b79c..3ddc7bd74ecb 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -115,9 +115,9 @@ struct hfsc_class {
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est64 rate_est;
-	unsigned int	level;		/* class level in hierarchy */
 	struct tcf_proto __rcu *filter_list; /* filter list */
 	unsigned int	filter_cnt;	/* filter count */
+	unsigned int	level;		/* class level in hierarchy */
 
 	struct hfsc_sched *sched;	/* scheduler data */
 	struct hfsc_class *cl_parent;	/* parent class */
@@ -130,7 +130,6 @@ struct hfsc_class {
 	struct rb_node vt_node;		/* parent's vt_tree member */
 	struct rb_root cf_tree;		/* active children sorted by cl_f */
 	struct rb_node cf_node;		/* parent's cf_heap member */
-	struct list_head dlist;		/* drop list member */
 
 	u64	cl_total;		/* total work in bytes */
 	u64	cl_cumul;		/* cumulative work in bytes done by
@@ -166,10 +165,10 @@ struct hfsc_class {
 	struct runtime_sc cl_virtual;	/* virtual curve */
 	struct runtime_sc cl_ulimit;	/* upperlimit curve */
 
-	unsigned long	cl_flags;	/* which curves are valid */
-	unsigned long	cl_vtperiod;	/* vt period sequence number */
-	unsigned long	cl_parentperiod;/* parent's vt period sequence number*/
-	unsigned long	cl_nactive;	/* number of active children */
+	u8		cl_flags;	/* which curves are valid */
+	u32		cl_vtperiod;	/* vt period sequence number */
+	u32		cl_parentperiod;/* parent's vt period sequence number*/
+	u32		cl_nactive;	/* number of active children */
 };
 
 struct hfsc_sched {
@@ -177,8 +176,6 @@ struct hfsc_sched {
 	struct hfsc_class root;			/* root class */
 	struct Qdisc_class_hash clhash;		/* class hash */
 	struct rb_root eligible;		/* eligible tree */
-	struct list_head droplist;		/* active leaf class list (for
-						   dropping) */
 	struct qdisc_watchdog watchdog;		/* watchdog timer */
 };
 
@@ -781,6 +778,20 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
 		else
 			go_passive = 0;
 
+		/* update vt */
+		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+			    - cl->cl_vtoff + cl->cl_vtadj;
+
+		/*
+		 * if vt of the class is smaller than cvtmin,
+		 * the class was skipped in the past due to non-fit.
+		 * if so, we need to adjust vtadj.
+		 */
+		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+			cl->cl_vt = cl->cl_parent->cl_cvtmin;
+		}
+
 		if (go_passive) {
 			/* no more active child, going passive */
 
@@ -797,25 +808,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
 			continue;
 		}
 
-		/*
-		 * update vt and f
-		 */
-		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
-			    - cl->cl_vtoff + cl->cl_vtadj;
-
-		/*
-		 * if vt of the class is smaller than cvtmin,
-		 * the class was skipped in the past due to non-fit.
-		 * if so, we need to adjust vtadj.
-		 */
-		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
-			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
-			cl->cl_vt = cl->cl_parent->cl_cvtmin;
-		}
-
 		/* update the vt tree */
 		vttree_update(cl);
 
+		/* update f */
 		if (cl->cl_flags & HFSC_USC) {
 			cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
 							      cl->cl_total);
@@ -858,7 +854,6 @@ set_active(struct hfsc_class *cl, unsigned int len)
 	if (cl->cl_flags & HFSC_FSC)
 		init_vf(cl, len);
 
-	list_add_tail(&cl->dlist, &cl->sched->droplist);
 }
 
 static void
@@ -867,8 +862,6 @@ set_passive(struct hfsc_class *cl)
 	if (cl->cl_flags & HFSC_RSC)
 		eltree_remove(cl);
 
-	list_del(&cl->dlist);
-
 	/*
 	 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
 	 * needs to be called explicitly to remove a class from vttree.
@@ -882,7 +875,7 @@ qdisc_peek_len(struct Qdisc *sch)
 	unsigned int len;
 
 	skb = sch->ops->peek(sch);
-	if (skb == NULL) {
+	if (unlikely(skb == NULL)) {
 		qdisc_warn_nonwc("qdisc_peek_len", sch);
 		return 0;
 	}
@@ -947,7 +940,7 @@ static void
 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
 {
 	sc2isc(fsc, &cl->cl_fsc);
-	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
 	cl->cl_flags |= HFSC_FSC;
 }
 
@@ -1443,7 +1436,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		return err;
 	q->eligible = RB_ROOT;
-	INIT_LIST_HEAD(&q->droplist);
 
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
@@ -1527,7 +1519,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 			hfsc_reset_class(cl);
 	}
 	q->eligible = RB_ROOT;
-	INIT_LIST_HEAD(&q->droplist);
 	qdisc_watchdog_cancel(&q->watchdog);
 	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
@@ -1594,8 +1585,17 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 		return err;
 	}
 
-	if (cl->qdisc->q.qlen == 1)
+	if (cl->qdisc->q.qlen == 1) {
 		set_active(cl, qdisc_pkt_len(skb));
+		/*
+		 * If this is the first packet, isolate the head so an eventual
+		 * head drop before the first dequeue operation has no chance
+		 * to invalidate the deadline.
+		 */
+		if (cl->cl_flags & HFSC_RSC)
+			cl->qdisc->ops->peek(cl->qdisc);
+
+	}
 
 	qdisc_qstats_backlog_inc(sch, skb);
 	sch->q.qlen++;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ba098f2654b4..91982d9784b3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -984,7 +984,9 @@ static void htb_work_func(struct work_struct *work)
 	struct htb_sched *q = container_of(work, struct htb_sched, work);
 	struct Qdisc *sch = q->watchdog.qdisc;
 
+	rcu_read_lock();
 	__netif_schedule(qdisc_root(sch));
+	rcu_read_unlock();
 }
 
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ccca8ca4c722..aaaf02175338 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -487,10 +487,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		skb = segs;
 		segs = segs->next;
 
-		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
-		    (skb->ip_summed == CHECKSUM_PARTIAL &&
-		     skb_checksum_help(skb))) {
-			rc = qdisc_drop(skb, sch, to_free);
+		skb = skb_unshare(skb, GFP_ATOMIC);
+		if (unlikely(!skb)) {
+			qdisc_qstats_drop(sch);
+			goto finish_segs;
+		}
+		if (skb->ip_summed == CHECKSUM_PARTIAL &&
+		    skb_checksum_help(skb)) {
+			qdisc_drop(skb, sch, to_free);
 			goto finish_segs;
 		}
 
@@ -617,17 +621,17 @@ deliver:
 #endif
 
 			if (q->qdisc) {
+				unsigned int pkt_len = qdisc_pkt_len(skb);
 				struct sk_buff *to_free = NULL;
 				int err;
 
 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
 				kfree_skb_list(to_free);
-				if (unlikely(err != NET_XMIT_SUCCESS)) {
-					if (net_xmit_drop_count(err)) {
-						qdisc_qstats_drop(sch);
-						qdisc_tree_reduce_backlog(sch, 1,
-									  qdisc_pkt_len(skb));
-					}
+				if (err != NET_XMIT_SUCCESS &&
+				    net_xmit_drop_count(err)) {
+					qdisc_qstats_drop(sch);
+					qdisc_tree_reduce_backlog(sch, 1,
+								  pkt_len);
 				}
 				goto tfifo_dequeue;
 			}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f4d443aeae54..8f575899adfa 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -153,8 +153,9 @@ prio_destroy(struct Qdisc *sch)
 static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *queues[TCQ_PRIO_BANDS];
+	int oldbands = q->bands, i;
 	struct tc_prio_qopt *qopt;
-	int i;
 
 	if (nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
@@ -168,62 +169,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 			return -EINVAL;
 	}
 
+	/* Before commit, make sure we can allocate all new qdiscs */
+	for (i = oldbands; i < qopt->bands; i++) {
+		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+					      TC_H_MAKE(sch->handle, i + 1));
+		if (!queues[i]) {
+			while (i > oldbands)
+				qdisc_destroy(queues[--i]);
+			return -ENOMEM;
+		}
+	}
+
 	sch_tree_lock(sch);
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-	for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
+	for (i = q->bands; i < oldbands; i++) {
 		struct Qdisc *child = q->queues[i];
-		q->queues[i] = &noop_qdisc;
-		if (child != &noop_qdisc) {
-			qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
-			qdisc_destroy(child);
-		}
-	}
-	sch_tree_unlock(sch);
 
-	for (i = 0; i < q->bands; i++) {
-		if (q->queues[i] == &noop_qdisc) {
-			struct Qdisc *child, *old;
-
-			child = qdisc_create_dflt(sch->dev_queue,
-						  &pfifo_qdisc_ops,
-						  TC_H_MAKE(sch->handle, i + 1));
-			if (child) {
-				sch_tree_lock(sch);
-				old = q->queues[i];
-				q->queues[i] = child;
-
-				if (old != &noop_qdisc) {
-					qdisc_tree_reduce_backlog(old,
-								  old->q.qlen,
-								  old->qstats.backlog);
-					qdisc_destroy(old);
-				}
-				sch_tree_unlock(sch);
-			}
-		}
+		qdisc_tree_reduce_backlog(child, child->q.qlen,
+					  child->qstats.backlog);
+		qdisc_destroy(child);
 	}
+
+	for (i = oldbands; i < q->bands; i++)
+		q->queues[i] = queues[i];
+
+	sch_tree_unlock(sch);
 	return 0;
 }
 
 static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 {
-	struct prio_sched_data *q = qdisc_priv(sch);
-	int i;
-
-	for (i = 0; i < TCQ_PRIO_BANDS; i++)
-		q->queues[i] = &noop_qdisc;
-
-	if (opt == NULL) {
+	if (!opt)
 		return -EINVAL;
-	} else {
-		int err;
 
-		if ((err = prio_tune(sch, opt)) != 0)
-			return err;
-	}
-	return 0;
+	return prio_tune(sch, opt);
 }
 
 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index e1849f3714ad..1c23060c41a6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -268,6 +268,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 		goto fail_init;
 
 	asoc->active_key_id = ep->active_key_id;
+	asoc->prsctp_enable = ep->prsctp_enable;
 
 	/* Save the hmacs and chunks list into this association */
 	if (ep->auth_hmacs_list)
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1eb94bf18ef4..a55e54738b81 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -335,13 +335,32 @@ errout:
 /* Check whether this message has expired. */
 int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 {
-	struct sctp_datamsg *msg = chunk->msg;
+	if (!chunk->asoc->prsctp_enable ||
+	    !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
+		struct sctp_datamsg *msg = chunk->msg;
+
+		if (!msg->can_abandon)
+			return 0;
+
+		if (time_after(jiffies, msg->expires_at))
+			return 1;
 
-	if (!msg->can_abandon)
 		return 0;
+	}
 
-	if (time_after(jiffies, msg->expires_at))
+	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
+	    time_after(jiffies, chunk->prsctp_param)) {
+		if (chunk->sent_count)
+			chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+		else
+			chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+		return 1;
+	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
+		   chunk->sent_count > chunk->prsctp_param) {
+		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		return 1;
+	}
+	/* PRIO policy is processed by sendmsg, not here */
 
 	return 0;
 }
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9d494e35e7f9..1f03065686fe 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -163,6 +163,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	 */
 	ep->auth_hmacs_list = auth_hmacs;
 	ep->auth_chunk_list = auth_chunks;
+	ep->prsctp_enable = net->sctp.prsctp_enable;
 
 	return ep;
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 6f8e676d285e..30d72f7707b6 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -90,17 +90,6 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
 	return 0;
 }
 
-struct sctp_input_cb {
-	union {
-		struct inet_skb_parm	h4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct inet6_skb_parm	h6;
-#endif
-	} header;
-	struct sctp_chunk *chunk;
-};
-#define SCTP_INPUT_CB(__skb)	((struct sctp_input_cb *)&((__skb)->cb[0]))
-
 /*
  * This is the routine which IP calls when receiving an SCTP packet.
  */
@@ -151,6 +140,7 @@ int sctp_rcv(struct sk_buff *skb)
 	af = sctp_get_af_specific(family);
 	if (unlikely(!af))
 		goto discard_it;
+	SCTP_INPUT_CB(skb)->af = af;
 
 	/* Initialize local addresses for lookups. */
 	af->from_skb(&src, skb, 1);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index edabbbdfca54..942770675f4c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -217,7 +217,14 @@ new_skb:
 		chunk->auth = 0;
 		chunk->has_asconf = 0;
 		chunk->end_of_packet = 0;
-		chunk->ecn_ce_done = 0;
+		if (chunk->head_skb) {
+			struct sctp_input_cb
+				*cb = SCTP_INPUT_CB(chunk->skb),
+				*head_cb = SCTP_INPUT_CB(chunk->head_skb);
+
+			cb->chunk = head_cb->chunk;
+			cb->af = head_cb->af;
+		}
 	}
 
 	chunk->chunk_hdr = ch;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0657d18a85bf..ae6f1a2178ba 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -420,6 +420,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 	addr->v6.sin6_flowinfo = 0; /* FIXME */
 	addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
 
+	/* Always called on head skb, so this is safe */
 	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
@@ -710,8 +711,7 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
 /* Where did this skb come from?  */
 static int sctp_v6_skb_iif(const struct sk_buff *skb)
 {
-	struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
-	return opt->iif;
+	return IP6CB(skb)->iif;
 }
 
 /* Was this packet marked by Explicit Congestion Notification? */
@@ -780,15 +780,14 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 	if (ip_hdr(skb)->version == 4) {
 		addr->v4.sin_family = AF_INET;
 		addr->v4.sin_port = sh->source;
-		addr->v4.sin_addr.s_addr =  ip_hdr(skb)->saddr;
+		addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr;
 	} else {
 		addr->v6.sin6_family = AF_INET6;
 		addr->v6.sin6_flowinfo = 0;
 		addr->v6.sin6_port = sh->source;
 		addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
 		if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
-			struct sctp_ulpevent *ev = sctp_skb2event(skb);
-			addr->v6.sin6_scope_id = ev->iif;
+			addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
 		}
 	}
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1541a91d6d9d..7425f6c23888 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -316,6 +316,8 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
 		packet->has_data = 1;
 		/* timestamp the chunk for rtx purposes */
 		chunk->sent_at = jiffies;
+		/* Mainly used for prsctp RTX policy */
+		chunk->sent_count++;
 		break;
 	case SCTP_CID_COOKIE_ECHO:
 		packet->has_cookie_echo = 1;
@@ -582,9 +584,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 			 */
 			pkt_size -= WORD_ROUND(chunk->skb->len);
 
-			if (chunk == packet->auth && !list_empty(&packet->chunk_list))
-				list_add(&chunk->list, &packet->chunk_list);
-			else if (!sctp_chunk_is_data(chunk))
+			if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
 				sctp_chunk_free(chunk);
 
 			if (!pkt_size)
@@ -605,6 +605,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 						 (struct sctp_auth_chunk *)auth,
 						 gfp);
 
+		if (packet->auth) {
+			if (!list_empty(&packet->chunk_list)) {
+				/* We will generate more packets, so re-queue
+				 * auth chunk.
+				 */
+				list_add(&chunk->list, &packet->chunk_list);
+			} else {
+				sctp_chunk_free(packet->auth);
+				packet->auth = NULL;
+			}
+		}
+
 		if (!gso)
 			break;
 
@@ -735,6 +747,8 @@ err:
 	}
 	goto out;
 nomem:
+	if (packet->auth && list_empty(&packet->auth->list))
+		sctp_chunk_free(packet->auth);
 	err = -ENOMEM;
 	goto err;
 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 084718f9b3da..72e54a416af6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -326,6 +326,9 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
 
 			sctp_chunk_hold(chunk);
 			sctp_outq_tail_data(q, chunk);
+			if (chunk->asoc->prsctp_enable &&
+			    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+				chunk->asoc->sent_cnt_removable++;
 			if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
 				SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
 			else
@@ -372,6 +375,96 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new)
 		list_add_tail(new, head);
 }
 
+static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
+				  struct sctp_sndrcvinfo *sinfo,
+				  struct list_head *queue, int msg_len)
+{
+	struct sctp_chunk *chk, *temp;
+
+	list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		    chk->prsctp_param <= sinfo->sinfo_timetolive)
+			continue;
+
+		list_del_init(&chk->transmitted_list);
+		sctp_insert_list(&asoc->outqueue.abandoned,
+				 &chk->transmitted_list);
+
+		asoc->sent_cnt_removable--;
+		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+
+		if (!chk->tsn_gap_acked) {
+			if (chk->transport)
+				chk->transport->flight_size -=
+						sctp_data_size(chk);
+			asoc->outqueue.outstanding_bytes -= sctp_data_size(chk);
+		}
+
+		msg_len -= SCTP_DATA_SNDSIZE(chk) +
+			   sizeof(struct sk_buff) +
+			   sizeof(struct sctp_chunk);
+		if (msg_len <= 0)
+			break;
+	}
+
+	return msg_len;
+}
+
+static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+				    struct sctp_sndrcvinfo *sinfo,
+				    struct list_head *queue, int msg_len)
+{
+	struct sctp_chunk *chk, *temp;
+
+	list_for_each_entry_safe(chk, temp, queue, list) {
+		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		    chk->prsctp_param <= sinfo->sinfo_timetolive)
+			continue;
+
+		list_del_init(&chk->list);
+		asoc->sent_cnt_removable--;
+		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+
+		msg_len -= SCTP_DATA_SNDSIZE(chk) +
+			   sizeof(struct sk_buff) +
+			   sizeof(struct sctp_chunk);
+		sctp_chunk_free(chk);
+		if (msg_len <= 0)
+			break;
+	}
+
+	return msg_len;
+}
+
+/* Abandon the chunks according their priorities */
+void sctp_prsctp_prune(struct sctp_association *asoc,
+		       struct sctp_sndrcvinfo *sinfo, int msg_len)
+{
+	struct sctp_transport *transport;
+
+	if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+		return;
+
+	msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+					 &asoc->outqueue.retransmit,
+					 msg_len);
+	if (msg_len <= 0)
+		return;
+
+	list_for_each_entry(transport, &asoc->peer.transport_addr_list,
+			    transports) {
+		msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+						 &transport->transmitted,
+						 msg_len);
+		if (msg_len <= 0)
+			return;
+	}
+
+	sctp_prsctp_prune_unsent(asoc, sinfo,
+				 &asoc->outqueue.out_chunk_list,
+				 msg_len);
+}
+
 /* Mark all the eligible packets on a transport for retransmission.  */
 void sctp_retransmit_mark(struct sctp_outq *q,
 			  struct sctp_transport *transport,
@@ -962,6 +1055,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 
 				/* Mark as failed send. */
 				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
+				if (asoc->prsctp_enable &&
+				    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+					asoc->sent_cnt_removable--;
 				sctp_chunk_free(chunk);
 				continue;
 			}
@@ -1251,6 +1347,9 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
 		tsn = ntohl(tchunk->subh.data_hdr->tsn);
 		if (TSN_lte(tsn, ctsn)) {
 			list_del_init(&tchunk->transmitted_list);
+			if (asoc->prsctp_enable &&
+			    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+				asoc->sent_cnt_removable--;
 			sctp_chunk_free(tchunk);
 		}
 	}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3b56ae55aba3..1adb9270e317 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -240,6 +240,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 	port = &addr->v4.sin_port;
 	addr->v4.sin_family = AF_INET;
 
+	/* Always called on head skb, so this is safe */
 	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 1ce724b87618..f69edcf219e5 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -3,12 +3,6 @@
 #include <linux/sock_diag.h>
 #include <net/sctp/sctp.h>
 
-extern void inet_diag_msg_common_fill(struct inet_diag_msg *r,
-				      struct sock *sk);
-extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
-				    struct inet_diag_msg *r, int ext,
-				    struct user_namespace *user_ns);
-
 static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 			       void *info);
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 56f364d8f932..8c77b87a8565 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -108,14 +108,9 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
 /* What was the inbound interface for this chunk? */
 int sctp_chunk_iif(const struct sctp_chunk *chunk)
 {
-	struct sctp_af *af;
-	int iif = 0;
-
-	af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
-	if (af)
-		iif = af->skb_iif(chunk->skb);
+	struct sk_buff *skb = chunk->skb;
 
-	return iif;
+	return SCTP_INPUT_CB(skb)->af->skb_iif(skb);
 }
 
 /* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -261,7 +256,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
 	chunksize += sizeof(ecap_param);
 
-	if (net->sctp.prsctp_enable)
+	if (asoc->prsctp_enable)
 		chunksize += sizeof(prsctp_param);
 
 	/* ADDIP: Section 4.2.7:
@@ -355,7 +350,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 
-	if (net->sctp.prsctp_enable)
+	if (asoc->prsctp_enable)
 		sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
 
 	if (sp->adaptation_ind) {
@@ -711,6 +706,20 @@ nodata:
 	return retval;
 }
 
+static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
+				   const struct sctp_sndrcvinfo *sinfo)
+{
+	if (!chunk->asoc->prsctp_enable)
+		return;
+
+	if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+		chunk->prsctp_param =
+			jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+	else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
+		 SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
+		chunk->prsctp_param = sinfo->sinfo_timetolive;
+}
+
 /* Make a DATA chunk for the given association from the provided
  * parameters.  However, do not populate the data payload.
  */
@@ -744,6 +753,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
 
 	retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
 	memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+	sctp_set_prsctp_policy(retval, sinfo);
 
 nodata:
 	return retval;
@@ -1585,7 +1595,6 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
 	struct sctp_association *asoc;
 	struct sk_buff *skb;
 	sctp_scope_t scope;
-	struct sctp_af *af;
 
 	/* Create the bare association.  */
 	scope = sctp_scope(sctp_source(chunk));
@@ -1595,16 +1604,10 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
 	asoc->temp = 1;
 	skb = chunk->skb;
 	/* Create an entry for the source address of the packet.  */
-	af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
-	if (unlikely(!af))
-		goto fail;
-	af->from_skb(&asoc->c.peer_addr, skb, 1);
+	SCTP_INPUT_CB(skb)->af->from_skb(&asoc->c.peer_addr, skb, 1);
+
 nodata:
 	return asoc;
-
-fail:
-	sctp_association_free(asoc);
-	return NULL;
 }
 
 /* Build a cookie representing asoc.
@@ -2024,8 +2027,8 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 	for (i = 0; i < num_ext; i++) {
 		switch (param.ext->chunks[i]) {
 		case SCTP_CID_FWD_TSN:
-			if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable)
-				    asoc->peer.prsctp_capable = 1;
+			if (asoc->prsctp_enable && !asoc->peer.prsctp_capable)
+				asoc->peer.prsctp_capable = 1;
 			break;
 		case SCTP_CID_AUTH:
 			/* if the peer reports AUTH, assume that he
@@ -2169,7 +2172,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_FWD_TSN_SUPPORT:
-		if (net->sctp.prsctp_enable)
+		if (ep->prsctp_enable)
 			break;
 		goto fallthrough;
 
@@ -2653,7 +2656,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_FWD_TSN_SUPPORT:
-		if (net->sctp.prsctp_enable) {
+		if (asoc->prsctp_enable) {
 			asoc->peer.prsctp_capable = 1;
 			break;
 		}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f1f08c8f277b..d88bb2b0b699 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6118,14 +6118,11 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * chunk later.
 	 */
 
-	if (!chunk->ecn_ce_done) {
-		struct sctp_af *af;
+	if (asoc->peer.ecn_capable && !chunk->ecn_ce_done) {
+		struct sctp_af *af = SCTP_INPUT_CB(chunk->skb)->af;
 		chunk->ecn_ce_done = 1;
 
-		af = sctp_get_af_specific(
-			ipver2af(ip_hdr(chunk->skb)->version));
-
-		if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
+		if (af->is_ce(sctp_gso_headskb(chunk->skb))) {
 			/* Do real work as sideffect. */
 			sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE,
 					SCTP_U32(tsn));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index cdabbd8219b1..52fdd540a9ef 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1914,6 +1914,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		goto out_free;
 	}
 
+	if (sctp_wspace(asoc) < msg_len)
+		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	if (!sctp_wspace(asoc)) {
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -2063,7 +2066,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 {
 	struct sctp_ulpevent *event = NULL;
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct sk_buff *skb;
+	struct sk_buff *skb, *head_skb;
 	int copied;
 	int err = 0;
 	int skb_len;
@@ -2099,12 +2102,16 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (err)
 		goto out_free;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	if (event->chunk && event->chunk->head_skb)
+		head_skb = event->chunk->head_skb;
+	else
+		head_skb = skb;
+	sock_recv_ts_and_drops(msg, sk, head_skb);
 	if (sctp_ulpevent_is_notification(event)) {
 		msg->msg_flags |= MSG_NOTIFICATION;
 		sp->pf->event_msgname(event, msg->msg_name, addr_len);
 	} else {
-		sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
+		sp->pf->skb_msgname(head_skb, msg->msg_name, addr_len);
 	}
 
 	/* Check if we allow SCTP_NXTINFO. */
@@ -3661,6 +3668,80 @@ static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
 	return 0;
 }
 
+static int sctp_setsockopt_pr_supported(struct sock *sk,
+					char __user *optval,
+					unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (asoc) {
+		asoc->prsctp_enable = !!params.assoc_value;
+	} else if (!params.assoc_id) {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		sp->ep->prsctp_enable = !!params.assoc_value;
+	} else {
+		goto out;
+	}
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_setsockopt_default_prinfo(struct sock *sk,
+					  char __user *optval,
+					  unsigned int optlen)
+{
+	struct sctp_default_prinfo info;
+	struct sctp_association *asoc;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(info))
+		goto out;
+
+	if (copy_from_user(&info, optval, sizeof(info))) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	if (info.pr_policy & ~SCTP_PR_SCTP_MASK)
+		goto out;
+
+	if (info.pr_policy == SCTP_PR_SCTP_NONE)
+		info.pr_value = 0;
+
+	asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+	if (asoc) {
+		SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
+		asoc->default_timetolive = info.pr_value;
+	} else if (!info.pr_assoc_id) {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
+		sp->default_timetolive = info.pr_value;
+	} else {
+		goto out;
+	}
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3821,6 +3902,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_RECVNXTINFO:
 		retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
 		break;
+	case SCTP_PR_SUPPORTED:
+		retval = sctp_setsockopt_pr_supported(sk, optval, optlen);
+		break;
+	case SCTP_DEFAULT_PRINFO:
+		retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -6166,6 +6253,148 @@ static int sctp_getsockopt_recvnxtinfo(struct sock *sk,	int len,
 	return 0;
 }
 
+static int sctp_getsockopt_pr_supported(struct sock *sk, int len,
+					char __user *optval,
+					int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (asoc) {
+		params.assoc_value = asoc->prsctp_enable;
+	} else if (!params.assoc_id) {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		params.assoc_value = sp->ep->prsctp_enable;
+	} else {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_getsockopt_default_prinfo(struct sock *sk, int len,
+					  char __user *optval,
+					  int __user *optlen)
+{
+	struct sctp_default_prinfo info;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(info)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(info);
+	if (copy_from_user(&info, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+	if (asoc) {
+		info.pr_policy = SCTP_PR_POLICY(asoc->default_flags);
+		info.pr_value = asoc->default_timetolive;
+	} else if (!info.pr_assoc_id) {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		info.pr_policy = SCTP_PR_POLICY(sp->default_flags);
+		info.pr_value = sp->default_timetolive;
+	} else {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &info, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
+static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
+					  char __user *optval,
+					  int __user *optlen)
+{
+	struct sctp_prstatus params;
+	struct sctp_association *asoc;
+	int policy;
+	int retval = -EINVAL;
+
+	if (len < sizeof(params))
+		goto out;
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	policy = params.sprstat_policy;
+	if (policy & ~SCTP_PR_SCTP_MASK)
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+	if (!asoc)
+		goto out;
+
+	if (policy == SCTP_PR_SCTP_NONE) {
+		params.sprstat_abandoned_unsent = 0;
+		params.sprstat_abandoned_sent = 0;
+		for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+			params.sprstat_abandoned_unsent +=
+				asoc->abandoned_unsent[policy];
+			params.sprstat_abandoned_sent +=
+				asoc->abandoned_sent[policy];
+		}
+	} else {
+		params.sprstat_abandoned_unsent =
+			asoc->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+		params.sprstat_abandoned_sent =
+			asoc->abandoned_sent[__SCTP_PR_INDEX(policy)];
+	}
+
+	if (put_user(len, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	if (copy_to_user(optval, &params, len)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -6319,6 +6548,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_RECVNXTINFO:
 		retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen);
 		break;
+	case SCTP_PR_SUPPORTED:
+		retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen);
+		break;
+	case SCTP_DEFAULT_PRINFO:
+		retval = sctp_getsockopt_default_prinfo(sk, len, optval,
+							optlen);
+		break;
+	case SCTP_PR_ASSOC_STATUS:
+		retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
+							optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -6866,7 +7106,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
 			if (cmsgs->srinfo->sinfo_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-			      SCTP_SACK_IMMEDIATELY |
+			      SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
 			      SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
@@ -6890,7 +7130,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
 			if (cmsgs->sinfo->snd_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-			      SCTP_SACK_IMMEDIATELY |
+			      SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
 			      SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index d1e38308f615..f6219b164b42 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -51,7 +51,7 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event);
 
 /* Initialize an ULP event from an given skb.  */
 static void sctp_ulpevent_init(struct sctp_ulpevent *event,
-			       int msg_flags,
+			       __u16 msg_flags,
 			       unsigned int len)
 {
 	memset(event, 0, sizeof(struct sctp_ulpevent));
@@ -60,7 +60,7 @@ static void sctp_ulpevent_init(struct sctp_ulpevent *event,
 }
 
 /* Create a new sctp_ulpevent.  */
-static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags,
+static struct sctp_ulpevent *sctp_ulpevent_new(int size, __u16 msg_flags,
 					       gfp_t gfp)
 {
 	struct sctp_ulpevent *event;
@@ -701,6 +701,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 
 	sctp_ulpevent_receive_data(event, asoc);
 
+	/* And hold the chunk as we need it for getting the IP headers
+	 * later in recvmsg
+	 */
+	sctp_chunk_hold(chunk);
+	event->chunk = chunk;
+
 	event->stream = ntohs(chunk->subh.data_hdr->stream);
 	event->ssn = ntohs(chunk->subh.data_hdr->ssn);
 	event->ppid = chunk->subh.data_hdr->ppid;
@@ -710,11 +716,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	}
 	event->tsn = ntohl(chunk->subh.data_hdr->tsn);
 	event->msg_flags |= chunk->chunk_hdr->flags;
-	event->iif = sctp_chunk_iif(chunk);
 
 	return event;
 
 fail_mark:
+	sctp_chunk_put(chunk);
 	kfree_skb(skb);
 fail:
 	return NULL;
@@ -1007,6 +1013,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
 
 done:
 	sctp_assoc_rwnd_increase(event->asoc, len);
+	sctp_chunk_put(event->chunk);
 	sctp_ulpevent_release_owner(event);
 }
 
@@ -1029,6 +1036,7 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
 	}
 
 done:
+	sctp_chunk_put(event->chunk);
 	sctp_ulpevent_release_owner(event);
 }
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 06b4df9faaa1..2808d550d273 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -446,16 +446,27 @@ out_no_rpciod:
 	return ERR_PTR(err);
 }
 
-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
 					struct rpc_xprt *xprt)
 {
 	struct rpc_clnt *clnt = NULL;
 	struct rpc_xprt_switch *xps;
 
-	xps = xprt_switch_alloc(xprt, GFP_KERNEL);
-	if (xps == NULL)
-		return ERR_PTR(-ENOMEM);
-
+	if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
+		WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+		xps = args->bc_xprt->xpt_bc_xps;
+		xprt_switch_get(xps);
+	} else {
+		xps = xprt_switch_alloc(xprt, GFP_KERNEL);
+		if (xps == NULL) {
+			xprt_put(xprt);
+			return ERR_PTR(-ENOMEM);
+		}
+		if (xprt->bc_xprt) {
+			xprt_switch_get(xps);
+			xprt->bc_xprt->xpt_bc_xps = xps;
+		}
+	}
 	clnt = rpc_new_client(args, xps, xprt, NULL);
 	if (IS_ERR(clnt))
 		return clnt;
@@ -483,7 +494,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
 
 	return clnt;
 }
-EXPORT_SYMBOL_GPL(rpc_create_xprt);
 
 /**
  * rpc_create - create an RPC client and transport with one call
@@ -509,6 +519,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	};
 	char servername[48];
 
+	if (args->bc_xprt) {
+		WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+		xprt = args->bc_xprt->xpt_bc_xprt;
+		if (xprt) {
+			xprt_get(xprt);
+			return rpc_create_xprt(args, xprt);
+		}
+	}
+
 	if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
 		xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
 	if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index f5572e31d518..4f01f63102ee 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -136,6 +136,8 @@ static void svc_xprt_free(struct kref *kref)
 	/* See comment on corresponding get in xs_setup_bc_tcp(): */
 	if (xprt->xpt_bc_xprt)
 		xprt_put(xprt->xpt_bc_xprt);
+	if (xprt->xpt_bc_xps)
+		xprt_switch_put(xprt->xpt_bc_xps);
 	xprt->xpt_ops->xpo_free(xprt);
 	module_put(owner);
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2d3e0c42361e..7e2b2fa189c3 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -3057,6 +3057,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 		return xprt;
 
 	args->bc_xprt->xpt_bc_xprt = NULL;
+	args->bc_xprt->xpt_bc_xps = NULL;
 	xprt_put(xprt);
 	ret = ERR_PTR(-EINVAL);
 out_err:
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 59658b2e9cdf..a5fc9dd24aa9 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1286,8 +1286,8 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
 
-static bool switchdev_port_same_parent_id(struct net_device *a,
-					  struct net_device *b)
+bool switchdev_port_same_parent_id(struct net_device *a,
+				   struct net_device *b)
 {
 	struct switchdev_attr a_attr = {
 		.orig_dev = a,
@@ -1323,6 +1323,7 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
 
 	return dev->ifindex;
 }
+EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
 					  u32 old_mark, u32 *reset_mark)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 9a70e1d744d2..8584cc48654c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -411,7 +411,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
 		return 0;
 
 	/* Send RESET message even if bearer is detached from device */
-	tipc_ptr = rtnl_dereference(dev->tipc_ptr);
+	tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
 	if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb))))
 		goto drop;
 
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index ad9d477cc242..6b109a808d4c 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -135,9 +135,12 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 	u16 caps = msg_node_capabilities(hdr);
 	bool respond = false;
 	bool dupl_addr = false;
+	int err;
 
-	bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
+	err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
 	kfree_skb(skb);
+	if (err)
+		return;
 
 	/* Ensure message from node is valid and communication is permitted */
 	if (net_id != tn->net_id)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 03f8bdf70d8f..c1df33f878b2 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -705,7 +705,8 @@ static void link_profile_stats(struct tipc_link *l)
  */
 int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 {
-	int mtyp, rc = 0;
+	int mtyp = 0;
+	int rc = 0;
 	bool state = false;
 	bool probe = false;
 	bool setup = false;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 8740930f0787..17201aa8423d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,6 +41,8 @@
 #include "name_table.h"
 
 #define MAX_FORWARD_SIZE 1024
+#define BUF_HEADROOM (LL_MAX_HEADER + 48)
+#define BUF_TAILROOM 16
 
 static unsigned int align(unsigned int i)
 {
@@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
 	}
 
+	if (skb_cloned(_skb) &&
+	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+		goto exit;
+
 	/* Now reverse the concerned fields */
 	msg_set_errcode(hdr, err);
 	msg_set_origport(hdr, msg_destport(&ohdr));
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 024da8af91f0..7cf52fb39bee 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -94,17 +94,6 @@ struct plist;
 
 #define TIPC_MEDIA_INFO_OFFSET	5
 
-/**
- * TIPC message buffer code
- *
- * TIPC message buffer headroom reserves space for the worst-case
- * link-level device header (in case the message is sent off-node).
- *
- * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields
- *       are word aligned for quicker access
- */
-#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-
 struct tipc_skb_cb {
 	void *handle;
 	struct sk_buff *tail;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 3ad9fab1985f..1fd464764765 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 
 	link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
 	link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
-	nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]),
+	nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
 		    TIPC_MAX_LINK_NAME);
 
 	return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 88bfcd707064..c49b8df438cb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -796,9 +796,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
  * @tsk: receiving socket
  * @skb: pointer to message buffer.
  */
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
+static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+			      struct sk_buff_head *xmitq)
 {
 	struct sock *sk = &tsk->sk;
+	u32 onode = tsk_own_node(tsk);
 	struct tipc_msg *hdr = buf_msg(skb);
 	int mtyp = msg_type(hdr);
 	bool conn_cong;
@@ -811,7 +813,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
 
 	if (mtyp == CONN_PROBE) {
 		msg_set_type(hdr, CONN_PROBE_REPLY);
-		tipc_sk_respond(sk, skb, TIPC_OK);
+		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
+			__skb_queue_tail(xmitq, skb);
 		return;
 	} else if (mtyp == CONN_ACK) {
 		conn_cong = tsk_conn_cong(tsk);
@@ -1686,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
  *
  * Returns true if message was added to socket receive queue, otherwise false
  */
-static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
+static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
+		       struct sk_buff_head *xmitq)
 {
 	struct socket *sock = sk->sk_socket;
 	struct tipc_sock *tsk = tipc_sk(sk);
@@ -1696,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
 	int usr = msg_user(hdr);
 
 	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
-		tipc_sk_proto_rcv(tsk, skb);
+		tipc_sk_proto_rcv(tsk, skb, xmitq);
 		return false;
 	}
 
@@ -1739,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
 	return true;
 
 reject:
-	tipc_sk_respond(sk, skb, err);
+	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
+		__skb_queue_tail(xmitq, skb);
 	return false;
 }
 
@@ -1755,9 +1760,24 @@ reject:
 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int truesize = skb->truesize;
+	struct sk_buff_head xmitq;
+	u32 dnode, selector;
 
-	if (likely(filter_rcv(sk, skb)))
+	__skb_queue_head_init(&xmitq);
+
+	if (likely(filter_rcv(sk, skb, &xmitq))) {
 		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
+		return 0;
+	}
+
+	if (skb_queue_empty(&xmitq))
+		return 0;
+
+	/* Send response/rejected message */
+	skb = __skb_dequeue(&xmitq);
+	dnode = msg_destnode(buf_msg(skb));
+	selector = msg_origport(buf_msg(skb));
+	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
 	return 0;
 }
 
@@ -1771,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
  * Caller must hold socket lock
  */
 static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
-			    u32 dport)
+			    u32 dport, struct sk_buff_head *xmitq)
 {
+	unsigned long time_limit = jiffies + 2;
+	struct sk_buff *skb;
 	unsigned int lim;
 	atomic_t *dcnt;
-	struct sk_buff *skb;
-	unsigned long time_limit = jiffies + 2;
+	u32 onode;
 
 	while (skb_queue_len(inputq)) {
 		if (unlikely(time_after_eq(jiffies, time_limit)))
@@ -1788,7 +1809,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 
 		/* Add message directly to receive queue if possible */
 		if (!sock_owned_by_user(sk)) {
-			filter_rcv(sk, skb);
+			filter_rcv(sk, skb, xmitq);
 			continue;
 		}
 
@@ -1801,7 +1822,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 			continue;
 
 		/* Overload => reject message back to sender */
-		tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD);
+		onode = tipc_own_addr(sock_net(sk));
+		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
+			__skb_queue_tail(xmitq, skb);
 		break;
 	}
 }
@@ -1814,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
  */
 void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 {
+	struct sk_buff_head xmitq;
 	u32 dnode, dport = 0;
 	int err;
 	struct tipc_sock *tsk;
 	struct sock *sk;
 	struct sk_buff *skb;
 
+	__skb_queue_head_init(&xmitq);
 	while (skb_queue_len(inputq)) {
 		dport = tipc_skb_peek_port(inputq, dport);
 		tsk = tipc_sk_lookup(net, dport);
@@ -1827,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 		if (likely(tsk)) {
 			sk = &tsk->sk;
 			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
-				tipc_sk_enqueue(inputq, sk, dport);
+				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
 				spin_unlock_bh(&sk->sk_lock.slock);
 			}
+			/* Send pending response/rejected messages, if any */
+			while ((skb = __skb_dequeue(&xmitq))) {
+				dnode = msg_destnode(buf_msg(skb));
+				tipc_node_xmit_skb(net, skb, dnode, dport);
+			}
 			sock_put(sk);
 			continue;
 		}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index c9cf2be3674a..b016c011970b 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -63,7 +63,7 @@
  */
 struct udp_media_addr {
 	__be16	proto;
-	__be16	udp_port;
+	__be16	port;
 	union {
 		struct in_addr ipv4;
 		struct in6_addr ipv6;
@@ -108,9 +108,9 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
 	struct udp_media_addr *ua = (struct udp_media_addr *)&a->value;
 
 	if (ntohs(ua->proto) == ETH_P_IP)
-		snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port));
+		snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port));
 	else if (ntohs(ua->proto) == ETH_P_IPV6)
-		snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port));
+		snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port));
 	else
 		pr_err("Invalid UDP media address\n");
 	return 0;
@@ -178,8 +178,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
 		skb->dev = rt->dst.dev;
 		ttl = ip4_dst_hoplimit(&rt->dst);
 		udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
-				    dst->ipv4.s_addr, 0, ttl, 0, src->udp_port,
-				    dst->udp_port, false, true);
+				    dst->ipv4.s_addr, 0, ttl, 0, src->port,
+				    dst->port, false, true);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct dst_entry *ndst;
@@ -196,8 +196,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
 		ttl = ip6_dst_hoplimit(ndst);
 		err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
 					   ndst->dev, &src->ipv6,
-					   &dst->ipv6, 0, ttl, 0, src->udp_port,
-					   dst->udp_port, false);
+					   &dst->ipv6, 0, ttl, 0, src->port,
+					   dst->port, false);
 #endif
 	}
 	return err;
@@ -292,12 +292,12 @@ err:
 
 		ip4 = (struct sockaddr_in *)&sa_local;
 		local->proto = htons(ETH_P_IP);
-		local->udp_port = ip4->sin_port;
+		local->port = ip4->sin_port;
 		local->ipv4.s_addr = ip4->sin_addr.s_addr;
 
 		ip4 = (struct sockaddr_in *)&sa_remote;
 		remote->proto = htons(ETH_P_IP);
-		remote->udp_port = ip4->sin_port;
+		remote->port = ip4->sin_port;
 		remote->ipv4.s_addr = ip4->sin_addr.s_addr;
 		return 0;
 
@@ -312,13 +312,13 @@ err:
 			return -EINVAL;
 
 		local->proto = htons(ETH_P_IPV6);
-		local->udp_port = ip6->sin6_port;
+		local->port = ip6->sin6_port;
 		memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
 		ub->ifindex = ip6->sin6_scope_id;
 
 		ip6 = (struct sockaddr_in6 *)&sa_remote;
 		remote->proto = htons(ETH_P_IPV6);
-		remote->udp_port = ip6->sin6_port;
+		remote->port = ip6->sin6_port;
 		memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
 		return 0;
 #endif
@@ -386,7 +386,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 		err = -EAFNOSUPPORT;
 		goto err;
 	}
-	udp_conf.local_udp_port = local.udp_port;
+	udp_conf.local_udp_port = local.port;
 	err = udp_sock_create(net, &udp_conf, &ub->ubsock);
 	if (err)
 		goto err;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 80aa6a3e6817..735362c26c8e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->path.dentry;
 
-		if (dentry && d_backing_inode(dentry) == i) {
+		if (dentry && d_real_inode(dentry) == i) {
 			sock_hold(s);
 			goto found;
 		}
@@ -911,7 +911,7 @@ static struct sock *unix_find_other(struct net *net,
 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 		if (err)
 			goto fail;
-		inode = d_backing_inode(path.dentry);
+		inode = d_real_inode(path.dentry);
 		err = inode_permission(inode, MAY_WRITE);
 		if (err)
 			goto put_fail;
@@ -1048,7 +1048,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			goto out_up;
 		}
 		addr->hash = UNIX_HASH_SIZE;
-		hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+		hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
 		u->path = u_path;
 		list = &unix_socket_table[hash];
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b5f1221f48d4..b96ac918e0ba 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -61,6 +61,14 @@
  * function will also cleanup rejected sockets, those that reach the connected
  * state but leave it before they have been accepted.
  *
+ * - Lock ordering for pending or accept queue sockets is:
+ *
+ *     lock_sock(listener);
+ *     lock_sock_nested(pending, SINGLE_DEPTH_NESTING);
+ *
+ * Using explicit nested locking keeps lockdep happy since normally only one
+ * lock of a given class may be taken at a time.
+ *
  * - Sockets created by user action will be cleaned up when the user process
  * calls close(2), causing our release implementation to be called. Our release
  * implementation will perform some cleanup then drop the last reference so our
@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work)
 	cleanup = true;
 
 	lock_sock(listener);
-	lock_sock(sk);
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 
 	if (vsock_is_pending(sk)) {
 		vsock_remove_pending(listener, sk);
@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
 	if (connected) {
 		listener->sk_ack_backlog--;
 
-		lock_sock(connected);
+		lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
 		vconnected = vsock_sk(connected);
 
 		/* If the listener socket has received an error, then we should
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 39d9abd309ea..7645e97362c0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -220,7 +220,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 
 	if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
 		if (WARN_ON(!rdev->scan_req->notified))
-			rdev->scan_req->aborted = true;
+			rdev->scan_req->info.aborted = true;
 		___cfg80211_scan_done(rdev, false);
 	}
 }
@@ -1087,7 +1087,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		cfg80211_update_iface_num(rdev, wdev->iftype, -1);
 		if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
 			if (WARN_ON(!rdev->scan_req->notified))
-				rdev->scan_req->aborted = true;
+				rdev->scan_req->info.aborted = true;
 			___cfg80211_scan_done(rdev, false);
 		}
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a4d547f99f8d..eee91443924d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -141,6 +141,18 @@ struct cfg80211_internal_bss {
 	unsigned long refcount;
 	atomic_t hold;
 
+	/* time at the start of the reception of the first octet of the
+	 * timestamp field of the last beacon/probe received for this BSS.
+	 * The time is the TSF of the BSS specified by %parent_bssid.
+	 */
+	u64 parent_tsf;
+
+	/* the BSS according to which %parent_tsf is set. This is set to
+	 * the BSS that the interface that requested the scan was connected to
+	 * when the beacon/probe was received.
+	 */
+	u8 parent_bssid[ETH_ALEN] __aligned(2);
+
 	/* must be last because of priv member */
 	struct cfg80211_bss pub;
 };
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c503e96bfd5a..5782f718d567 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -405,6 +405,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_PBSS] = { .type = NLA_FLAG },
 	[NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED },
 	[NL80211_ATTR_STA_SUPPORT_P2P_PS] = { .type = NLA_U8 },
+	[NL80211_ATTR_MU_MIMO_GROUP_DATA] = {
+		.len = VHT_MUMIMO_GROUPS_DATA_LEN
+	},
+	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
 };
 
 /* policy for the key attributes */
@@ -2695,6 +2699,38 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 		change = true;
 	}
 
+	if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) {
+		const u8 *mumimo_groups;
+		u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+		if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+			return -EOPNOTSUPP;
+
+		mumimo_groups =
+			nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
+
+		/* bits 0 and 63 are reserved and must be zero */
+		if ((mumimo_groups[0] & BIT(7)) ||
+		    (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0)))
+			return -EINVAL;
+
+		memcpy(params.vht_mumimo_groups, mumimo_groups,
+		       VHT_MUMIMO_GROUPS_DATA_LEN);
+		change = true;
+	}
+
+	if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) {
+		u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+		if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+			return -EOPNOTSUPP;
+
+		nla_memcpy(params.macaddr,
+			   info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR],
+			   ETH_ALEN);
+		change = true;
+	}
+
 	if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
 	    !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
 		return -EOPNOTSUPP;
@@ -4410,6 +4446,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
 		if (params.plink_state >= NUM_NL80211_PLINK_STATES)
 			return -EINVAL;
+		if (info->attrs[NL80211_ATTR_MESH_PEER_AID]) {
+			params.peer_aid = nla_get_u16(
+				info->attrs[NL80211_ATTR_MESH_PEER_AID]);
+			if (params.peer_aid > IEEE80211_MAX_AID)
+				return -EINVAL;
+		}
 		params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
 	}
 
@@ -5287,6 +5329,51 @@ static const struct nla_policy
 	[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
 };
 
+static int nl80211_check_bool(const struct nlattr *nla, u8 min, u8 max, bool *out)
+{
+	u8 val = nla_get_u8(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
+static int nl80211_check_u8(const struct nlattr *nla, u8 min, u8 max, u8 *out)
+{
+	u8 val = nla_get_u8(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
+static int nl80211_check_u16(const struct nlattr *nla, u16 min, u16 max, u16 *out)
+{
+	u16 val = nla_get_u16(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
+static int nl80211_check_u32(const struct nlattr *nla, u32 min, u32 max, u32 *out)
+{
+	u32 val = nla_get_u32(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
+static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *out)
+{
+	s32 val = nla_get_s32(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
 static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
@@ -5297,9 +5384,8 @@ static int nl80211_parse_mesh_config(struct genl_info *info,
 #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
 do {									    \
 	if (tb[attr]) {							    \
-		if (fn(tb[attr]) < min || fn(tb[attr]) > max)		    \
+		if (fn(tb[attr], min, max, &cfg->param))		    \
 			return -EINVAL;					    \
-		cfg->param = fn(tb[attr]);				    \
 		mask |= (1 << (attr - 1));				    \
 	}								    \
 } while (0)
@@ -5318,99 +5404,99 @@ do {									    \
 	/* Fill in the params struct */
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255,
 				  mask, NL80211_MESHCONF_RETRY_TIMEOUT,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255,
 				  mask, NL80211_MESHCONF_CONFIRM_TIMEOUT,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255,
 				  mask, NL80211_MESHCONF_HOLDING_TIMEOUT,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255,
 				  mask, NL80211_MESHCONF_MAX_PEER_LINKS,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16,
 				  mask, NL80211_MESHCONF_MAX_RETRIES,
-				  nla_get_u8);
+				  nl80211_check_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255,
-				  mask, NL80211_MESHCONF_TTL, nla_get_u8);
+				  mask, NL80211_MESHCONF_TTL, nl80211_check_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255,
 				  mask, NL80211_MESHCONF_ELEMENT_TTL,
-				  nla_get_u8);
+				  nl80211_check_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1,
 				  mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
-				  nla_get_u8);
+				  nl80211_check_bool);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor,
 				  1, 255, mask,
 				  NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
-				  nla_get_u32);
+				  nl80211_check_u32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255,
 				  mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
-				  nla_get_u8);
+				  nl80211_check_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535,
 				  mask, NL80211_MESHCONF_PATH_REFRESH_TIME,
-				  nla_get_u32);
+				  nl80211_check_u32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535,
 				  mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
-				  nla_get_u32);
+				  nl80211_check_u32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
 				  dot11MeshHWMPnetDiameterTraversalTime,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4,
 				  mask, NL80211_MESHCONF_HWMP_ROOTMODE,
-				  nla_get_u8);
+				  nl80211_check_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535,
 				  mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
 				  dot11MeshGateAnnouncementProtocol, 0, 1,
 				  mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
-				  nla_get_u8);
+				  nl80211_check_bool);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1,
 				  mask, NL80211_MESHCONF_FORWARDING,
-				  nla_get_u8);
+				  nl80211_check_bool);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
 				  mask, NL80211_MESHCONF_RSSI_THRESHOLD,
-				  nla_get_s32);
+				  nl80211_check_s32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
 				  mask, NL80211_MESHCONF_HT_OPMODE,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
-				  nla_get_u32);
+				  nl80211_check_u32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535,
 				  mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
 				  dot11MeshHWMPconfirmationInterval,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
-				  nla_get_u16);
+				  nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode,
 				  NL80211_MESH_POWER_ACTIVE,
 				  NL80211_MESH_POWER_MAX,
 				  mask, NL80211_MESHCONF_POWER_MODE,
-				  nla_get_u32);
+				  nl80211_check_u32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
 				  0, 65535, mask,
-				  NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
+				  NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff,
 				  mask, NL80211_MESHCONF_PLINK_TIMEOUT,
-				  nla_get_u32);
+				  nl80211_check_u32);
 	if (mask_out)
 		*mask_out = mask;
 
@@ -6143,6 +6229,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) {
+		if (!wiphy_ext_feature_isset(wiphy,
+					NL80211_EXT_FEATURE_SET_SCAN_DWELL)) {
+			err = -EOPNOTSUPP;
+			goto out_free;
+		}
+
+		request->duration =
+			nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]);
+		request->duration_mandatory =
+			nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
+	}
+
 	if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
 		request->flags = nla_get_u32(
 			info->attrs[NL80211_ATTR_SCAN_FLAGS]);
@@ -6976,6 +7075,13 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 			jiffies_to_msecs(jiffies - intbss->ts)))
 		goto nla_put_failure;
 
+	if (intbss->parent_tsf &&
+	    (nla_put_u64_64bit(msg, NL80211_BSS_PARENT_TSF,
+			       intbss->parent_tsf, NL80211_BSS_PAD) ||
+	     nla_put(msg, NL80211_BSS_PARENT_BSSID, ETH_ALEN,
+		     intbss->parent_bssid)))
+		goto nla_put_failure;
+
 	if (intbss->ts_boottime &&
 	    nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME,
 			      intbss->ts_boottime, NL80211_BSS_PAD))
@@ -11749,6 +11855,13 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
 	    nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags))
 		goto nla_put_failure;
 
+	if (req->info.scan_start_tsf &&
+	    (nla_put_u64_64bit(msg, NL80211_ATTR_SCAN_START_TIME_TSF,
+			       req->info.scan_start_tsf, NL80211_BSS_PAD) ||
+	     nla_put(msg, NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, ETH_ALEN,
+		     req->info.tsf_bssid)))
+		goto nla_put_failure;
+
 	return 0;
  nla_put_failure:
 	return -ENOBUFS;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ef2955c89a00..0358e12be54b 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -3,6 +3,7 @@
  *
  * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2016	Intel Deutschland GmbH
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -194,7 +195,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 	if (wdev->netdev)
 		cfg80211_sme_scan_done(wdev->netdev);
 
-	if (!request->aborted &&
+	if (!request->info.aborted &&
 	    request->flags & NL80211_SCAN_FLAG_FLUSH) {
 		/* flush entries from previous scans */
 		spin_lock_bh(&rdev->bss_lock);
@@ -202,10 +203,10 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 		spin_unlock_bh(&rdev->bss_lock);
 	}
 
-	msg = nl80211_build_scan_msg(rdev, wdev, request->aborted);
+	msg = nl80211_build_scan_msg(rdev, wdev, request->info.aborted);
 
 #ifdef CONFIG_CFG80211_WEXT
-	if (wdev->netdev && !request->aborted) {
+	if (wdev->netdev && !request->info.aborted) {
 		memset(&wrqu, 0, sizeof(wrqu));
 
 		wireless_send_event(wdev->netdev, SIOCGIWSCAN, &wrqu, NULL);
@@ -236,12 +237,13 @@ void __cfg80211_scan_done(struct work_struct *wk)
 	rtnl_unlock();
 }
 
-void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
+void cfg80211_scan_done(struct cfg80211_scan_request *request,
+			struct cfg80211_scan_info *info)
 {
-	trace_cfg80211_scan_done(request, aborted);
+	trace_cfg80211_scan_done(request, info);
 	WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req);
 
-	request->aborted = aborted;
+	request->info = *info;
 	request->notified = true;
 	queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk);
 }
@@ -843,6 +845,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		found->pub.capability = tmp->pub.capability;
 		found->ts = tmp->ts;
 		found->ts_boottime = tmp->ts_boottime;
+		found->parent_tsf = tmp->parent_tsf;
+		ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
 	} else {
 		struct cfg80211_internal_bss *new;
 		struct cfg80211_internal_bss *hidden;
@@ -1086,6 +1090,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
 	tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
 	tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
 	tmp.ts_boottime = data->boottime_ns;
+	tmp.parent_tsf = data->parent_tsf;
+	ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
 
 	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
 		wiphy->max_adj_channel_rssi_comp;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 3c1091ae6c36..72b5255cefe2 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2642,8 +2642,9 @@ TRACE_EVENT(cfg80211_tdls_oper_request,
 	);
 
 TRACE_EVENT(cfg80211_scan_done,
-	TP_PROTO(struct cfg80211_scan_request *request, bool aborted),
-	TP_ARGS(request, aborted),
+	TP_PROTO(struct cfg80211_scan_request *request,
+		 struct cfg80211_scan_info *info),
+	TP_ARGS(request, info),
 	TP_STRUCT__entry(
 		__field(u32, n_channels)
 		__dynamic_array(u8, ie, request ? request->ie_len : 0)
@@ -2652,6 +2653,8 @@ TRACE_EVENT(cfg80211_scan_done,
 		MAC_ENTRY(wiphy_mac)
 		__field(bool, no_cck)
 		__field(bool, aborted)
+		__field(u64, scan_start_tsf)
+		MAC_ENTRY(tsf_bssid)
 	),
 	TP_fast_assign(
 		if (request) {
@@ -2666,9 +2669,16 @@ TRACE_EVENT(cfg80211_scan_done,
 					   request->wiphy->perm_addr);
 			__entry->no_cck = request->no_cck;
 		}
-		__entry->aborted = aborted;
+		if (info) {
+			__entry->aborted = info->aborted;
+			__entry->scan_start_tsf = info->scan_start_tsf;
+			MAC_ASSIGN(tsf_bssid, info->tsf_bssid);
+		}
 	),
-	TP_printk("aborted: %s", BOOL_TO_STR(__entry->aborted))
+	TP_printk("aborted: %s, scan start (TSF): %llu, tsf_bssid: " MAC_PR_FMT,
+		  BOOL_TO_STR(__entry->aborted),
+		  (unsigned long long)__entry->scan_start_tsf,
+		  MAC_PR_ARG(tsf_bssid))
 );
 
 DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_results,
@@ -2721,6 +2731,8 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
 		__dynamic_array(u8, mgmt, len)
 		__field(s32, signal)
 		__field(u64, ts_boottime)
+		__field(u64, parent_tsf)
+		MAC_ENTRY(parent_bssid)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
@@ -2730,10 +2742,15 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
 			memcpy(__get_dynamic_array(mgmt), mgmt, len);
 		__entry->signal = data->signal;
 		__entry->ts_boottime = data->boottime_ns;
-	),
-	TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d, tsb:%llu",
-		  WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
-		  __entry->signal, (unsigned long long)__entry->ts_boottime)
+		__entry->parent_tsf = data->parent_tsf;
+		MAC_ASSIGN(parent_bssid, data->parent_bssid);
+	),
+	TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT
+		  "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: "
+		  MAC_PR_FMT, WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
+		  __entry->signal, (unsigned long long)__entry->ts_boottime,
+		  (unsigned long long)__entry->parent_tsf,
+		  MAC_PR_ARG(parent_bssid))
 );
 
 DECLARE_EVENT_CLASS(cfg80211_bss_evt,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4e809e978b7d..2443ee30ba5b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
 		 * replace EtherType */
 		hdrlen += ETH_ALEN + 2;
 	else
-		tmp.h_proto = htons(skb->len);
+		tmp.h_proto = htons(skb->len - hdrlen);
 
 	pskb_pull(skb, hdrlen);
 
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0bf2478cb7df..a98b780e974c 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -20,6 +20,7 @@ hostprogs-y += offwaketime
 hostprogs-y += spintest
 hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
+hostprogs-y += test_cgrp2_array_pin
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
 spintest-objs := bpf_load.o libbpf.o spintest_user.o
 map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
+test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -61,6 +63,7 @@ always += map_perf_test_kern.o
 always += test_overhead_tp_kern.o
 always += test_overhead_kprobe_kern.o
 always += parse_varlen.o parse_simple.o parse_ldabs.o
+always += test_cgrp2_tc_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 7904a2a493de..84e3fd919a06 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_skb_in_cgroup;
 
 #if defined(__x86_64__)
 
diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
new file mode 100644
index 000000000000..70e86f7be69d
--- /dev/null
+++ b/samples/bpf/test_cgrp2_array_pin.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include "libbpf.h"
+
+static void usage(void)
+{
+	printf("Usage: test_cgrp2_array_pin [...]\n");
+	printf("       -F <file>   File to pin an BPF cgroup array\n");
+	printf("       -U <file>   Update an already pinned BPF cgroup array\n");
+	printf("       -v <value>  Full path of the cgroup2\n");
+	printf("       -h          Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+	const char *pinned_file = NULL, *cg2 = NULL;
+	int create_array = 1;
+	int array_key = 0;
+	int array_fd = -1;
+	int cg2_fd = -1;
+	int ret = -1;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "F:U:v:")) != -1) {
+		switch (opt) {
+		/* General args */
+		case 'F':
+			pinned_file = optarg;
+			break;
+		case 'U':
+			pinned_file = optarg;
+			create_array = 0;
+			break;
+		case 'v':
+			cg2 = optarg;
+			break;
+		default:
+			usage();
+			goto out;
+		}
+	}
+
+	if (!cg2 || !pinned_file) {
+		usage();
+		goto out;
+	}
+
+	cg2_fd = open(cg2, O_RDONLY);
+	if (cg2_fd < 0) {
+		fprintf(stderr, "open(%s,...): %s(%d)\n",
+			cg2, strerror(errno), errno);
+		goto out;
+	}
+
+	if (create_array) {
+		array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
+					  sizeof(uint32_t), sizeof(uint32_t),
+					  1, 0);
+		if (array_fd < 0) {
+			fprintf(stderr,
+				"bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
+				strerror(errno), errno);
+			goto out;
+		}
+	} else {
+		array_fd = bpf_obj_get(pinned_file);
+		if (array_fd < 0) {
+			fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+				pinned_file, strerror(errno), errno);
+			goto out;
+		}
+	}
+
+	ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0);
+	if (ret) {
+		perror("bpf_update_elem");
+		goto out;
+	}
+
+	if (create_array) {
+		ret = bpf_obj_pin(array_fd, pinned_file);
+		if (ret) {
+			fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
+				pinned_file, strerror(errno), errno);
+			goto out;
+		}
+	}
+
+out:
+	if (array_fd != -1)
+		close(array_fd);
+	if (cg2_fd != -1)
+		close(cg2_fd);
+	return ret;
+}
diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh
new file mode 100755
index 000000000000..0b119eeaf85c
--- /dev/null
+++ b/samples/bpf/test_cgrp2_tc.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+MY_DIR=$(dirname $0)
+# Details on the bpf prog
+BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
+BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"
+BPF_SECTION='filter'
+
+[ -z "$TC" ] && TC='tc'
+[ -z "$IP" ] && IP='ip'
+
+# Names of the veth interface, net namespace...etc.
+HOST_IFC='ve'
+NS_IFC='vens'
+NS='ns'
+
+find_mnt() {
+    cat /proc/mounts | \
+	awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }'
+}
+
+# Init cgroup2 vars
+init_cgrp2_vars() {
+    CGRP2_ROOT=$(find_mnt cgroup2)
+    if [ -z "$CGRP2_ROOT" ]
+    then
+	CGRP2_ROOT='/mnt/cgroup2'
+	MOUNT_CGRP2="yes"
+    fi
+    CGRP2_TC="$CGRP2_ROOT/tc"
+    CGRP2_TC_LEAF="$CGRP2_TC/leaf"
+}
+
+# Init bpf fs vars
+init_bpf_fs_vars() {
+    local bpf_fs_root=$(find_mnt bpf)
+    [ -n "$bpf_fs_root" ] || return -1
+    BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals"
+}
+
+setup_cgrp2() {
+    case $1 in
+	start)
+	    if [ "$MOUNT_CGRP2" == 'yes' ]
+	    then
+		[ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT
+		mount -t cgroup2 none $CGRP2_ROOT || return $?
+	    fi
+	    mkdir -p $CGRP2_TC_LEAF
+	    ;;
+	*)
+	    rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC
+	    [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT
+	    ;;
+    esac
+}
+
+setup_bpf_cgrp2_array() {
+    local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME"
+    case $1 in
+	start)
+	    $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC
+	    ;;
+	*)
+	    [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array
+	    ;;
+    esac
+}
+
+setup_net() {
+    case $1 in
+	start)
+	    $IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
+	    $IP link set dev $HOST_IFC up || return $?
+	    sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0
+
+	    $IP netns add ns || return $?
+	    $IP link set dev $NS_IFC netns ns || return $?
+	    $IP -n $NS link set dev $NS_IFC up || return $?
+	    $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
+	    $TC qdisc add dev $HOST_IFC clsact || return $?
+	    $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
+	    ;;
+	*)
+	    $IP netns del $NS
+	    $IP link del $HOST_IFC
+	    ;;
+    esac
+}
+
+run_in_cgrp() {
+    # Fork another bash and move it under the specified cgroup.
+    # It makes the cgroup cleanup easier at the end of the test.
+    cmd='echo $$ > '
+    cmd="$cmd $1/cgroup.procs; exec $2"
+    bash -c "$cmd"
+}
+
+do_test() {
+    run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null"
+    local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \
+			   awk '/drop/{print substr($7, 0, index($7, ",")-1)}')
+    if [[ $dropped -eq 0 ]]
+    then
+	echo "FAIL"
+	return 1
+    else
+	echo "Successfully filtered $dropped packets"
+	return 0
+    fi
+}
+
+do_exit() {
+    if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
+    then
+	echo "------ DEBUG ------"
+	echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo
+	echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
+	if [ -d "$BPF_FS_TC_SHARE" ]
+	then
+	    echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo
+	fi
+	echo "Host net:"
+	$IP netns
+	$IP link show dev $HOST_IFC
+	$IP -6 a show dev $HOST_IFC
+	$TC -s qdisc show dev $HOST_IFC
+	echo
+	echo "$NS net:"
+	$IP -n $NS link show dev $NS_IFC
+	$IP -n $NS -6 link show dev $NS_IFC
+	echo "------ DEBUG ------"
+	echo
+    fi
+
+    if [ "$MODE" != 'nocleanup' ]
+    then
+	setup_net stop
+	setup_bpf_cgrp2_array stop
+	setup_cgrp2 stop
+    fi
+}
+
+init_cgrp2_vars
+init_bpf_fs_vars
+
+while [[ $# -ge 1 ]]
+do
+    a="$1"
+    case $a in
+	debug)
+	    DEBUG='yes'
+	    shift 1
+	    ;;
+	cleanup-only)
+	    MODE='cleanuponly'
+	    shift 1
+	    ;;
+	no-cleanup)
+	    MODE='nocleanup'
+	    shift 1
+	    ;;
+	*)
+	    echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]"
+	    echo "  debug: Print cgrp and network setup details at the end of the test"
+	    echo "  cleanup-only: Try to cleanup things from last test.  No test will be run"
+	    echo "  no-cleanup: Run the test but don't do cleanup at the end"
+	    echo "[Note: If no arg is given, it will run the test and do cleanup at the end]"
+	    echo
+	    exit -1
+	    ;;
+    esac
+done
+
+trap do_exit 0
+
+[ "$MODE" == 'cleanuponly' ] && exit
+
+setup_cgrp2 start || exit $?
+setup_net start || exit $?
+init_bpf_fs_vars || exit $?
+setup_bpf_cgrp2_array start || exit $?
+do_test
+echo
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
new file mode 100644
index 000000000000..2732c37c8d5b
--- /dev/null
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in6.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/pkt_cls.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* copy of 'struct ethhdr' without __packed */
+struct eth_hdr {
+	unsigned char   h_dest[ETH_ALEN];
+	unsigned char   h_source[ETH_ALEN];
+	unsigned short  h_proto;
+};
+
+#define PIN_GLOBAL_NS		2
+struct bpf_elf_map {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+	__u32 id;
+	__u32 pinning;
+};
+
+struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
+	.type		= BPF_MAP_TYPE_CGROUP_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+SEC("filter")
+int handle_egress(struct __sk_buff *skb)
+{
+	void *data = (void *)(long)skb->data;
+	struct eth_hdr *eth = data;
+	struct ipv6hdr *ip6h = data + sizeof(*eth);
+	void *data_end = (void *)(long)skb->data_end;
+	char dont_care_msg[] = "dont care %04x %d\n";
+	char pass_msg[] = "pass\n";
+	char reject_msg[] = "reject\n";
+
+	/* single length check */
+	if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+		return TC_ACT_OK;
+
+	if (eth->h_proto != htons(ETH_P_IPV6) ||
+	    ip6h->nexthdr != IPPROTO_ICMPV6) {
+		bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
+				 eth->h_proto, ip6h->nexthdr);
+		return TC_ACT_OK;
+	} else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+		bpf_trace_printk(pass_msg, sizeof(pass_msg));
+		return TC_ACT_OK;
+	} else {
+		bpf_trace_printk(reject_msg, sizeof(reject_msg));
+		return TC_ACT_SHOT;
+	}
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/pktgen/pktgen.conf-1-1-flows b/samples/pktgen/pktgen.conf-1-1-flows
deleted file mode 100755
index 081749c9707d..000000000000
--- a/samples/pktgen/pktgen.conf-1-1-flows
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
-    local result
-
-    echo $1 > $PGDEV
-
-    result=`cat $PGDEV | fgrep "Result: OK:"`
-    if [ "$result" = "" ]; then
-         cat $PGDEV | fgrep Result:
-    fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. One CPU example. We add eth1.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
-  echo "Removing all devices"
- pgset "rem_device_all"
-  echo "Adding eth1"
- pgset "add_device eth1"
-
-
-# device config
-# delay 0
-# We need to do alloc for every skb since we cannot clone here.
-
-CLONE_SKB="clone_skb 0"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
-  echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- # Random address with in the min-max range
- pgset "flag IPDST_RND"
- pgset "dst_min 10.0.0.0"
- pgset "dst_max 10.255.255.255"
-
- # 8k Concurrent flows at 4 pkts
- pgset "flows 8192"
- pgset "flowlen 4"
-
- pgset "dst_mac  00:04:23:08:91:dc"
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-1-1-rdos b/samples/pktgen/pktgen.conf-1-1-rdos
deleted file mode 100755
index c7553be49b80..000000000000
--- a/samples/pktgen/pktgen.conf-1-1-rdos
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
-    local result
-
-    echo $1 > $PGDEV
-
-    result=`cat $PGDEV | fgrep "Result: OK:"`
-    if [ "$result" = "" ]; then
-         cat $PGDEV | fgrep Result:
-    fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. One CPU example. We add eth1.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
-  echo "Removing all devices"
- pgset "rem_device_all"
-  echo "Adding eth1"
- pgset "add_device eth1"
-
-
-# device config
-# delay 0
-
-# We need to do alloc for every skb since we cannot clone here.
-
-CLONE_SKB="clone_skb 0"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
-  echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- # Random address with in the min-max range
- pgset "flag IPDST_RND"
- pgset "dst_min 10.0.0.0"
- pgset "dst_max 10.255.255.255"
-
- pgset "dst_mac  00:04:23:08:91:dc"
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
new file mode 100755
index 000000000000..4e4e92b2515e
--- /dev/null
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#
+# Benchmark script:
+#  - developed for benchmarking egress qdisc path, derived (more
+#    like cut'n'pasted) from ingress benchmark script.
+#
+# Script for injecting packets into egress qdisc path of the stack
+# with pktgen "xmit_mode queue_xmit".
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+
+# Burst greater than 1 are invalid for queue_xmit mode
+if [[ -n "$BURST" ]]; then
+    err 1 "Bursting not supported for this mode"
+fi
+
+# Base Config
+DELAY="0"        # Zero means max speed
+COUNT="10000000" # Zero means indefinitely
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+    # The device name is extended with @name, using thread number to
+    # make then unique, but any name will do.
+    dev=${DEV}@${thread}
+
+    # Add remove all other devices and add_device $dev to thread
+    pg_thread $thread "rem_device_all"
+    pg_thread $thread "add_device" $dev
+
+    # Base config of dev
+    pg_set $dev "flag QUEUE_MAP_CPU"
+    pg_set $dev "count $COUNT"
+    pg_set $dev "pkt_size $PKT_SIZE"
+    pg_set $dev "delay $DELAY"
+    pg_set $dev "flag NO_TIMESTAMP"
+
+    # Destination
+    pg_set $dev "dst_mac $DST_MAC"
+    pg_set $dev "dst $DEST_IP"
+
+    # Inject packet into TX qdisc egress path of stack
+    pg_set $dev "xmit_mode queue_xmit"
+done
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+for ((thread = 0; thread < $THREADS; thread++)); do
+    dev=${DEV}@${thread}
+    echo "Device: $dev"
+    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+done
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
new file mode 100755
index 000000000000..f60412e445bb
--- /dev/null
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+#
+# Script example for many flows testing
+#
+# Number of simultaneous flows limited by variable $FLOWS
+# and number of packets per flow controlled by variable $FLOWLEN
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+# Set some default params, if they didn't get set
+[ -z "$DEST_IP" ]   && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
+[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+
+# NOTICE:  Script specific settings
+# =======
+# Limiting the number of concurrent flows ($FLOWS)
+# and also set how many packets each flow contains ($FLOWLEN)
+#
+[ -z "$FLOWS" ]     && FLOWS="8000"
+[ -z "$FLOWLEN" ]   && FLOWLEN="10"
+
+# Base Config
+DELAY="0"  # Zero means max speed
+COUNT="0"  # Zero means indefinitely
+
+if [[ -n "$BURST" ]]; then
+    err 1 "Bursting not supported for this mode"
+fi
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+    dev=${DEV}@${thread}
+
+    # Add remove all other devices and add_device $dev to thread
+    pg_thread $thread "rem_device_all"
+    pg_thread $thread "add_device" $dev
+
+    # Base config
+    pg_set $dev "flag QUEUE_MAP_CPU"
+    pg_set $dev "count $COUNT"
+    pg_set $dev "clone_skb $CLONE_SKB"
+    pg_set $dev "pkt_size $PKT_SIZE"
+    pg_set $dev "delay $DELAY"
+    pg_set $dev "flag NO_TIMESTAMP"
+
+    # Single destination
+    pg_set $dev "dst_mac $DST_MAC"
+    pg_set $dev "dst $DEST_IP"
+
+    # Randomize source IP-addresses
+    pg_set $dev "flag IPSRC_RND"
+    pg_set $dev "src_min 198.18.0.0"
+    pg_set $dev "src_max 198.19.255.255"
+
+    # Limit number of flows (max 65535)
+    pg_set $dev "flows $FLOWS"
+    #
+    # How many packets a flow will send, before flow "entry" is
+    # re-generated/setup.
+    pg_set $dev "flowlen $FLOWLEN"
+    #
+    # Flag FLOW_SEQ will cause $FLOWLEN packets from the same flow
+    # being send back-to-back, before next flow is selected
+    # incrementally.  This helps lookup caches, and is more realistic.
+    #
+    pg_set $dev "flag FLOW_SEQ"
+
+done
+
+# Run if user hits control-c
+function print_result() {
+    # Print results
+    for ((thread = 0; thread < $THREADS; thread++)); do
+	dev=${DEV}@${thread}
+	echo "Device: $dev"
+	cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+
+print_result
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
new file mode 100755
index 000000000000..32ad818e2829
--- /dev/null
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+#
+# Script will generate one flow per thread (-t N)
+#  - Same destination IP
+#  - Fake source IPs for each flow (fixed based on thread number)
+#
+# Useful for scale testing on receiver, to see whether silo'ing flows
+# works and scales.  For optimal scalability (on receiver) each
+# separate-flow should not access shared variables/data. This script
+# helps magnify any of these scaling issues by overloading the receiver.
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+# Set some default params, if they didn't get set
+[ -z "$DEST_IP" ]   && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
+[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+[ -z "$BURST" ]     && BURST=32
+
+
+# Base Config
+DELAY="0"  # Zero means max speed
+COUNT="0"  # Zero means indefinitely
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+    dev=${DEV}@${thread}
+
+    # Add remove all other devices and add_device $dev to thread
+    pg_thread $thread "rem_device_all"
+    pg_thread $thread "add_device" $dev
+
+    # Base config
+    pg_set $dev "flag QUEUE_MAP_CPU"
+    pg_set $dev "count $COUNT"
+    pg_set $dev "clone_skb $CLONE_SKB"
+    pg_set $dev "pkt_size $PKT_SIZE"
+    pg_set $dev "delay $DELAY"
+    pg_set $dev "flag NO_TIMESTAMP"
+
+    # Single destination
+    pg_set $dev "dst_mac $DST_MAC"
+    pg_set $dev "dst $DEST_IP"
+
+    # Setup source IP-addresses based on thread number
+    pg_set $dev "src_min 198.18.$((thread+1)).1"
+    pg_set $dev "src_max 198.18.$((thread+1)).1"
+
+    # Setup burst, for easy testing -b 0 disable bursting
+    # (internally in pktgen default and minimum burst=1)
+    if [[ ${BURST} -ne 0 ]]; then
+	pg_set $dev "burst $BURST"
+    else
+	info "$dev: Not using burst"
+    fi
+
+done
+
+# Run if user hits control-c
+function print_result() {
+    # Print results
+    for ((thread = 0; thread < $THREADS; thread++)); do
+	dev=${DEV}@${thread}
+	echo "Device: $dev"
+	cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+
+print_result
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index a9155077feef..fec75786f75b 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -384,7 +384,7 @@ static void do_of_entry_multi(void *symval, struct module *mod)
 	len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*",
 		      (*type)[0] ? *type : "*");
 
-	if (compatible[0])
+	if ((*compatible)[0])
 		sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "",
 			*compatible);
 
diff --git a/security/keys/key.c b/security/keys/key.c
index bd5a272f28a6..346fbf201c22 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -597,7 +597,7 @@ int key_reject_and_link(struct key *key,
 
 	mutex_unlock(&key_construction_mutex);
 
-	if (keyring)
+	if (keyring && link_ret == 0)
 		__key_link_end(keyring, &key->index_key, edit);
 
 	/* wake up anyone waiting for a key to be constructed */
diff --git a/sound/core/timer.c b/sound/core/timer.c
index e722022d325d..9a6157ea6881 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1955,6 +1955,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
 		qhead = tu->qhead++;
 		tu->qhead %= tu->queue_size;
+		tu->qused--;
 		spin_unlock_irq(&tu->qlock);
 
 		if (tu->tread) {
@@ -1968,7 +1969,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 		}
 
 		spin_lock_irq(&tu->qlock);
-		tu->qused--;
 		if (err < 0)
 			goto _error;
 		result += unit;
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index c0f8f613f1f1..172dacd925f5 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -420,6 +420,7 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream)
 
 static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm)
 {
+	hrtimer_cancel(&dpcm->timer);
 	tasklet_kill(&dpcm->tasklet);
 }
 
diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index 87041ddd29cb..47a358fab132 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -444,7 +444,7 @@ int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg,
 	err = reg_raw_write(codec, reg, val);
 	if (err == -EAGAIN) {
 		err = snd_hdac_power_up_pm(codec);
-		if (!err)
+		if (err >= 0)
 			err = reg_raw_write(codec, reg, val);
 		snd_hdac_power_down_pm(codec);
 	}
@@ -470,7 +470,7 @@ static int __snd_hdac_regmap_read_raw(struct hdac_device *codec,
 	err = reg_raw_read(codec, reg, val, uncached);
 	if (err == -EAGAIN) {
 		err = snd_hdac_power_up_pm(codec);
-		if (!err)
+		if (err >= 0)
 			err = reg_raw_read(codec, reg, val, uncached);
 		snd_hdac_power_down_pm(codec);
 	}
diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index 4a054d720112..d3125c169684 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -1444,9 +1444,8 @@ static int vortex_wtdma_bufshift(vortex_t * vortex, int wtdma)
 	int page, p, pp, delta, i;
 
 	page =
-	    (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) &
-	     WT_SUBBUF_MASK)
-	    >> WT_SUBBUF_SHIFT;
+	    (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2))
+	     >> WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK;
 	if (dma->nr_periods >= 4)
 		delta = (page - dma->period_real) & 3;
 	else {
diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c
index 1cb85aeb0cea..286f5e3686a3 100644
--- a/sound/pci/echoaudio/echoaudio.c
+++ b/sound/pci/echoaudio/echoaudio.c
@@ -2200,11 +2200,11 @@ static int snd_echo_resume(struct device *dev)
 	u32 pipe_alloc_mask;
 	int err;
 
-	commpage_bak = kmalloc(sizeof(struct echoaudio), GFP_KERNEL);
+	commpage_bak = kmalloc(sizeof(*commpage), GFP_KERNEL);
 	if (commpage_bak == NULL)
 		return -ENOMEM;
 	commpage = chip->comm_page;
-	memcpy(commpage_bak, commpage, sizeof(struct comm_page));
+	memcpy(commpage_bak, commpage, sizeof(*commpage));
 
 	err = init_hw(chip, chip->pci->device, chip->pci->subsystem_device);
 	if (err < 0) {
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 320445f3bf73..79c7b340acc2 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3977,6 +3977,8 @@ static hda_nid_t set_path_power(struct hda_codec *codec, hda_nid_t nid,
 
 	for (n = 0; n < spec->paths.used; n++) {
 		path = snd_array_elem(&spec->paths, n);
+		if (!path->depth)
+			continue;
 		if (path->path[0] == nid ||
 		    path->path[path->depth - 1] == nid) {
 			bool pin_old = path->pin_enabled;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 94089fc71884..e320c44714b1 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -367,9 +367,10 @@ enum {
 #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
 #define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171)
 #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
+#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
 #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-			IS_KBL(pci) || IS_KBL_LP(pci)
+			IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)
 
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
@@ -2190,6 +2191,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Kabylake-LP */
 	{ PCI_DEVICE(0x8086, 0x9d71),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+	/* Kabylake-H */
+	{ PCI_DEVICE(0x8086, 0xa2f0),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
 	/* Broxton-P(Apollolake) */
 	{ PCI_DEVICE(0x8086, 0x5a98),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index 17fd81736d3d..0621920f7617 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -115,20 +115,20 @@ static int substream_free_pages(struct azx *chip,
 /*
  * Register access ops. Tegra HDA register access is DWORD only.
  */
-static void hda_tegra_writel(u32 value, u32 *addr)
+static void hda_tegra_writel(u32 value, u32 __iomem *addr)
 {
 	writel(value, addr);
 }
 
-static u32 hda_tegra_readl(u32 *addr)
+static u32 hda_tegra_readl(u32 __iomem *addr)
 {
 	return readl(addr);
 }
 
-static void hda_tegra_writew(u16 value, u16 *addr)
+static void hda_tegra_writew(u16 value, u16 __iomem  *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
@@ -137,20 +137,20 @@ static void hda_tegra_writew(u16 value, u16 *addr)
 	writel(v, dword_addr);
 }
 
-static u16 hda_tegra_readw(u16 *addr)
+static u16 hda_tegra_readw(u16 __iomem *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
 	return (v >> shift) & 0xffff;
 }
 
-static void hda_tegra_writeb(u8 value, u8 *addr)
+static void hda_tegra_writeb(u8 value, u8 __iomem *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
@@ -159,10 +159,10 @@ static void hda_tegra_writeb(u8 value, u8 *addr)
 	writel(v, dword_addr);
 }
 
-static u8 hda_tegra_readb(u8 *addr)
+static u8 hda_tegra_readb(u8 __iomem *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0fe18ede3e85..5fac786e4982 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5650,6 +5650,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
+	SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
+	SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
+	SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
 	SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
@@ -5832,6 +5835,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x14, 0x90170120},
 		{0x21, 0x02211030}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60170},
+		{0x14, 0x90170120},
+		{0x21, 0x02211030}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC256_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
 		{0x12, 0x90a60130},
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 4d82a58ff6b0..f3fb98f0a995 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -483,9 +483,10 @@ config SND_SOC_DMIC
 	tristate
 
 config SND_SOC_HDMI_CODEC
-       tristate
-       select SND_PCM_ELD
-       select SND_PCM_IEC958
+	tristate
+	select SND_PCM_ELD
+	select SND_PCM_IEC958
+	select HDMI
 
 config SND_SOC_ES8328
 	tristate "Everest Semi ES8328 CODEC"
diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c
index 647f69de6baa..5013d2ba0c10 100644
--- a/sound/soc/codecs/ak4613.c
+++ b/sound/soc/codecs/ak4613.c
@@ -146,6 +146,7 @@ static const struct regmap_config ak4613_regmap_cfg = {
 	.max_register		= 0x16,
 	.reg_defaults		= ak4613_reg,
 	.num_reg_defaults	= ARRAY_SIZE(ak4613_reg),
+	.cache_type		= REGCACHE_RBTREE,
 };
 
 static const struct of_device_id ak4613_of_match[] = {
@@ -530,7 +531,6 @@ static int ak4613_i2c_remove(struct i2c_client *client)
 static struct i2c_driver ak4613_i2c_driver = {
 	.driver = {
 		.name = "ak4613-codec",
-		.owner = THIS_MODULE,
 		.of_match_table = ak4613_of_match,
 	},
 	.probe		= ak4613_i2c_probe,
diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c
index d6f4abbbf8a7..fb3885fe0afb 100644
--- a/sound/soc/codecs/cx20442.c
+++ b/sound/soc/codecs/cx20442.c
@@ -226,6 +226,7 @@ static int v253_open(struct tty_struct *tty)
 	if (!tty->disc_data)
 		return -ENODEV;
 
+	tty->receive_room = 16;
 	if (tty->ops->write(tty, v253_init, len) != len) {
 		ret = -EIO;
 		goto err;
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 181cd3bf0b92..2abb742fc47b 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -1474,6 +1474,11 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec)
 	 * exit, we call pm_runtime_suspend() so that will do for us
 	 */
 	hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev));
+	if (!hlink) {
+		dev_err(&edev->hdac.dev, "hdac link not found\n");
+		return -EIO;
+	}
+
 	snd_hdac_ext_bus_link_get(edev->ebus, hlink);
 
 	ret = create_fill_widget_route_map(dapm);
@@ -1634,6 +1639,11 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 
 	/* hold the ref while we probe */
 	hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev));
+	if (!hlink) {
+		dev_err(&edev->hdac.dev, "hdac link not found\n");
+		return -EIO;
+	}
+
 	snd_hdac_ext_bus_link_get(edev->ebus, hlink);
 
 	hdmi_priv = devm_kzalloc(&codec->dev, sizeof(*hdmi_priv), GFP_KERNEL);
@@ -1744,6 +1754,11 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 	}
 
 	hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+	if (!hlink) {
+		dev_err(dev, "hdac link not found\n");
+		return -EIO;
+	}
+
 	snd_hdac_ext_bus_link_put(ebus, hlink);
 
 	return 0;
@@ -1765,6 +1780,11 @@ static int hdac_hdmi_runtime_resume(struct device *dev)
 		return 0;
 
 	hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+	if (!hlink) {
+		dev_err(dev, "hdac link not found\n");
+		return -EIO;
+	}
+
 	snd_hdac_ext_bus_link_get(ebus, hlink);
 
 	err = snd_hdac_display_power(bus, true);
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 3c6594da6c9c..d70847c9eeb0 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -253,7 +253,7 @@ static const struct reg_default rt5650_reg[] = {
 	{ 0x2b, 0x5454 },
 	{ 0x2c, 0xaaa0 },
 	{ 0x2d, 0x0000 },
-	{ 0x2f, 0x1002 },
+	{ 0x2f, 0x5002 },
 	{ 0x31, 0x5000 },
 	{ 0x32, 0x0000 },
 	{ 0x33, 0x0000 },
diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c
index 49a9e7049e2b..0af5ddbef1da 100644
--- a/sound/soc/codecs/rt5670.c
+++ b/sound/soc/codecs/rt5670.c
@@ -619,7 +619,7 @@ static const struct snd_kcontrol_new rt5670_snd_controls[] = {
 		RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1),
 	SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL,
 		RT5670_L_VOL_SFT, RT5670_R_VOL_SFT,
-		39, 0, out_vol_tlv),
+		39, 1, out_vol_tlv),
 	/* OUTPUT Control */
 	SOC_DOUBLE("OUT Channel Switch", RT5670_LOUT1,
 		RT5670_VOL_L_SFT, RT5670_VOL_R_SFT, 1, 1),
diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c
index da60e3fe5ee7..e7fe6b7b95b7 100644
--- a/sound/soc/codecs/wm5102.c
+++ b/sound/soc/codecs/wm5102.c
@@ -1872,7 +1872,7 @@ static struct snd_soc_dai_driver wm5102_dai[] = {
 		.capture = {
 			.stream_name = "Audio Trace CPU",
 			.channels_min = 1,
-			.channels_max = 6,
+			.channels_max = 4,
 			.rates = WM5102_RATES,
 			.formats = WM5102_FORMATS,
 		},
diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c
index b5820e4d5471..d54f1b46c9ec 100644
--- a/sound/soc/codecs/wm5110.c
+++ b/sound/soc/codecs/wm5110.c
@@ -1723,6 +1723,7 @@ static const struct snd_soc_dapm_route wm5110_dapm_routes[] = {
 	{ "OUT2L", NULL, "SYSCLK" },
 	{ "OUT2R", NULL, "SYSCLK" },
 	{ "OUT3L", NULL, "SYSCLK" },
+	{ "OUT3R", NULL, "SYSCLK" },
 	{ "OUT4L", NULL, "SYSCLK" },
 	{ "OUT4R", NULL, "SYSCLK" },
 	{ "OUT5L", NULL, "SYSCLK" },
diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c
index f6f9395ea38e..1c600819f768 100644
--- a/sound/soc/codecs/wm8940.c
+++ b/sound/soc/codecs/wm8940.c
@@ -743,6 +743,7 @@ static const struct regmap_config wm8940_regmap = {
 	.max_register = WM8940_MONOMIX,
 	.reg_defaults = wm8940_reg_defaults,
 	.num_reg_defaults = ARRAY_SIZE(wm8940_reg_defaults),
+	.cache_type = REGCACHE_RBTREE,
 
 	.readable_reg = wm8940_readable_register,
 	.volatile_reg = wm8940_volatile_register,
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 0f66fda2c772..237dc67002ef 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -1513,8 +1513,9 @@ static struct davinci_mcasp_pdata am33xx_mcasp_pdata = {
 };
 
 static struct davinci_mcasp_pdata dra7_mcasp_pdata = {
-	.tx_dma_offset = 0x200,
-	.rx_dma_offset = 0x284,
+	/* The CFG port offset will be calculated if it is needed */
+	.tx_dma_offset = 0,
+	.rx_dma_offset = 0,
 	.version = MCASP_VERSION_4,
 };
 
@@ -1734,6 +1735,52 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp)
 	return PCM_EDMA;
 }
 
+static u32 davinci_mcasp_txdma_offset(struct davinci_mcasp_pdata *pdata)
+{
+	int i;
+	u32 offset = 0;
+
+	if (pdata->version != MCASP_VERSION_4)
+		return pdata->tx_dma_offset;
+
+	for (i = 0; i < pdata->num_serializer; i++) {
+		if (pdata->serial_dir[i] == TX_MODE) {
+			if (!offset) {
+				offset = DAVINCI_MCASP_TXBUF_REG(i);
+			} else {
+				pr_err("%s: Only one serializer allowed!\n",
+				       __func__);
+				break;
+			}
+		}
+	}
+
+	return offset;
+}
+
+static u32 davinci_mcasp_rxdma_offset(struct davinci_mcasp_pdata *pdata)
+{
+	int i;
+	u32 offset = 0;
+
+	if (pdata->version != MCASP_VERSION_4)
+		return pdata->rx_dma_offset;
+
+	for (i = 0; i < pdata->num_serializer; i++) {
+		if (pdata->serial_dir[i] == RX_MODE) {
+			if (!offset) {
+				offset = DAVINCI_MCASP_RXBUF_REG(i);
+			} else {
+				pr_err("%s: Only one serializer allowed!\n",
+				       __func__);
+				break;
+			}
+		}
+	}
+
+	return offset;
+}
+
 static int davinci_mcasp_probe(struct platform_device *pdev)
 {
 	struct snd_dmaengine_dai_dma_data *dma_data;
@@ -1862,7 +1909,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 	if (dat)
 		dma_data->addr = dat->start;
 	else
-		dma_data->addr = mem->start + pdata->tx_dma_offset;
+		dma_data->addr = mem->start + davinci_mcasp_txdma_offset(pdata);
 
 	dma = &mcasp->dma_request[SNDRV_PCM_STREAM_PLAYBACK];
 	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
@@ -1883,7 +1930,8 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 		if (dat)
 			dma_data->addr = dat->start;
 		else
-			dma_data->addr = mem->start + pdata->rx_dma_offset;
+			dma_data->addr =
+				mem->start + davinci_mcasp_rxdma_offset(pdata);
 
 		dma = &mcasp->dma_request[SNDRV_PCM_STREAM_CAPTURE];
 		res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h
index 1e8787fb3fb7..afddc8010c54 100644
--- a/sound/soc/davinci/davinci-mcasp.h
+++ b/sound/soc/davinci/davinci-mcasp.h
@@ -85,9 +85,9 @@
 						(n << 2))
 
 /* Transmit Buffer for Serializer n */
-#define DAVINCI_MCASP_TXBUF_REG		0x200
+#define DAVINCI_MCASP_TXBUF_REG(n)	(0x200 + (n << 2))
 /* Receive Buffer for Serializer n */
-#define DAVINCI_MCASP_RXBUF_REG		0x280
+#define DAVINCI_MCASP_RXBUF_REG(n)	(0x280 + (n << 2))
 
 /* McASP FIFO Registers */
 #define DAVINCI_MCASP_V2_AFIFO_BASE	(0x1010)
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 632ecc0e3956..bedec4a32581 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -952,16 +952,16 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev,
 	ssi_private->i2s_mode = CCSR_SSI_SCR_NET;
 	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
+		regmap_update_bits(regs, CCSR_SSI_STCCR,
+				   CCSR_SSI_SxCCR_DC_MASK,
+				   CCSR_SSI_SxCCR_DC(2));
+		regmap_update_bits(regs, CCSR_SSI_SRCCR,
+				   CCSR_SSI_SxCCR_DC_MASK,
+				   CCSR_SSI_SxCCR_DC(2));
 		switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
 		case SND_SOC_DAIFMT_CBM_CFS:
 		case SND_SOC_DAIFMT_CBS_CFS:
 			ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER;
-			regmap_update_bits(regs, CCSR_SSI_STCCR,
-					CCSR_SSI_SxCCR_DC_MASK,
-					CCSR_SSI_SxCCR_DC(2));
-			regmap_update_bits(regs, CCSR_SSI_SRCCR,
-					CCSR_SSI_SxCCR_DC_MASK,
-					CCSR_SSI_SxCCR_DC(2));
 			break;
 		case SND_SOC_DAIFMT_CBM_CFM:
 			ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE;
diff --git a/sound/soc/intel/atom/sst-mfld-platform-compress.c b/sound/soc/intel/atom/sst-mfld-platform-compress.c
index 395168986462..1bead81bb510 100644
--- a/sound/soc/intel/atom/sst-mfld-platform-compress.c
+++ b/sound/soc/intel/atom/sst-mfld-platform-compress.c
@@ -182,24 +182,29 @@ static int sst_platform_compr_trigger(struct snd_compr_stream *cstream, int cmd)
 	case SNDRV_PCM_TRIGGER_START:
 		if (stream->compr_ops->stream_start)
 			return stream->compr_ops->stream_start(sst->dev, stream->id);
+		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 		if (stream->compr_ops->stream_drop)
 			return stream->compr_ops->stream_drop(sst->dev, stream->id);
+		break;
 	case SND_COMPR_TRIGGER_DRAIN:
 		if (stream->compr_ops->stream_drain)
 			return stream->compr_ops->stream_drain(sst->dev, stream->id);
+		break;
 	case SND_COMPR_TRIGGER_PARTIAL_DRAIN:
 		if (stream->compr_ops->stream_partial_drain)
 			return stream->compr_ops->stream_partial_drain(sst->dev, stream->id);
+		break;
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 		if (stream->compr_ops->stream_pause)
 			return stream->compr_ops->stream_pause(sst->dev, stream->id);
+		break;
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
 		if (stream->compr_ops->stream_pause_release)
 			return stream->compr_ops->stream_pause_release(sst->dev, stream->id);
-	default:
-		return -EINVAL;
+		break;
 	}
+	return -EINVAL;
 }
 
 static int sst_platform_compr_pointer(struct snd_compr_stream *cstream,
diff --git a/sound/soc/intel/skylake/bxt-sst.c b/sound/soc/intel/skylake/bxt-sst.c
index 965ce40ce752..8b95e09e23e8 100644
--- a/sound/soc/intel/skylake/bxt-sst.c
+++ b/sound/soc/intel/skylake/bxt-sst.c
@@ -291,6 +291,7 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
 	sst_dsp_mailbox_init(sst, (BXT_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ),
 			SKL_ADSP_W0_UP_SZ, BXT_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ);
 
+	INIT_LIST_HEAD(&sst->module_list);
 	ret = skl_ipc_init(dev, skl);
 	if (ret)
 		return ret;
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 49354d17ea55..c4c51a4d3c8f 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -518,7 +518,7 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv,
 		}
 	}
 
-	rsnd_mod_bset(adg_mod, SSICKR, 0x00FF0000, ckr);
+	rsnd_mod_bset(adg_mod, SSICKR, 0x80FF0000, ckr);
 	rsnd_mod_write(adg_mod, BRRA,  rbga);
 	rsnd_mod_write(adg_mod, BRRB,  rbgb);
 
diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
new file mode 100755
index 000000000000..8e960234013d
--- /dev/null
+++ b/tools/hv/bondvf.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+
+# This example script creates bonding network devices based on synthetic NIC
+# (the virtual network adapter usually provided by Hyper-V) and the matching
+# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
+# function as one network device, and fail over to the synthetic NIC if VF is
+# down.
+#
+# Usage:
+# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
+#   machine (VM)
+# - Run this scripts on the VM. It will create configuration files in
+#   distro specific directory.
+# - Reboot the VM, so that the bonding config are enabled.
+#
+# The config files are DHCP by default. You may edit them if you need to change
+# to Static IP or change other settings.
+#
+
+sysdir=/sys/class/net
+netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
+bondcnt=0
+
+# Detect Distro
+if [ -f /etc/redhat-release ];
+then
+	cfgdir=/etc/sysconfig/network-scripts
+	distro=redhat
+elif grep -q 'Ubuntu' /etc/issue
+then
+	cfgdir=/etc/network
+	distro=ubuntu
+elif grep -q 'SUSE' /etc/issue
+then
+	cfgdir=/etc/sysconfig/network
+	distro=suse
+else
+	echo "Unsupported Distro"
+	exit 1
+fi
+
+echo Detected Distro: $distro, or compatible
+
+# Get a list of ethernet names
+list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
+eth_cnt=${#list_eth[@]}
+
+echo List of net devices:
+
+# Get the MAC addresses
+for (( i=0; i < $eth_cnt; i++ ))
+do
+	list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
+	echo ${list_eth[$i]}, ${list_mac[$i]}
+done
+
+# Find NIC with matching MAC
+for (( i=0; i < $eth_cnt-1; i++ ))
+do
+	for (( j=i+1; j < $eth_cnt; j++ ))
+	do
+		if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
+		then
+			list_match[$i]=${list_eth[$j]}
+			break
+		fi
+	done
+done
+
+function create_eth_cfg_redhat {
+	local fn=$cfgdir/ifcfg-$1
+
+	rm -f $fn
+	echo DEVICE=$1 >>$fn
+	echo TYPE=Ethernet >>$fn
+	echo BOOTPROTO=none >>$fn
+	echo ONBOOT=yes >>$fn
+	echo NM_CONTROLLED=no >>$fn
+	echo PEERDNS=yes >>$fn
+	echo IPV6INIT=yes >>$fn
+	echo MASTER=$2 >>$fn
+	echo SLAVE=yes >>$fn
+}
+
+function create_eth_cfg_pri_redhat {
+	create_eth_cfg_redhat $1 $2
+}
+
+function create_bond_cfg_redhat {
+	local fn=$cfgdir/ifcfg-$1
+
+	rm -f $fn
+	echo DEVICE=$1 >>$fn
+	echo TYPE=Bond >>$fn
+	echo BOOTPROTO=dhcp >>$fn
+	echo ONBOOT=yes >>$fn
+	echo NM_CONTROLLED=no >>$fn
+	echo PEERDNS=yes >>$fn
+	echo IPV6INIT=yes >>$fn
+	echo BONDING_MASTER=yes >>$fn
+	echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
+}
+
+function create_eth_cfg_ubuntu {
+	local fn=$cfgdir/interfaces
+
+	echo $'\n'auto $1 >>$fn
+	echo iface $1 inet manual >>$fn
+	echo bond-master $2 >>$fn
+}
+
+function create_eth_cfg_pri_ubuntu {
+	local fn=$cfgdir/interfaces
+
+	create_eth_cfg_ubuntu $1 $2
+	echo bond-primary $1 >>$fn
+}
+
+function create_bond_cfg_ubuntu {
+	local fn=$cfgdir/interfaces
+
+	echo $'\n'auto $1 >>$fn
+	echo iface $1 inet dhcp >>$fn
+	echo bond-mode active-backup >>$fn
+	echo bond-miimon 100 >>$fn
+	echo bond-slaves none >>$fn
+}
+
+function create_eth_cfg_suse {
+        local fn=$cfgdir/ifcfg-$1
+
+        rm -f $fn
+	echo BOOTPROTO=none >>$fn
+	echo STARTMODE=auto >>$fn
+}
+
+function create_eth_cfg_pri_suse {
+	create_eth_cfg_suse $1
+}
+
+function create_bond_cfg_suse {
+	local fn=$cfgdir/ifcfg-$1
+
+	rm -f $fn
+	echo BOOTPROTO=dhcp >>$fn
+	echo STARTMODE=auto >>$fn
+	echo BONDING_MASTER=yes >>$fn
+	echo BONDING_SLAVE_0=$2 >>$fn
+	echo BONDING_SLAVE_1=$3 >>$fn
+	echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
+}
+
+function create_bond {
+	local bondname=bond$bondcnt
+	local primary
+	local secondary
+
+	local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
+	local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
+
+	if [ "$class_id1" = "$netvsc_cls" ]
+	then
+		primary=$2
+		secondary=$1
+	elif [ "$class_id2" = "$netvsc_cls" ]
+	then
+		primary=$1
+		secondary=$2
+	else
+		return 0
+	fi
+
+	echo $'\nBond name:' $bondname
+
+	echo configuring $primary
+	create_eth_cfg_pri_$distro $primary $bondname
+
+	echo configuring $secondary
+	create_eth_cfg_$distro $secondary $bondname
+
+	echo creating: $bondname with primary slave: $primary
+	create_bond_cfg_$distro $bondname $primary $secondary
+
+	let bondcnt=bondcnt+1
+}
+
+for (( i=0; i < $eth_cnt-1; i++ ))
+do
+        if [ -n "${list_match[$i]}" ]
+        then
+		create_bond ${list_eth[$i]} ${list_match[$i]}
+        fi
+done
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
index 4d21ef2d601d..4c6f09ac7d12 100644
--- a/tools/perf/scripts/python/netdev-times.py
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -252,9 +252,10 @@ def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, i
 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
 	all_event_list.append(event_info)
 
-def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, dev_name):
+def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi,
+                    dev_name, work=None, budget=None):
 	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
-			napi, dev_name)
+			napi, dev_name, work, budget)
 	all_event_list.append(event_info)
 
 def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
@@ -354,11 +355,13 @@ def handle_irq_softirq_exit(event_info):
 	receive_hunk_list.append(rec_data)
 
 def handle_napi_poll(event_info):
-	(name, context, cpu, time, pid, comm, napi, dev_name) = event_info
+	(name, context, cpu, time, pid, comm, napi, dev_name,
+		work, budget) = event_info
 	if cpu in net_rx_dic.keys():
 		event_list = net_rx_dic[cpu]['event_list']
 		rec_data = {'event_name':'napi_poll',
-				'dev':dev_name, 'event_t':time}
+				'dev':dev_name, 'event_t':time,
+				'work':work, 'budget':budget}
 		event_list.append(rec_data)
 
 def handle_netif_rx(event_info):
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index bbf69d248ec5..9f53020c3269 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -204,6 +204,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si
 	return (value_int & value_mask) | ~value_mask;
 }
 
+static int string_set_value(struct bt_ctf_field *field, const char *string)
+{
+	char *buffer = NULL;
+	size_t len = strlen(string), i, p;
+	int err;
+
+	for (i = p = 0; i < len; i++, p++) {
+		if (isprint(string[i])) {
+			if (!buffer)
+				continue;
+			buffer[p] = string[i];
+		} else {
+			char numstr[5];
+
+			snprintf(numstr, sizeof(numstr), "\\x%02x",
+				 (unsigned int)(string[i]) & 0xff);
+
+			if (!buffer) {
+				buffer = zalloc(i + (len - i) * 4 + 2);
+				if (!buffer) {
+					pr_err("failed to set unprintable string '%s'\n", string);
+					return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING");
+				}
+				if (i > 0)
+					strncpy(buffer, string, i);
+			}
+			strncat(buffer + p, numstr, 4);
+			p += 3;
+		}
+	}
+
+	if (!buffer)
+		return bt_ctf_field_string_set_value(field, string);
+	err = bt_ctf_field_string_set_value(field, buffer);
+	free(buffer);
+	return err;
+}
+
 static int add_tracepoint_field_value(struct ctf_writer *cw,
 				      struct bt_ctf_event_class *event_class,
 				      struct bt_ctf_event *event,
@@ -270,8 +308,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 		}
 
 		if (flags & FIELD_IS_STRING)
-			ret = bt_ctf_field_string_set_value(field,
-					data + offset + i * len);
+			ret = string_set_value(field, data + offset + i * len);
 		else {
 			unsigned long long value_int;
 
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f6fcc6832949..9b141f12329e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -673,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 	int err;
 	union perf_event *event;
 
+	if (symbol_conf.kptr_restrict)
+		return -1;
 	if (map == NULL)
 		return -1;
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 20f9cb32b703..54c4ff2b1cee 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1933,17 +1933,17 @@ int setup_intlist(struct intlist **list, const char *list_str,
 static bool symbol__read_kptr_restrict(void)
 {
 	bool value = false;
+	FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
 
-	if (geteuid() != 0) {
-		FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
-		if (fp != NULL) {
-			char line[8];
+	if (fp != NULL) {
+		char line[8];
 
-			if (fgets(line, sizeof(line), fp) != NULL)
-				value = atoi(line) != 0;
+		if (fgets(line, sizeof(line), fp) != NULL)
+			value = (geteuid() != 0) ?
+					(atoi(line) != 0) :
+					(atoi(line) == 2);
 
-			fclose(fp);
-		}
+		fclose(fp);
 	}
 
 	return value;
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index c2b61c4fda11..0bf5085281f3 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-FEATURE=`grep hist events/sched/sched_process_fork/trigger`
-if [ -z "$FEATURE" ]; then
+if [ ! -f events/sched/sched_process_fork/hist ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi
 
+reset_tracer
+do_reset
+
 echo "Test histogram with execname modifier"
 
 echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index b2902d42a537..a00184cd9c95 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-FEATURE=`grep hist events/sched/sched_process_fork/trigger`
-if [ -z "$FEATURE" ]; then
+if [ ! -f events/sched/sched_process_fork/hist ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi
 
+reset_tracer
+do_reset
+
 echo "Test histogram basic tigger"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index 03c4a46561fc..3478b00ead57 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-FEATURE=`grep hist events/sched/sched_process_fork/trigger`
-if [ -z "$FEATURE" ]; then
+if [ ! -f events/sched/sched_process_fork/hist ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi
 
+reset_tracer
+do_reset
+
 reset_trigger
 
 echo "Test histogram multiple tiggers"
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index 932ff577ffc0..00c4f65d12da 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -136,7 +136,7 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
 	printf("No of huge pages allocated = %d\n",
 	       (atoi(nr_hugepages)));
 
-	if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages))
+	if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
 	    != strlen(initial_nr_hugepages)) {
 		perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
 		goto close_fd;
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
index 50e086c6a7b6..877a8a4721b6 100644
--- a/tools/virtio/ringtest/Makefile
+++ b/tools/virtio/ringtest/Makefile
@@ -1,6 +1,6 @@
 all:
 
-all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring
+all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring
 
 CFLAGS += -Wall
 CFLAGS += -pthread -O2 -ggdb
@@ -17,6 +17,7 @@ virtio_ring_0_9: virtio_ring_0_9.o main.o
 virtio_ring_poll: virtio_ring_poll.o main.o
 virtio_ring_inorder: virtio_ring_inorder.o main.o
 ptr_ring: ptr_ring.o main.o
+noring: noring.o main.o
 clean:
 	-rm main.o
 	-rm ring.o ring
@@ -24,5 +25,6 @@ clean:
 	-rm virtio_ring_poll.o virtio_ring_poll
 	-rm virtio_ring_inorder.o virtio_ring_inorder
 	-rm ptr_ring.o ptr_ring
+	-rm noring.o noring
 
 .PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
index 34e94c46104f..d83707a336c9 100644
--- a/tools/virtio/ringtest/README
+++ b/tools/virtio/ringtest/README
@@ -1,2 +1,6 @@
 Partial implementation of various ring layouts, useful to tune virtio design.
 Uses shared memory heavily.
+
+Typical use:
+
+# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring
diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c
new file mode 100644
index 000000000000..eda2f4824130
--- /dev/null
+++ b/tools/virtio/ringtest/noring.c
@@ -0,0 +1,69 @@
+#define _GNU_SOURCE
+#include "main.h"
+#include <assert.h>
+
+/* stub implementation: useful for measuring overhead */
+void alloc_ring(void)
+{
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	return 0;
+}
+
+/*
+ * skb_array API provides no way for producer to find out whether a given
+ * buffer was consumed.  Our tests merely require that a successful get_buf
+ * implies that add_inbuf succeed in the past, and that add_inbuf will succeed,
+ * fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	return "Buffer";
+}
+
+void poll_used(void)
+{
+}
+
+void disable_call()
+{
+	assert(0);
+}
+
+bool enable_call()
+{
+	assert(0);
+}
+
+void kick_available(void)
+{
+	assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+	assert(0);
+}
+
+bool enable_kick()
+{
+	assert(0);
+}
+
+void poll_avail(void)
+{
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	return true;
+}
+
+void call_used(void)
+{
+	assert(0);
+}
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 74abd746ae91..68e4f9f0da3a 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -17,6 +17,11 @@
 typedef pthread_spinlock_t  spinlock_t;
 
 typedef int gfp_t;
+static void *kmalloc(unsigned size, gfp_t gfp)
+{
+	return memalign(64, size);
+}
+
 static void *kzalloc(unsigned size, gfp_t gfp)
 {
 	void *p = memalign(64, size);
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
index 52b0f71ffa8d..2e69ca812b4c 100755
--- a/tools/virtio/ringtest/run-on-all.sh
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -3,10 +3,10 @@
 #use last CPU for host. Why not the first?
 #many devices tend to use cpu0 by default so
 #it tends to be busier
-HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+HOST_AFFINITY=$(lscpu -p=cpu | tail -1)
 
 #run command on all cpus
-for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+for cpu in $(seq 0 $HOST_AFFINITY)
 do
 	#Don't run guest and host on same CPU
 	#It actually works ok if using signalling
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 1889163f2f05..7cf6e1769903 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -492,7 +492,7 @@ static void slab_stats(struct slabinfo *s)
 			s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass;
 
 	if (total) {
-		printf("\nSlab Deactivation             Ocurrences  %%\n");
+		printf("\nSlab Deactivation             Occurrences %%\n");
 		printf("-------------------------------------------------\n");
 		printf("Slab full                     %7lu  %3lu%%\n",
 			s->deactivate_full, (s->deactivate_full * 100) / total);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 02e98f3131bd..48bd520fc702 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2941,7 +2941,7 @@ static long kvm_vm_ioctl(struct file *filp,
 		if (copy_from_user(&routing, argp, sizeof(routing)))
 			goto out;
 		r = -EINVAL;
-		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+		if (routing.nr > KVM_MAX_IRQ_ROUTES)
 			goto out;
 		if (routing.flags)
 			goto out;