43 files changed, 1786 insertions, 250 deletions
diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
index d51da24f3505..ab8867e6939b 100644
--- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@@ -31,6 +31,22 @@ patternProperties:
         description:
           The ID number for the child PHY. Should be +1 of parent PHY.
 
+      nxp,rmii-refclk-in:
+        type: boolean
+        description: |
+          The REF_CLK is provided for both transmitted and received data
+          in RMII mode. This clock signal is provided by the PHY and is
+          typically derived from an external 25MHz crystal. Alternatively,
+          a 50MHz clock signal generated by an external oscillator can be
+          connected to pin REF_CLK. A third option is to connect a 25MHz
+          clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
+          as input or output according to the actual circuit connection.
+          If present, indicates that the REF_CLK will be configured as
+          interface reference clock input when RMII mode enabled.
+          If not present, the REF_CLK will be configured as interface
+          reference clock output when RMII mode enabled.
+          Only supported on TJA1100 and TJA1101.
+
     required:
       - reg
 
@@ -44,6 +60,7 @@ examples:
 
         tja1101_phy0: ethernet-phy@4 {
             reg = <0x4>;
+            nxp,rmii-refclk-in;
         };
     };
   - |
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index b8281d8be940..9ef11913052c 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -55,6 +55,7 @@ properties:
   compatible:
     enum:
       - ti,am654-cpsw-nuss
+      - ti,j7200-cpswxg-nuss
       - ti,j721e-cpsw-nuss
       - ti,am642-cpsw-nuss
 
@@ -110,7 +111,7 @@ properties:
         const: 0
 
     patternProperties:
-      port@[1-2]:
+      "^port@[1-4]$":
         type: object
         description: CPSWxG NUSS external ports
 
@@ -119,7 +120,7 @@ properties:
         properties:
           reg:
             minimum: 1
-            maximum: 2
+            maximum: 4
             description: CPSW port number
 
           phys:
@@ -178,6 +179,19 @@ required:
   - '#address-cells'
   - '#size-cells'
 
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              const: ti,j7200-cpswxg-nuss
+    then:
+      properties:
+        ethernet-ports:
+          patternProperties:
+            "^port@[3-4]$": false
+
 additionalProperties: false
 
 examples:
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index a61eac0c73f8..c78da9ce0ec4 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -26,6 +26,7 @@ place where this information is gathered.
    ioctl/index
    iommu
    media/index
+   netlink/index
    sysfs-platform_profile
    vduse
    futex2
diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst
new file mode 100644
index 000000000000..b0c21538d97d
--- /dev/null
+++ b/Documentation/userspace-api/netlink/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+================
+Netlink Handbook
+================
+
+Netlink documentation for users.
+
+.. toctree::
+   :maxdepth: 2
+
+   intro
diff --git a/Documentation/userspace-api/netlink/intro.rst b/Documentation/userspace-api/netlink/intro.rst
new file mode 100644
index 000000000000..94337f79e077
--- /dev/null
+++ b/Documentation/userspace-api/netlink/intro.rst
@@ -0,0 +1,643 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+=======================
+Introduction to Netlink
+=======================
+
+Netlink is often described as an ioctl() replacement.
+It aims to replace fixed-format C structures as supplied
+to ioctl() with a format which allows an easy way to add
+or extended the arguments.
+
+To achieve this Netlink uses a minimal fixed-format metadata header
+followed by multiple attributes in the TLV (type, length, value) format.
+
+Unfortunately the protocol has evolved over the years, in an organic
+and undocumented fashion, making it hard to coherently explain.
+To make the most practical sense this document starts by describing
+netlink as it is used today and dives into more "historical" uses
+in later sections.
+
+Opening a socket
+================
+
+Netlink communication happens over sockets, a socket needs to be
+opened first:
+
+.. code-block:: c
+
+  fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+The use of sockets allows for a natural way of exchanging information
+in both directions (to and from the kernel). The operations are still
+performed synchronously when applications send() the request but
+a separate recv() system call is needed to read the reply.
+
+A very simplified flow of a Netlink "call" will therefore look
+something like:
+
+.. code-block:: c
+
+  fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+  /* format the request */
+  send(fd, &request, sizeof(request));
+  n = recv(fd, &response, RSP_BUFFER_SIZE);
+  /* interpret the response */
+
+Netlink also provides natural support for "dumping", i.e. communicating
+to user space all objects of a certain type (e.g. dumping all network
+interfaces).
+
+.. code-block:: c
+
+  fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+  /* format the dump request */
+  send(fd, &request, sizeof(request));
+  while (1) {
+    n = recv(fd, &buffer, RSP_BUFFER_SIZE);
+    /* one recv() call can read multiple messages, hence the loop below */
+    for (nl_msg in buffer) {
+      if (nl_msg.nlmsg_type == NLMSG_DONE)
+        goto dump_finished;
+      /* process the object */
+    }
+  }
+  dump_finished:
+
+The first two arguments of the socket() call require little explanation -
+it is opening a Netlink socket, with all headers provided by the user
+(hence NETLINK, RAW). The last argument is the protocol within Netlink.
+This field used to identify the subsystem with which the socket will
+communicate.
+
+Classic vs Generic Netlink
+--------------------------
+
+Initial implementation of Netlink depended on a static allocation
+of IDs to subsystems and provided little supporting infrastructure.
+Let us refer to those protocols collectively as **Classic Netlink**.
+The list of them is defined on top of the ``include/uapi/linux/netlink.h``
+file, they include among others - general networking (NETLINK_ROUTE),
+iSCSI (NETLINK_ISCSI), and audit (NETLINK_AUDIT).
+
+**Generic Netlink** (introduced in 2005) allows for dynamic registration of
+subsystems (and subsystem ID allocation), introspection and simplifies
+implementing the kernel side of the interface.
+
+The following section describes how to use Generic Netlink, as the
+number of subsystems using Generic Netlink outnumbers the older
+protocols by an order of magnitude. There are also no plans for adding
+more Classic Netlink protocols to the kernel.
+Basic information on how communicating with core networking parts of
+the Linux kernel (or another of the 20 subsystems using Classic
+Netlink) differs from Generic Netlink is provided later in this document.
+
+Generic Netlink
+===============
+
+In addition to the Netlink fixed metadata header each Netlink protocol
+defines its own fixed metadata header. (Similarly to how network
+headers stack - Ethernet > IP > TCP we have Netlink > Generic N. > Family.)
+
+A Netlink message always starts with struct nlmsghdr, which is followed
+by a protocol-specific header. In case of Generic Netlink the protocol
+header is struct genlmsghdr.
+
+The practical meaning of the fields in case of Generic Netlink is as follows:
+
+.. code-block:: c
+
+  struct nlmsghdr {
+	__u32	nlmsg_len;	/* Length of message including headers */
+	__u16	nlmsg_type;	/* Generic Netlink Family (subsystem) ID */
+	__u16	nlmsg_flags;	/* Flags - request or dump */
+	__u32	nlmsg_seq;	/* Sequence number */
+	__u32	nlmsg_pid;	/* Port ID, set to 0 */
+  };
+  struct genlmsghdr {
+	__u8	cmd;		/* Command, as defined by the Family */
+	__u8	version;	/* Irrelevant, set to 1 */
+	__u16	reserved;	/* Reserved, set to 0 */
+  };
+  /* TLV attributes follow... */
+
+In Classic Netlink :c:member:`nlmsghdr.nlmsg_type` used to identify
+which operation within the subsystem the message was referring to
+(e.g. get information about a netdev). Generic Netlink needs to mux
+multiple subsystems in a single protocol so it uses this field to
+identify the subsystem, and :c:member:`genlmsghdr.cmd` identifies
+the operation instead. (See :ref:`res_fam` for
+information on how to find the Family ID of the subsystem of interest.)
+Note that the first 16 values (0 - 15) of this field are reserved for
+control messages both in Classic Netlink and Generic Netlink.
+See :ref:`nl_msg_type` for more details.
+
+There are 3 usual types of message exchanges on a Netlink socket:
+
+ - performing a single action (``do``);
+ - dumping information (``dump``);
+ - getting asynchronous notifications (``multicast``).
+
+Classic Netlink is very flexible and presumably allows other types
+of exchanges to happen, but in practice those are the three that get
+used.
+
+Asynchronous notifications are sent by the kernel and received by
+the user sockets which subscribed to them. ``do`` and ``dump`` requests
+are initiated by the user. :c:member:`nlmsghdr.nlmsg_flags` should
+be set as follows:
+
+ - for ``do``: ``NLM_F_REQUEST | NLM_F_ACK``
+ - for ``dump``: ``NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP``
+
+:c:member:`nlmsghdr.nlmsg_seq` should be a set to a monotonically
+increasing value. The value gets echoed back in responses and doesn't
+matter in practice, but setting it to an increasing value for each
+message sent is considered good hygiene. The purpose of the field is
+matching responses to requests. Asynchronous notifications will have
+:c:member:`nlmsghdr.nlmsg_seq` of ``0``.
+
+:c:member:`nlmsghdr.nlmsg_pid` is the Netlink equivalent of an address.
+This field can be set to ``0`` when talking to the kernel.
+See :ref:`nlmsg_pid` for the (uncommon) uses of the field.
+
+The expected use for :c:member:`genlmsghdr.version` was to allow
+versioning of the APIs provided by the subsystems. No subsystem to
+date made significant use of this field, so setting it to ``1`` seems
+like a safe bet.
+
+.. _nl_msg_type:
+
+Netlink message types
+---------------------
+
+As previously mentioned :c:member:`nlmsghdr.nlmsg_type` carries
+protocol specific values but the first 16 identifiers are reserved
+(first subsystem specific message type should be equal to
+``NLMSG_MIN_TYPE`` which is ``0x10``).
+
+There are only 4 Netlink control messages defined:
+
+ - ``NLMSG_NOOP`` - ignore the message, not used in practice;
+ - ``NLMSG_ERROR`` - carries the return code of an operation;
+ - ``NLMSG_DONE`` - marks the end of a dump;
+ - ``NLMSG_OVERRUN`` - socket buffer has overflown, not used to date.
+
+``NLMSG_ERROR`` and ``NLMSG_DONE`` are of practical importance.
+They carry return codes for operations. Note that unless
+the ``NLM_F_ACK`` flag is set on the request Netlink will not respond
+with ``NLMSG_ERROR`` if there is no error. To avoid having to special-case
+this quirk it is recommended to always set ``NLM_F_ACK``.
+
+The format of ``NLMSG_ERROR`` is described by struct nlmsgerr::
+
+  ----------------------------------------------
+  | struct nlmsghdr - response header          |
+  ----------------------------------------------
+  |    int error                               |
+  ----------------------------------------------
+  | struct nlmsghdr - original request header |
+  ----------------------------------------------
+  | ** optionally (1) payload of the request   |
+  ----------------------------------------------
+  | ** optionally (2) extended ACK             |
+  ----------------------------------------------
+
+There are two instances of struct nlmsghdr here, first of the response
+and second of the request. ``NLMSG_ERROR`` carries the information about
+the request which led to the error. This could be useful when trying
+to match requests to responses or re-parse the request to dump it into
+logs.
+
+The payload of the request is not echoed in messages reporting success
+(``error == 0``) or if ``NETLINK_CAP_ACK`` setsockopt() was set.
+The latter is common
+and perhaps recommended as having to read a copy of every request back
+from the kernel is rather wasteful. The absence of request payload
+is indicated by ``NLM_F_CAPPED`` in :c:member:`nlmsghdr.nlmsg_flags`.
+
+The second optional element of ``NLMSG_ERROR`` are the extended ACK
+attributes. See :ref:`ext_ack` for more details. The presence
+of extended ACK is indicated by ``NLM_F_ACK_TLVS`` in
+:c:member:`nlmsghdr.nlmsg_flags`.
+
+``NLMSG_DONE`` is simpler, the request is never echoed but the extended
+ACK attributes may be present::
+
+  ----------------------------------------------
+  | struct nlmsghdr - response header          |
+  ----------------------------------------------
+  |    int error                               |
+  ----------------------------------------------
+  | ** optionally extended ACK                 |
+  ----------------------------------------------
+
+.. _res_fam:
+
+Resolving the Family ID
+-----------------------
+
+This section explains how to find the Family ID of a subsystem.
+It also serves as an example of Generic Netlink communication.
+
+Generic Netlink is itself a subsystem exposed via the Generic Netlink API.
+To avoid a circular dependency Generic Netlink has a statically allocated
+Family ID (``GENL_ID_CTRL`` which is equal to ``NLMSG_MIN_TYPE``).
+The Generic Netlink family implements a command used to find out information
+about other families (``CTRL_CMD_GETFAMILY``).
+
+To get information about the Generic Netlink family named for example
+``"test1"`` we need to send a message on the previously opened Generic Netlink
+socket. The message should target the Generic Netlink Family (1), be a
+``do`` (2) call to ``CTRL_CMD_GETFAMILY`` (3). A ``dump`` version of this
+call would make the kernel respond with information about *all* the families
+it knows about. Last but not least the name of the family in question has
+to be specified (4) as an attribute with the appropriate type::
+
+  struct nlmsghdr:
+    __u32 nlmsg_len:	32
+    __u16 nlmsg_type:	GENL_ID_CTRL               // (1)
+    __u16 nlmsg_flags:	NLM_F_REQUEST | NLM_F_ACK  // (2)
+    __u32 nlmsg_seq:	1
+    __u32 nlmsg_pid:	0
+
+  struct genlmsghdr:
+    __u8 cmd:		CTRL_CMD_GETFAMILY         // (3)
+    __u8 version:	2 /* or 1, doesn't matter */
+    __u16 reserved:	0
+
+  struct nlattr:                                   // (4)
+    __u16 nla_len:	10
+    __u16 nla_type:	CTRL_ATTR_FAMILY_NAME
+    char data: 		test1\0
+
+  (padding:)
+    char data:		\0\0
+
+The length fields in Netlink (:c:member:`nlmsghdr.nlmsg_len`
+and :c:member:`nlattr.nla_len`) always *include* the header.
+Attribute headers in netlink must be aligned to 4 bytes from the start
+of the message, hence the extra ``\0\0`` after ``CTRL_ATTR_FAMILY_NAME``.
+The attribute lengths *exclude* the padding.
+
+If the family is found kernel will reply with two messages, the response
+with all the information about the family::
+
+  /* Message #1 - reply */
+  struct nlmsghdr:
+    __u32 nlmsg_len:	136
+    __u16 nlmsg_type:	GENL_ID_CTRL
+    __u16 nlmsg_flags:	0
+    __u32 nlmsg_seq:	1    /* echoed from our request */
+    __u32 nlmsg_pid:	5831 /* The PID of our user space process */
+
+  struct genlmsghdr:
+    __u8 cmd:		CTRL_CMD_GETFAMILY
+    __u8 version:	2
+    __u16 reserved:	0
+
+  struct nlattr:
+    __u16 nla_len:	10
+    __u16 nla_type:	CTRL_ATTR_FAMILY_NAME
+    char data: 		test1\0
+
+  (padding:)
+    data:		\0\0
+
+  struct nlattr:
+    __u16 nla_len:	6
+    __u16 nla_type:	CTRL_ATTR_FAMILY_ID
+    __u16: 		123  /* The Family ID we are after */
+
+  (padding:)
+    char data:		\0\0
+
+  struct nlattr:
+    __u16 nla_len:	9
+    __u16 nla_type:	CTRL_ATTR_FAMILY_VERSION
+    __u16: 		1
+
+  /* ... etc, more attributes will follow. */
+
+And the error code (success) since ``NLM_F_ACK`` had been set on the request::
+
+  /* Message #2 - the ACK */
+  struct nlmsghdr:
+    __u32 nlmsg_len:	36
+    __u16 nlmsg_type:	NLMSG_ERROR
+    __u16 nlmsg_flags:	NLM_F_CAPPED /* There won't be a payload */
+    __u32 nlmsg_seq:	1    /* echoed from our request */
+    __u32 nlmsg_pid:	5831 /* The PID of our user space process */
+
+  int error:		0
+
+  struct nlmsghdr: /* Copy of the request header as we sent it */
+    __u32 nlmsg_len:	32
+    __u16 nlmsg_type:	GENL_ID_CTRL
+    __u16 nlmsg_flags:	NLM_F_REQUEST | NLM_F_ACK
+    __u32 nlmsg_seq:	1
+    __u32 nlmsg_pid:	0
+
+The order of attributes (struct nlattr) is not guaranteed so the user
+has to walk the attributes and parse them.
+
+Note that Generic Netlink sockets are not associated or bound to a single
+family. A socket can be used to exchange messages with many different
+families, selecting the recipient family on message-by-message basis using
+the :c:member:`nlmsghdr.nlmsg_type` field.
+
+.. _ext_ack:
+
+Extended ACK
+------------
+
+Extended ACK controls reporting of additional error/warning TLVs
+in ``NLMSG_ERROR`` and ``NLMSG_DONE`` messages. To maintain backward
+compatibility this feature has to be explicitly enabled by setting
+the ``NETLINK_EXT_ACK`` setsockopt() to ``1``.
+
+Types of extended ack attributes are defined in enum nlmsgerr_attrs.
+The two most commonly used attributes are ``NLMSGERR_ATTR_MSG``
+and ``NLMSGERR_ATTR_OFFS``.
+
+``NLMSGERR_ATTR_MSG`` carries a message in English describing
+the encountered problem. These messages are far more detailed
+than what can be expressed thru standard UNIX error codes.
+
+``NLMSGERR_ATTR_OFFS`` points to the attribute which caused the problem.
+
+Extended ACKs can be reported on errors as well as in case of success.
+The latter should be treated as a warning.
+
+Extended ACKs greatly improve the usability of Netlink and should
+always be enabled, appropriately parsed and reported to the user.
+
+Advanced topics
+===============
+
+Dump consistency
+----------------
+
+Some of the data structures kernel uses for storing objects make
+it hard to provide an atomic snapshot of all the objects in a dump
+(without impacting the fast-paths updating them).
+
+Kernel may set the ``NLM_F_DUMP_INTR`` flag on any message in a dump
+(including the ``NLMSG_DONE`` message) if the dump was interrupted and
+may be inconsistent (e.g. missing objects). User space should retry
+the dump if it sees the flag set.
+
+Introspection
+-------------
+
+The basic introspection abilities are enabled by access to the Family
+object as reported in :ref:`res_fam`. User can query information about
+the Generic Netlink family, including which operations are supported
+by the kernel and what attributes the kernel understands.
+Family information includes the highest ID of an attribute kernel can parse,
+a separate command (``CTRL_CMD_GETPOLICY``) provides detailed information
+about supported attributes, including ranges of values the kernel accepts.
+
+Querying family information is useful in cases when user space needs
+to make sure that the kernel has support for a feature before issuing
+a request.
+
+.. _nlmsg_pid:
+
+nlmsg_pid
+---------
+
+:c:member:`nlmsghdr.nlmsg_pid` is the Netlink equivalent of an address.
+It is referred to as Port ID, sometimes Process ID because for historical
+reasons if the application does not select (bind() to) an explicit Port ID
+kernel will automatically assign it the ID equal to its Process ID
+(as reported by the getpid() system call).
+
+Similarly to the bind() semantics of the TCP/IP network protocols the value
+of zero means "assign automatically", hence it is common for applications
+to leave the :c:member:`nlmsghdr.nlmsg_pid` field initialized to ``0``.
+
+The field is still used today in rare cases when kernel needs to send
+a unicast notification. User space application can use bind() to associate
+its socket with a specific PID, it then communicates its PID to the kernel.
+This way the kernel can reach the specific user space process.
+
+This sort of communication is utilized in UMH (User Mode Helper)-like
+scenarios when kernel needs to trigger user space processing or ask user
+space for a policy decision.
+
+Multicast notifications
+-----------------------
+
+One of the strengths of Netlink is the ability to send event notifications
+to user space. This is a unidirectional form of communication (kernel ->
+user) and does not involve any control messages like ``NLMSG_ERROR`` or
+``NLMSG_DONE``.
+
+For example the Generic Netlink family itself defines a set of multicast
+notifications about registered families. When a new family is added the
+sockets subscribed to the notifications will get the following message::
+
+  struct nlmsghdr:
+    __u32 nlmsg_len:	136
+    __u16 nlmsg_type:	GENL_ID_CTRL
+    __u16 nlmsg_flags:	0
+    __u32 nlmsg_seq:	0
+    __u32 nlmsg_pid:	0
+
+  struct genlmsghdr:
+    __u8 cmd:		CTRL_CMD_NEWFAMILY
+    __u8 version:	2
+    __u16 reserved:	0
+
+  struct nlattr:
+    __u16 nla_len:	10
+    __u16 nla_type:	CTRL_ATTR_FAMILY_NAME
+    char data: 		test1\0
+
+  (padding:)
+    data:		\0\0
+
+  struct nlattr:
+    __u16 nla_len:	6
+    __u16 nla_type:	CTRL_ATTR_FAMILY_ID
+    __u16: 		123  /* The Family ID we are after */
+
+  (padding:)
+    char data:		\0\0
+
+  struct nlattr:
+    __u16 nla_len:	9
+    __u16 nla_type:	CTRL_ATTR_FAMILY_VERSION
+    __u16: 		1
+
+  /* ... etc, more attributes will follow. */
+
+The notification contains the same information as the response
+to the ``CTRL_CMD_GETFAMILY`` request.
+
+The Netlink headers of the notification are mostly 0 and irrelevant.
+The :c:member:`nlmsghdr.nlmsg_seq` may be either zero or a monotonically
+increasing notification sequence number maintained by the family.
+
+To receive notifications the user socket must subscribe to the relevant
+notification group. Much like the Family ID, the Group ID for a given
+multicast group is dynamic and can be found inside the Family information.
+The ``CTRL_ATTR_MCAST_GROUPS`` attribute contains nests with names
+(``CTRL_ATTR_MCAST_GRP_NAME``) and IDs (``CTRL_ATTR_MCAST_GRP_ID``) of
+the groups family.
+
+Once the Group ID is known a setsockopt() call adds the socket to the group:
+
+.. code-block:: c
+
+  unsigned int group_id;
+
+  /* .. find the group ID... */
+
+  setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+             &group_id, sizeof(group_id));
+
+The socket will now receive notifications.
+
+It is recommended to use separate sockets for receiving notifications
+and sending requests to the kernel. The asynchronous nature of notifications
+means that they may get mixed in with the responses making the message
+handling much harder.
+
+Buffer sizing
+-------------
+
+Netlink sockets are datagram sockets rather than stream sockets,
+meaning that each message must be received in its entirety by a single
+recv()/recvmsg() system call. If the buffer provided by the user is too
+short, the message will be truncated and the ``MSG_TRUNC`` flag set
+in struct msghdr (struct msghdr is the second argument
+of the recvmsg() system call, *not* a Netlink header).
+
+Upon truncation the remaining part of the message is discarded.
+
+Netlink expects that the user buffer will be at least 8kB or a page
+size of the CPU architecture, whichever is bigger. Particular Netlink
+families may, however, require a larger buffer. 32kB buffer is recommended
+for most efficient handling of dumps (larger buffer fits more dumped
+objects and therefore fewer recvmsg() calls are needed).
+
+Classic Netlink
+===============
+
+The main differences between Classic and Generic Netlink are the dynamic
+allocation of subsystem identifiers and availability of introspection.
+In theory the protocol does not differ significantly, however, in practice
+Classic Netlink experimented with concepts which were abandoned in Generic
+Netlink (really, they usually only found use in a small corner of a single
+subsystem). This section is meant as an explainer of a few of such concepts,
+with the explicit goal of giving the Generic Netlink
+users the confidence to ignore them when reading the uAPI headers.
+
+Most of the concepts and examples here refer to the ``NETLINK_ROUTE`` family,
+which covers much of the configuration of the Linux networking stack.
+Real documentation of that family, deserves a chapter (or a book) of its own.
+
+Families
+--------
+
+Netlink refers to subsystems as families. This is a remnant of using
+sockets and the concept of protocol families, which are part of message
+demultiplexing in ``NETLINK_ROUTE``.
+
+Sadly every layer of encapsulation likes to refer to whatever it's carrying
+as "families" making the term very confusing:
+
+ 1. AF_NETLINK is a bona fide socket protocol family
+ 2. AF_NETLINK's documentation refers to what comes after its own
+    header (struct nlmsghdr) in a message as a "Family Header"
+ 3. Generic Netlink is a family for AF_NETLINK (struct genlmsghdr follows
+    struct nlmsghdr), yet it also calls its users "Families".
+
+Note that the Generic Netlink Family IDs are in a different "ID space"
+and overlap with Classic Netlink protocol numbers (e.g. ``NETLINK_CRYPTO``
+has the Classic Netlink protocol ID of 21 which Generic Netlink will
+happily allocate to one of its families as well).
+
+Strict checking
+---------------
+
+The ``NETLINK_GET_STRICT_CHK`` socket option enables strict input checking
+in ``NETLINK_ROUTE``. It was needed because historically kernel did not
+validate the fields of structures it didn't process. This made it impossible
+to start using those fields later without risking regressions in applications
+which initialized them incorrectly or not at all.
+
+``NETLINK_GET_STRICT_CHK`` declares that the application is initializing
+all fields correctly. It also opts into validating that message does not
+contain trailing data and requests that kernel rejects attributes with
+type higher than largest attribute type known to the kernel.
+
+``NETLINK_GET_STRICT_CHK`` is not used outside of ``NETLINK_ROUTE``.
+
+Unknown attributes
+------------------
+
+Historically Netlink ignored all unknown attributes. The thinking was that
+it would free the application from having to probe what kernel supports.
+The application could make a request to change the state and check which
+parts of the request "stuck".
+
+This is no longer the case for new Generic Netlink families and those opting
+in to strict checking. See enum netlink_validation for validation types
+performed.
+
+Fixed metadata and structures
+-----------------------------
+
+Classic Netlink made liberal use of fixed-format structures within
+the messages. Messages would commonly have a structure with
+a considerable number of fields after struct nlmsghdr. It was also
+common to put structures with multiple members inside attributes,
+without breaking each member into an attribute of its own.
+
+This has caused problems with validation and extensibility and
+therefore using binary structures is actively discouraged for new
+attributes.
+
+Request types
+-------------
+
+``NETLINK_ROUTE`` categorized requests into 4 types ``NEW``, ``DEL``, ``GET``,
+and ``SET``. Each object can handle all or some of those requests
+(objects being netdevs, routes, addresses, qdiscs etc.) Request type
+is defined by the 2 lowest bits of the message type, so commands for
+new objects would always be allocated with a stride of 4.
+
+Each object would also have it's own fixed metadata shared by all request
+types (e.g. struct ifinfomsg for netdev requests, struct ifaddrmsg for address
+requests, struct tcmsg for qdisc requests).
+
+Even though other protocols and Generic Netlink commands often use
+the same verbs in their message names (``GET``, ``SET``) the concept
+of request types did not find wider adoption.
+
+Message flags
+-------------
+
+The earlier section has already covered the basic request flags
+(``NLM_F_REQUEST``, ``NLM_F_ACK``, ``NLM_F_DUMP``) and the ``NLMSG_ERROR`` /
+``NLMSG_DONE`` flags (``NLM_F_CAPPED``, ``NLM_F_ACK_TLVS``).
+Dump flags were also mentioned (``NLM_F_MULTI``, ``NLM_F_DUMP_INTR``).
+
+Those are the main flags of note, with a small exception (of ``ieee802154``)
+Generic Netlink does not make use of other flags. If the protocol needs
+to communicate special constraints for a request it should use
+an attribute, not the flags in struct nlmsghdr.
+
+Classic Netlink, however, defined various flags for its ``GET``, ``NEW``
+and ``DEL`` requests. Since request types have not been generalized
+the request type specific flags should not be used either.
+
+uAPI reference
+==============
+
+.. kernel-doc:: include/uapi/linux/netlink.h
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index aadb0bd7c24f..ee19ed96f284 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -445,6 +445,9 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds)
 	if (err)
 		return err;
 
+	dsa_switch_for_each_cpu_port(dp, ds)
+		ocelot_port_setup_dsa_8021q_cpu(ocelot, dp->index);
+
 	dsa_switch_for_each_user_port(dp, ds)
 		ocelot_port_assign_dsa_8021q_cpu(ocelot, dp->index,
 						 dp->cpu_dp->index);
@@ -493,6 +496,9 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds)
 	dsa_switch_for_each_user_port(dp, ds)
 		ocelot_port_unassign_dsa_8021q_cpu(ocelot, dp->index);
 
+	dsa_switch_for_each_cpu_port(dp, ds)
+		ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index);
+
 	dsa_tag_8021q_unregister(ds);
 }
 
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 8a341e2d5833..2e6524009b19 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1075,6 +1075,7 @@ static int ftmac100_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	netdev->ethtool_ops = &ftmac100_ethtool_ops;
 	netdev->netdev_ops = &ftmac100_netdev_ops;
+	netdev->max_mtu = MAX_PKT_SIZE;
 
 	platform_set_drvdata(pdev, netdev);
 
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index 2f84d0fb4094..e5a4381a88b3 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -367,6 +367,8 @@ int prestera_port_learning_set(struct prestera_port *port, bool learn_enable);
 int prestera_port_uc_flood_set(struct prestera_port *port, bool flood);
 int prestera_port_mc_flood_set(struct prestera_port *port, bool flood);
 
+int prestera_port_br_locked_set(struct prestera_port *port, bool br_locked);
+
 int prestera_port_pvid_set(struct prestera_port *port, u16 vid);
 
 bool prestera_netdev_check(const struct net_device *dev);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index e0e9ae34ceea..1a68a888d587 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -101,6 +101,7 @@ enum {
 	PRESTERA_CMD_PORT_ATTR_LEARNING = 7,
 	PRESTERA_CMD_PORT_ATTR_FLOOD = 8,
 	PRESTERA_CMD_PORT_ATTR_CAPABILITY = 9,
+	PRESTERA_CMD_PORT_ATTR_LOCKED = 10,
 	PRESTERA_CMD_PORT_ATTR_PHY_MODE = 12,
 	PRESTERA_CMD_PORT_ATTR_TYPE = 13,
 	PRESTERA_CMD_PORT_ATTR_STATS = 17,
@@ -285,6 +286,7 @@ union prestera_msg_port_param {
 	u8 duplex;
 	u8 fec;
 	u8 fc;
+	u8 br_locked;
 	union {
 		struct {
 			u8 admin;
@@ -1640,6 +1642,22 @@ int prestera_hw_port_mc_flood_set(const struct prestera_port *port, bool flood)
 			    &req.cmd, sizeof(req));
 }
 
+int prestera_hw_port_br_locked_set(const struct prestera_port *port,
+				   bool br_locked)
+{
+	struct prestera_msg_port_attr_req req = {
+		.attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_LOCKED),
+		.port = __cpu_to_le32(port->hw_id),
+		.dev = __cpu_to_le32(port->dev_id),
+		.param = {
+			.br_locked = br_locked,
+		}
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+			    &req.cmd, sizeof(req));
+}
+
 int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid)
 {
 	struct prestera_msg_vlan_req req = {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index 56e043146dd2..4aca43e72a05 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -183,6 +183,8 @@ int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed);
 int prestera_hw_port_learning_set(struct prestera_port *port, bool enable);
 int prestera_hw_port_uc_flood_set(const struct prestera_port *port, bool flood);
 int prestera_hw_port_mc_flood_set(const struct prestera_port *port, bool flood);
+int prestera_hw_port_br_locked_set(const struct prestera_port *port,
+				   bool br_locked);
 int prestera_hw_port_accept_frm_type(struct prestera_port *port,
 				     enum prestera_accept_frm_type type);
 /* Vlan API */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 3489b80ae0d6..3956d6d5df3c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -51,6 +51,11 @@ int prestera_port_mc_flood_set(struct prestera_port *port, bool flood)
 	return prestera_hw_port_mc_flood_set(port, flood);
 }
 
+int prestera_port_br_locked_set(struct prestera_port *port, bool br_locked)
+{
+	return prestera_hw_port_br_locked_set(port, br_locked);
+}
+
 int prestera_port_pvid_set(struct prestera_port *port, u16 vid)
 {
 	enum prestera_accept_frm_type frm_type;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 71cde97d85c8..e548cd32582e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -143,6 +143,7 @@ prestera_br_port_flags_reset(struct prestera_bridge_port *br_port,
 	prestera_port_uc_flood_set(port, false);
 	prestera_port_mc_flood_set(port, false);
 	prestera_port_learning_set(port, false);
+	prestera_port_br_locked_set(port, false);
 }
 
 static int prestera_br_port_flags_set(struct prestera_bridge_port *br_port,
@@ -162,6 +163,11 @@ static int prestera_br_port_flags_set(struct prestera_bridge_port *br_port,
 	if (err)
 		goto err_out;
 
+	err = prestera_port_br_locked_set(port,
+					  br_port->flags & BR_PORT_LOCKED);
+	if (err)
+		goto err_out;
+
 	return 0;
 
 err_out:
@@ -1163,7 +1169,7 @@ static int prestera_port_obj_attr_set(struct net_device *dev, const void *ctx,
 		break;
 	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
 		if (attr->u.brport_flags.mask &
-		    ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
+		    ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_PORT_LOCKED))
 			err = -EINVAL;
 		break;
 	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 75553eb2c7f2..450dcd9951bb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -70,6 +70,8 @@ struct mlxsw_core {
 	struct workqueue_struct *emad_wq;
 	struct list_head rx_listener_list;
 	struct list_head event_listener_list;
+	struct list_head irq_event_handler_list;
+	struct mutex irq_event_handler_lock; /* Locks access to handlers list */
 	struct {
 		atomic64_t tid;
 		struct list_head trans_list;
@@ -2090,6 +2092,18 @@ static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core)
 	devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
 }
 
+static void mlxsw_core_irq_event_handler_init(struct mlxsw_core *mlxsw_core)
+{
+	INIT_LIST_HEAD(&mlxsw_core->irq_event_handler_list);
+	mutex_init(&mlxsw_core->irq_event_handler_lock);
+}
+
+static void mlxsw_core_irq_event_handler_fini(struct mlxsw_core *mlxsw_core)
+{
+	mutex_destroy(&mlxsw_core->irq_event_handler_lock);
+	WARN_ON(!list_empty(&mlxsw_core->irq_event_handler_list));
+}
+
 static int
 __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 				 const struct mlxsw_bus *mlxsw_bus,
@@ -2125,6 +2139,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	mlxsw_core->bus = mlxsw_bus;
 	mlxsw_core->bus_priv = bus_priv;
 	mlxsw_core->bus_info = mlxsw_bus_info;
+	mlxsw_core_irq_event_handler_init(mlxsw_core);
 
 	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
 			      &mlxsw_core->res);
@@ -2233,6 +2248,7 @@ err_ports_init:
 err_register_resources:
 	mlxsw_bus->fini(bus_priv);
 err_bus_init:
+	mlxsw_core_irq_event_handler_fini(mlxsw_core);
 	if (!reload) {
 		devl_unlock(devlink);
 		devlink_free(devlink);
@@ -2302,6 +2318,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 	if (!reload)
 		devl_resources_unregister(devlink);
 	mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+	mlxsw_core_irq_event_handler_fini(mlxsw_core);
 	if (!reload) {
 		devl_unlock(devlink);
 		devlink_free(devlink);
@@ -2772,6 +2789,57 @@ int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list)
 }
 EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait);
 
+struct mlxsw_core_irq_event_handler_item {
+	struct list_head list;
+	void (*cb)(struct mlxsw_core *mlxsw_core);
+};
+
+int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core,
+					  mlxsw_irq_event_cb_t cb)
+{
+	struct mlxsw_core_irq_event_handler_item *item;
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+	item->cb = cb;
+	mutex_lock(&mlxsw_core->irq_event_handler_lock);
+	list_add_tail(&item->list, &mlxsw_core->irq_event_handler_list);
+	mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+	return 0;
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handler_register);
+
+void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core,
+					     mlxsw_irq_event_cb_t cb)
+{
+	struct mlxsw_core_irq_event_handler_item *item, *tmp;
+
+	mutex_lock(&mlxsw_core->irq_event_handler_lock);
+	list_for_each_entry_safe(item, tmp,
+				 &mlxsw_core->irq_event_handler_list, list) {
+		if (item->cb == cb) {
+			list_del(&item->list);
+			kfree(item);
+		}
+	}
+	mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handler_unregister);
+
+void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core)
+{
+	struct mlxsw_core_irq_event_handler_item *item;
+
+	mutex_lock(&mlxsw_core->irq_event_handler_lock);
+	list_for_each_entry(item, &mlxsw_core->irq_event_handler_list, list) {
+		if (item->cb)
+			item->cb(mlxsw_core);
+	}
+	mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handlers_call);
+
 static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
 				     const struct mlxsw_reg_info *reg,
 				     char *payload,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 02d9cc2ef0c8..c7c0b3cefd8d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -215,6 +215,14 @@ int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
 			  mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
 int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list);
 
+typedef void mlxsw_irq_event_cb_t(struct mlxsw_core *mlxsw_core);
+
+int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core,
+					  mlxsw_irq_event_cb_t cb);
+void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core,
+					     mlxsw_irq_event_cb_t cb);
+void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core);
+
 int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
 		    const struct mlxsw_reg_info *reg, char *payload);
 int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index ca59f0b946da..83d2dc91ba2c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -785,6 +785,21 @@ static int mlxsw_linecard_status_get_and_process(struct mlxsw_core *mlxsw_core,
 	return mlxsw_linecard_status_process(linecards, linecard, mddq_pl);
 }
 
+static void mlxsw_linecards_irq_event_handler(struct mlxsw_core *mlxsw_core)
+{
+	struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core);
+	int i;
+
+	/* Handle change of line card active state. */
+	for (i = 0; i < linecards->count; i++) {
+		struct mlxsw_linecard *linecard = mlxsw_linecard_get(linecards,
+								     i + 1);
+
+		mlxsw_linecard_status_get_and_process(mlxsw_core, linecards,
+						      linecard);
+	}
+}
+
 static const char * const mlxsw_linecard_status_event_type_name[] = {
 	[MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION] = "provision",
 	[MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION] = "unprovision",
@@ -1238,7 +1253,6 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
 {
 	struct devlink_linecard *devlink_linecard;
 	struct mlxsw_linecard *linecard;
-	int err;
 
 	linecard = mlxsw_linecard_get(linecards, slot_index);
 	linecard->slot_index = slot_index;
@@ -1248,17 +1262,45 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
 	devlink_linecard = devlink_linecard_create(priv_to_devlink(mlxsw_core),
 						   slot_index, &mlxsw_linecard_ops,
 						   linecard);
-	if (IS_ERR(devlink_linecard)) {
-		err = PTR_ERR(devlink_linecard);
-		goto err_devlink_linecard_create;
-	}
+	if (IS_ERR(devlink_linecard))
+		return PTR_ERR(devlink_linecard);
+
 	linecard->devlink_linecard = devlink_linecard;
 	INIT_DELAYED_WORK(&linecard->status_event_to_dw,
 			  &mlxsw_linecard_status_event_to_work);
 
+	return 0;
+}
+
+static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
+				struct mlxsw_linecards *linecards,
+				u8 slot_index)
+{
+	struct mlxsw_linecard *linecard;
+
+	linecard = mlxsw_linecard_get(linecards, slot_index);
+	cancel_delayed_work_sync(&linecard->status_event_to_dw);
+	/* Make sure all scheduled events are processed */
+	mlxsw_core_flush_owq();
+	if (linecard->active)
+		mlxsw_linecard_active_clear(linecard);
+	mlxsw_linecard_bdev_del(linecard);
+	devlink_linecard_destroy(linecard->devlink_linecard);
+	mutex_destroy(&linecard->lock);
+}
+
+static int
+mlxsw_linecard_event_delivery_init(struct mlxsw_core *mlxsw_core,
+				   struct mlxsw_linecards *linecards,
+				   u8 slot_index)
+{
+	struct mlxsw_linecard *linecard;
+	int err;
+
+	linecard = mlxsw_linecard_get(linecards, slot_index);
 	err = mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, true);
 	if (err)
-		goto err_event_delivery_set;
+		return err;
 
 	err = mlxsw_linecard_status_get_and_process(mlxsw_core, linecards,
 						    linecard);
@@ -1269,29 +1311,18 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
 
 err_status_get_and_process:
 	mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false);
-err_event_delivery_set:
-	devlink_linecard_destroy(linecard->devlink_linecard);
-err_devlink_linecard_create:
-	mutex_destroy(&linecard->lock);
 	return err;
 }
 
-static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
-				struct mlxsw_linecards *linecards,
-				u8 slot_index)
+static void
+mlxsw_linecard_event_delivery_fini(struct mlxsw_core *mlxsw_core,
+				   struct mlxsw_linecards *linecards,
+				   u8 slot_index)
 {
 	struct mlxsw_linecard *linecard;
 
 	linecard = mlxsw_linecard_get(linecards, slot_index);
 	mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false);
-	cancel_delayed_work_sync(&linecard->status_event_to_dw);
-	/* Make sure all scheduled events are processed */
-	mlxsw_core_flush_owq();
-	if (linecard->active)
-		mlxsw_linecard_active_clear(linecard);
-	mlxsw_linecard_bdev_del(linecard);
-	devlink_linecard_destroy(linecard->devlink_linecard);
-	mutex_destroy(&linecard->lock);
 }
 
 /*       LINECARDS INI BUNDLE FILE
@@ -1505,6 +1536,11 @@ int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core,
 	if (err)
 		goto err_traps_register;
 
+	err = mlxsw_core_irq_event_handler_register(mlxsw_core,
+						    mlxsw_linecards_irq_event_handler);
+	if (err)
+		goto err_irq_event_handler_register;
+
 	mlxsw_core_linecards_set(mlxsw_core, linecards);
 
 	for (i = 0; i < linecards->count; i++) {
@@ -1513,11 +1549,25 @@ int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core,
 			goto err_linecard_init;
 	}
 
+	for (i = 0; i < linecards->count; i++) {
+		err = mlxsw_linecard_event_delivery_init(mlxsw_core, linecards,
+							 i + 1);
+		if (err)
+			goto err_linecard_event_delivery_init;
+	}
+
 	return 0;
 
+err_linecard_event_delivery_init:
+	for (i--; i >= 0; i--)
+		mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1);
+	i = linecards->count;
 err_linecard_init:
 	for (i--; i >= 0; i--)
 		mlxsw_linecard_fini(mlxsw_core, linecards, i + 1);
+	mlxsw_core_irq_event_handler_unregister(mlxsw_core,
+						mlxsw_linecards_irq_event_handler);
+err_irq_event_handler_register:
 	mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener,
 				    ARRAY_SIZE(mlxsw_linecard_listener),
 				    mlxsw_core);
@@ -1536,7 +1586,11 @@ void mlxsw_linecards_fini(struct mlxsw_core *mlxsw_core)
 	if (!linecards)
 		return;
 	for (i = 0; i < linecards->count; i++)
+		mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1);
+	for (i = 0; i < linecards->count; i++)
 		mlxsw_linecard_fini(mlxsw_core, linecards, i + 1);
+	mlxsw_core_irq_event_handler_unregister(mlxsw_core,
+						mlxsw_linecards_irq_event_handler);
 	mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener,
 				    ARRAY_SIZE(mlxsw_linecard_listener),
 				    mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index ce843ea91464..716c73e4fd59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -9,6 +9,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
+#include <linux/platform_data/mlxreg.h>
 #include <linux/slab.h>
 
 #include "cmd.h"
@@ -51,6 +52,15 @@
 #define MLXSW_I2C_TIMEOUT_MSECS		5000
 #define MLXSW_I2C_MAX_DATA_SIZE		256
 
+/* Driver can be initialized by kernel platform driver or from the user
+ * space. In the first case IRQ line number is passed through the platform
+ * data, otherwise default IRQ line is to be used. Default IRQ is relevant
+ * only for specific I2C slave address, allowing 3.4 MHz I2C path to the chip
+ * (special hardware feature for I2C acceleration).
+ */
+#define MLXSW_I2C_DEFAULT_IRQ		17
+#define MLXSW_FAST_I2C_SLAVE		0x37
+
 /**
  * struct mlxsw_i2c - device private data:
  * @cmd: command attributes;
@@ -63,6 +73,9 @@
  * @core: switch core pointer;
  * @bus_info: bus info block;
  * @block_size: maximum block size allowed to pass to under layer;
+ * @pdata: device platform data;
+ * @irq_work: interrupts work item;
+ * @irq: IRQ line number;
  */
 struct mlxsw_i2c {
 	struct {
@@ -76,6 +89,9 @@ struct mlxsw_i2c {
 	struct mlxsw_core *core;
 	struct mlxsw_bus_info bus_info;
 	u16 block_size;
+	struct mlxreg_core_hotplug_platform_data *pdata;
+	struct work_struct irq_work;
+	int irq;
 };
 
 #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) {	\
@@ -546,6 +562,67 @@ static void mlxsw_i2c_fini(void *bus_priv)
 	mlxsw_i2c->core = NULL;
 }
 
+static void mlxsw_i2c_work_handler(struct work_struct *work)
+{
+	struct mlxsw_i2c *mlxsw_i2c;
+
+	mlxsw_i2c = container_of(work, struct mlxsw_i2c, irq_work);
+	mlxsw_core_irq_event_handlers_call(mlxsw_i2c->core);
+}
+
+static irqreturn_t mlxsw_i2c_irq_handler(int irq, void *dev)
+{
+	struct mlxsw_i2c *mlxsw_i2c = dev;
+
+	mlxsw_core_schedule_work(&mlxsw_i2c->irq_work);
+
+	/* Interrupt handler shares IRQ line with 'main' interrupt handler.
+	 * Return here IRQ_NONE, while main handler will return IRQ_HANDLED.
+	 */
+	return IRQ_NONE;
+}
+
+static int mlxsw_i2c_irq_init(struct mlxsw_i2c *mlxsw_i2c, u8 addr)
+{
+	int err;
+
+	/* Initialize interrupt handler if system hotplug driver is reachable,
+	 * otherwise interrupt line is not enabled and interrupts will not be
+	 * raised to CPU. Also request_irq() call will be not valid.
+	 */
+	if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG))
+		return 0;
+
+	/* Set default interrupt line. */
+	if (mlxsw_i2c->pdata && mlxsw_i2c->pdata->irq)
+		mlxsw_i2c->irq = mlxsw_i2c->pdata->irq;
+	else if (addr == MLXSW_FAST_I2C_SLAVE)
+		mlxsw_i2c->irq = MLXSW_I2C_DEFAULT_IRQ;
+
+	if (!mlxsw_i2c->irq)
+		return 0;
+
+	INIT_WORK(&mlxsw_i2c->irq_work, mlxsw_i2c_work_handler);
+	err = request_irq(mlxsw_i2c->irq, mlxsw_i2c_irq_handler,
+			  IRQF_TRIGGER_FALLING | IRQF_SHARED, "mlxsw-i2c",
+			  mlxsw_i2c);
+	if (err) {
+		dev_err(mlxsw_i2c->bus_info.dev, "Failed to request irq: %d\n",
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlxsw_i2c_irq_fini(struct mlxsw_i2c *mlxsw_i2c)
+{
+	if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG) || !mlxsw_i2c->irq)
+		return;
+	cancel_work_sync(&mlxsw_i2c->irq_work);
+	free_irq(mlxsw_i2c->irq, mlxsw_i2c);
+}
+
 static const struct mlxsw_bus mlxsw_i2c_bus = {
 	.kind			= "i2c",
 	.init			= mlxsw_i2c_init,
@@ -638,17 +715,24 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
 	mlxsw_i2c->bus_info.dev = &client->dev;
 	mlxsw_i2c->bus_info.low_frequency = true;
 	mlxsw_i2c->dev = &client->dev;
+	mlxsw_i2c->pdata = client->dev.platform_data;
+
+	err = mlxsw_i2c_irq_init(mlxsw_i2c, client->addr);
+	if (err)
+		goto errout;
 
 	err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
 					     &mlxsw_i2c_bus, mlxsw_i2c, false,
 					     NULL, NULL);
 	if (err) {
 		dev_err(&client->dev, "Fail to register core bus\n");
-		return err;
+		goto err_bus_device_register;
 	}
 
 	return 0;
 
+err_bus_device_register:
+	mlxsw_i2c_irq_fini(mlxsw_i2c);
 errout:
 	mutex_destroy(&mlxsw_i2c->cmd.lock);
 	i2c_set_clientdata(client, NULL);
@@ -661,6 +745,7 @@ static int mlxsw_i2c_remove(struct i2c_client *client)
 	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
 
 	mlxsw_core_bus_device_unregister(mlxsw_i2c->core, false);
+	mlxsw_i2c_irq_fini(mlxsw_i2c);
 	mutex_destroy(&mlxsw_i2c->cmd.lock);
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index bb1cd4bae82e..7d3fa2883e8b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -26,20 +26,29 @@ static const struct mlxsw_fw_rev mlxsw_m_fw_rev = {
 
 struct mlxsw_m_port;
 
+struct mlxsw_m_line_card {
+	bool active;
+	int module_to_port[];
+};
+
 struct mlxsw_m {
 	struct mlxsw_m_port **ports;
-	int *module_to_port;
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
 	u8 base_mac[ETH_ALEN];
 	u8 max_ports;
+	u8 max_modules_per_slot; /* Maximum number of modules per-slot. */
+	u8 num_of_slots; /* Including the main board. */
+	struct mlxsw_m_line_card **line_cards;
 };
 
 struct mlxsw_m_port {
 	struct net_device *dev;
 	struct mlxsw_m *mlxsw_m;
 	u16 local_port;
+	u8 slot_index;
 	u8 module;
+	u8 module_offset;
 };
 
 static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m)
@@ -111,8 +120,9 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
 	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
 	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-	return mlxsw_env_get_module_info(netdev, core, 0, mlxsw_m_port->module,
-					 modinfo);
+	return mlxsw_env_get_module_info(netdev, core,
+					 mlxsw_m_port->slot_index,
+					 mlxsw_m_port->module, modinfo);
 }
 
 static int
@@ -122,7 +132,8 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
 	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
 	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-	return mlxsw_env_get_module_eeprom(netdev, core, 0,
+	return mlxsw_env_get_module_eeprom(netdev, core,
+					   mlxsw_m_port->slot_index,
 					   mlxsw_m_port->module, ee, data);
 }
 
@@ -134,7 +145,8 @@ mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev,
 	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
 	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-	return mlxsw_env_get_module_eeprom_by_page(core, 0,
+	return mlxsw_env_get_module_eeprom_by_page(core,
+						   mlxsw_m_port->slot_index,
 						   mlxsw_m_port->module,
 						   page, extack);
 }
@@ -144,7 +156,8 @@ static int mlxsw_m_reset(struct net_device *netdev, u32 *flags)
 	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
 	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-	return mlxsw_env_reset_module(netdev, core, 0, mlxsw_m_port->module,
+	return mlxsw_env_reset_module(netdev, core, mlxsw_m_port->slot_index,
+				      mlxsw_m_port->module,
 				      flags);
 }
 
@@ -156,7 +169,8 @@ mlxsw_m_get_module_power_mode(struct net_device *netdev,
 	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
 	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-	return mlxsw_env_get_module_power_mode(core, 0, mlxsw_m_port->module,
+	return mlxsw_env_get_module_power_mode(core, mlxsw_m_port->slot_index,
+					       mlxsw_m_port->module,
 					       params, extack);
 }
 
@@ -168,7 +182,8 @@ mlxsw_m_set_module_power_mode(struct net_device *netdev,
 	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
 	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-	return mlxsw_env_set_module_power_mode(core, 0, mlxsw_m_port->module,
+	return mlxsw_env_set_module_power_mode(core, mlxsw_m_port->slot_index,
+					       mlxsw_m_port->module,
 					       params->policy, extack);
 }
 
@@ -184,7 +199,7 @@ static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
 
 static int
 mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port,
-			     u8 *p_module, u8 *p_width)
+			     u8 *p_module, u8 *p_width, u8 *p_slot_index)
 {
 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
 	int err;
@@ -195,6 +210,7 @@ mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port,
 		return err;
 	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
 	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+	*p_slot_index = mlxsw_reg_pmlp_slot_index_get(pmlp_pl, 0);
 
 	return 0;
 }
@@ -212,18 +228,25 @@ mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port)
 	if (err)
 		return err;
 	mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, addr);
-	eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1);
+	eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1 +
+			mlxsw_m_port->module_offset);
 	return 0;
 }
 
+static bool mlxsw_m_port_created(struct mlxsw_m *mlxsw_m, u16 local_port)
+{
+	return mlxsw_m->ports[local_port];
+}
+
 static int
-mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 module)
+mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 slot_index,
+		    u8 module)
 {
 	struct mlxsw_m_port *mlxsw_m_port;
 	struct net_device *dev;
 	int err;
 
-	err = mlxsw_core_port_init(mlxsw_m->core, local_port, 0,
+	err = mlxsw_core_port_init(mlxsw_m->core, local_port, slot_index,
 				   module + 1, false, 0, false,
 				   0, mlxsw_m->base_mac,
 				   sizeof(mlxsw_m->base_mac));
@@ -246,6 +269,15 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 module)
 	mlxsw_m_port->mlxsw_m = mlxsw_m;
 	mlxsw_m_port->local_port = local_port;
 	mlxsw_m_port->module = module;
+	mlxsw_m_port->slot_index = slot_index;
+	/* Add module offset for line card. Offset for main board iz zero.
+	 * For line card in slot #n offset is calculated as (#n - 1)
+	 * multiplied by maximum modules number, which could be found on a line
+	 * card.
+	 */
+	mlxsw_m_port->module_offset = mlxsw_m_port->slot_index ?
+				      (mlxsw_m_port->slot_index - 1) *
+				      mlxsw_m->max_modules_per_slot : 0;
 
 	dev->netdev_ops = &mlxsw_m_port_netdev_ops;
 	dev->ethtool_ops = &mlxsw_m_port_ethtool_ops;
@@ -291,19 +323,29 @@ static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u16 local_port)
 	mlxsw_core_port_fini(mlxsw_m->core, local_port);
 }
 
+static int*
+mlxsw_m_port_mapping_get(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module)
+{
+	return &mlxsw_m->line_cards[slot_index]->module_to_port[module];
+}
+
 static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port,
 				   u8 *last_module)
 {
 	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
-	u8 module, width;
+	u8 module, width, slot_index;
+	int *module_to_port;
 	int err;
 
 	/* Fill out to local port mapping array */
 	err = mlxsw_m_port_module_info_get(mlxsw_m, local_port, &module,
-					   &width);
+					   &width, &slot_index);
 	if (err)
 		return err;
 
+	/* Skip if line card has been already configured */
+	if (mlxsw_m->line_cards[slot_index]->active)
+		return 0;
 	if (!width)
 		return 0;
 	/* Skip, if port belongs to the cluster */
@@ -313,91 +355,216 @@ static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port,
 
 	if (WARN_ON_ONCE(module >= max_ports))
 		return -EINVAL;
-	mlxsw_env_module_port_map(mlxsw_m->core, 0, module);
-	mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports;
+	mlxsw_env_module_port_map(mlxsw_m->core, slot_index, module);
+	module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, module);
+	*module_to_port = local_port;
 
 	return 0;
 }
 
-static void mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 module)
+static void
+mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module)
 {
-	mlxsw_m->module_to_port[module] = -1;
-	mlxsw_env_module_port_unmap(mlxsw_m->core, 0, module);
+	int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index,
+						       module);
+	*module_to_port = -1;
+	mlxsw_env_module_port_unmap(mlxsw_m->core, slot_index, module);
 }
 
-static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m)
 {
 	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
-	u8 last_module = max_ports;
-	int i;
-	int err;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	u8 num_of_modules;
+	int i, j, err;
+
+	mlxsw_reg_mgpir_pack(mgpir_pl, 0);
+	err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &num_of_modules,
+			       &mlxsw_m->num_of_slots);
+	/* If the system is modular, get the maximum number of modules per-slot.
+	 * Otherwise, get the maximum number of modules on the main board.
+	 */
+	if (mlxsw_m->num_of_slots)
+		mlxsw_m->max_modules_per_slot =
+			mlxsw_reg_mgpir_max_modules_per_slot_get(mgpir_pl);
+	else
+		mlxsw_m->max_modules_per_slot = num_of_modules;
+	/* Add slot for main board. */
+	mlxsw_m->num_of_slots += 1;
 
 	mlxsw_m->ports = kcalloc(max_ports, sizeof(*mlxsw_m->ports),
 				 GFP_KERNEL);
 	if (!mlxsw_m->ports)
 		return -ENOMEM;
 
-	mlxsw_m->module_to_port = kmalloc_array(max_ports, sizeof(int),
-						GFP_KERNEL);
-	if (!mlxsw_m->module_to_port) {
-		err = -ENOMEM;
-		goto err_module_to_port_alloc;
+	mlxsw_m->line_cards = kcalloc(mlxsw_m->num_of_slots,
+				      sizeof(*mlxsw_m->line_cards),
+				      GFP_KERNEL);
+	if (!mlxsw_m->line_cards)
+		goto err_kcalloc;
+
+	for (i = 0; i < mlxsw_m->num_of_slots; i++) {
+		mlxsw_m->line_cards[i] =
+			kzalloc(struct_size(mlxsw_m->line_cards[i],
+					    module_to_port,
+					    mlxsw_m->max_modules_per_slot),
+				GFP_KERNEL);
+		if (!mlxsw_m->line_cards[i])
+			goto err_kmalloc_array;
+
+		/* Invalidate the entries of module to local port mapping array. */
+		for (j = 0; j < mlxsw_m->max_modules_per_slot; j++)
+			mlxsw_m->line_cards[i]->module_to_port[j] = -1;
 	}
 
-	/* Invalidate the entries of module to local port mapping array */
-	for (i = 0; i < max_ports; i++)
-		mlxsw_m->module_to_port[i] = -1;
+	return 0;
 
-	/* Fill out module to local port mapping array */
-	for (i = 1; i < max_ports; i++) {
-		err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
-		if (err)
-			goto err_module_to_port_map;
+err_kmalloc_array:
+	for (i--; i >= 0; i--)
+		kfree(mlxsw_m->line_cards[i]);
+err_kcalloc:
+	kfree(mlxsw_m->ports);
+	return err;
+}
+
+static void mlxsw_m_linecards_fini(struct mlxsw_m *mlxsw_m)
+{
+	int i = mlxsw_m->num_of_slots;
+
+	for (i--; i >= 0; i--)
+		kfree(mlxsw_m->line_cards[i]);
+	kfree(mlxsw_m->line_cards);
+	kfree(mlxsw_m->ports);
+}
+
+static void
+mlxsw_m_linecard_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index)
+{
+	int i;
+
+	for (i = mlxsw_m->max_modules_per_slot - 1; i >= 0; i--) {
+		int *module_to_port;
+
+		module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+		if (*module_to_port > 0)
+			mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i);
 	}
+}
 
-	/* Create port objects for each valid entry */
-	for (i = 0; i < mlxsw_m->max_ports; i++) {
-		if (mlxsw_m->module_to_port[i] > 0) {
-			err = mlxsw_m_port_create(mlxsw_m,
-						  mlxsw_m->module_to_port[i],
-						  i);
+static int
+mlxsw_m_linecard_ports_create(struct mlxsw_m *mlxsw_m, u8 slot_index)
+{
+	int *module_to_port;
+	int i, err;
+
+	for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) {
+		module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+		if (*module_to_port > 0) {
+			err = mlxsw_m_port_create(mlxsw_m, *module_to_port,
+						  slot_index, i);
 			if (err)
-				goto err_module_to_port_create;
+				goto err_port_create;
+			/* Mark slot as active */
+			if (!mlxsw_m->line_cards[slot_index]->active)
+				mlxsw_m->line_cards[slot_index]->active = true;
 		}
 	}
-
 	return 0;
 
-err_module_to_port_create:
+err_port_create:
 	for (i--; i >= 0; i--) {
-		if (mlxsw_m->module_to_port[i] > 0)
-			mlxsw_m_port_remove(mlxsw_m,
-					    mlxsw_m->module_to_port[i]);
+		module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+		if (*module_to_port > 0 &&
+		    mlxsw_m_port_created(mlxsw_m, *module_to_port)) {
+			mlxsw_m_port_remove(mlxsw_m, *module_to_port);
+			/* Mark slot as inactive */
+			if (mlxsw_m->line_cards[slot_index]->active)
+				mlxsw_m->line_cards[slot_index]->active = false;
+		}
 	}
-	i = max_ports;
-err_module_to_port_map:
-	for (i--; i > 0; i--)
-		mlxsw_m_port_module_unmap(mlxsw_m, i);
-	kfree(mlxsw_m->module_to_port);
-err_module_to_port_alloc:
-	kfree(mlxsw_m->ports);
 	return err;
 }
 
-static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+static void
+mlxsw_m_linecard_ports_remove(struct mlxsw_m *mlxsw_m, u8 slot_index)
 {
 	int i;
 
-	for (i = 0; i < mlxsw_m->max_ports; i++) {
-		if (mlxsw_m->module_to_port[i] > 0) {
-			mlxsw_m_port_remove(mlxsw_m,
-					    mlxsw_m->module_to_port[i]);
-			mlxsw_m_port_module_unmap(mlxsw_m, i);
+	for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) {
+		int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m,
+							       slot_index, i);
+
+		if (*module_to_port > 0 &&
+		    mlxsw_m_port_created(mlxsw_m, *module_to_port)) {
+			mlxsw_m_port_remove(mlxsw_m, *module_to_port);
+			mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i);
 		}
 	}
+}
 
-	kfree(mlxsw_m->module_to_port);
-	kfree(mlxsw_m->ports);
+static int mlxsw_m_ports_module_map(struct mlxsw_m *mlxsw_m)
+{
+	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
+	u8 last_module = max_ports;
+	int i, err;
+
+	for (i = 1; i < max_ports; i++) {
+		err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+{
+	int err;
+
+	/* Fill out module to local port mapping array */
+	err = mlxsw_m_ports_module_map(mlxsw_m);
+	if (err)
+		goto err_ports_module_map;
+
+	/* Create port objects for each valid entry */
+	err = mlxsw_m_linecard_ports_create(mlxsw_m, 0);
+	if (err)
+		goto err_linecard_ports_create;
+
+	return 0;
+
+err_linecard_ports_create:
+err_ports_module_map:
+	mlxsw_m_linecard_port_module_unmap(mlxsw_m, 0);
+
+	return err;
+}
+
+static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+{
+	mlxsw_m_linecard_ports_remove(mlxsw_m, 0);
+}
+
+static void
+mlxsw_m_ports_remove_selected(struct mlxsw_core *mlxsw_core,
+			      bool (*selector)(void *priv, u16 local_port),
+			      void *priv)
+{
+	struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
+	struct mlxsw_linecard *linecard_priv = priv;
+	struct mlxsw_m_line_card *linecard;
+
+	linecard = mlxsw_m->line_cards[linecard_priv->slot_index];
+
+	if (WARN_ON(!linecard->active))
+		return;
+
+	mlxsw_m_linecard_ports_remove(mlxsw_m, linecard_priv->slot_index);
+	linecard->active = false;
 }
 
 static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
@@ -418,6 +585,60 @@ static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
 	return -EINVAL;
 }
 
+static void
+mlxsw_m_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv)
+{
+	struct mlxsw_m_line_card *linecard;
+	struct mlxsw_m *mlxsw_m = priv;
+	int err;
+
+	linecard = mlxsw_m->line_cards[slot_index];
+	/* Skip if line card has been already configured during init */
+	if (linecard->active)
+		return;
+
+	/* Fill out module to local port mapping array */
+	err = mlxsw_m_ports_module_map(mlxsw_m);
+	if (err)
+		goto err_ports_module_map;
+
+	/* Create port objects for each valid entry */
+	err = mlxsw_m_linecard_ports_create(mlxsw_m, slot_index);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Failed to create port for line card at slot %d\n",
+			slot_index);
+		goto err_linecard_ports_create;
+	}
+
+	linecard->active = true;
+
+	return;
+
+err_linecard_ports_create:
+err_ports_module_map:
+	mlxsw_m_linecard_port_module_unmap(mlxsw_m, slot_index);
+}
+
+static void
+mlxsw_m_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv)
+{
+	struct mlxsw_m_line_card *linecard;
+	struct mlxsw_m *mlxsw_m = priv;
+
+	linecard = mlxsw_m->line_cards[slot_index];
+
+	if (WARN_ON(!linecard->active))
+		return;
+
+	mlxsw_m_linecard_ports_remove(mlxsw_m, slot_index);
+	linecard->active = false;
+}
+
+static struct mlxsw_linecards_event_ops mlxsw_m_event_ops = {
+	.got_active = mlxsw_m_got_active,
+	.got_inactive = mlxsw_m_got_inactive,
+};
+
 static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
 			const struct mlxsw_bus_info *mlxsw_bus_info,
 			struct netlink_ext_ack *extack)
@@ -438,13 +659,33 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
 		return err;
 	}
 
+	err = mlxsw_m_linecards_init(mlxsw_m);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Failed to create line cards\n");
+		return err;
+	}
+
+	err = mlxsw_linecards_event_ops_register(mlxsw_core,
+						 &mlxsw_m_event_ops, mlxsw_m);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Failed to register line cards operations\n");
+		goto linecards_event_ops_register;
+	}
+
 	err = mlxsw_m_ports_create(mlxsw_m);
 	if (err) {
 		dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n");
-		return err;
+		goto err_ports_create;
 	}
 
 	return 0;
+
+err_ports_create:
+	mlxsw_linecards_event_ops_unregister(mlxsw_core,
+					     &mlxsw_m_event_ops, mlxsw_m);
+linecards_event_ops_register:
+	mlxsw_m_linecards_fini(mlxsw_m);
+	return err;
 }
 
 static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
@@ -452,6 +693,9 @@ static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
 	struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
 
 	mlxsw_m_ports_remove(mlxsw_m);
+	mlxsw_linecards_event_ops_unregister(mlxsw_core,
+					     &mlxsw_m_event_ops, mlxsw_m);
+	mlxsw_m_linecards_fini(mlxsw_m);
 }
 
 static const struct mlxsw_config_profile mlxsw_m_config_profile;
@@ -461,6 +705,7 @@ static struct mlxsw_driver mlxsw_m_driver = {
 	.priv_size		= sizeof(struct mlxsw_m),
 	.init			= mlxsw_m_init,
 	.fini			= mlxsw_m_fini,
+	.ports_remove_selected	= mlxsw_m_ports_remove_selected,
 	.profile		= &mlxsw_m_config_profile,
 };
 
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 82d55fc27edc..70bc7253454f 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -413,7 +413,8 @@ static int ks8851_probe_spi(struct spi_device *spi)
 
 	spi->bits_per_word = 8;
 
-	ks = netdev_priv(netdev);
+	kss = netdev_priv(netdev);
+	ks = &kss->ks8851;
 
 	ks->lock = ks8851_lock_spi;
 	ks->unlock = ks8851_unlock_spi;
@@ -433,8 +434,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
 		 IRQ_RXPSI)	/* RX process stop */
 	ks->rc_ier = STD_IRQ;
 
-	kss = to_ks8851_spi(ks);
-
 	kss->spidev = spi;
 	mutex_init(&kss->lock);
 	INIT_WORK(&kss->tx_work, ks8851_tx_work);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 306026e6aa11..dddaffdaad9a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2064,6 +2064,16 @@ static int ocelot_bond_get_id(struct ocelot *ocelot, struct net_device *bond)
 	return __ffs(bond_mask);
 }
 
+/* Returns the mask of user ports assigned to this DSA tag_8021q CPU port.
+ * Note that when CPU ports are in a LAG, the user ports are assigned to the
+ * 'primary' CPU port, the one whose physical port number gives the logical
+ * port number of the LAG.
+ *
+ * We leave PGID_SRC poorly configured for the 'secondary' CPU port in the LAG
+ * (to which no user port is assigned), but it appears that forwarding from
+ * this secondary CPU port looks at the PGID_SRC associated with the logical
+ * port ID that it's assigned to, which *is* configured properly.
+ */
 static u32 ocelot_dsa_8021q_cpu_assigned_ports(struct ocelot *ocelot,
 					       struct ocelot_port *cpu)
 {
@@ -2080,9 +2090,15 @@ static u32 ocelot_dsa_8021q_cpu_assigned_ports(struct ocelot *ocelot,
 			mask |= BIT(port);
 	}
 
+	if (cpu->bond)
+		mask &= ~ocelot_get_bond_mask(ocelot, cpu->bond);
+
 	return mask;
 }
 
+/* Returns the DSA tag_8021q CPU port that the given port is assigned to,
+ * or the bit mask of CPU ports if said CPU port is in a LAG.
+ */
 u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
@@ -2091,6 +2107,9 @@ u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port)
 	if (!cpu_port)
 		return 0;
 
+	if (cpu_port->bond)
+		return ocelot_get_bond_mask(ocelot, cpu_port->bond);
+
 	return BIT(cpu_port->index);
 }
 EXPORT_SYMBOL_GPL(ocelot_port_assigned_dsa_8021q_cpu_mask);
@@ -2214,61 +2233,61 @@ static void ocelot_update_pgid_cpu(struct ocelot *ocelot)
 	ocelot_write_rix(ocelot, pgid_cpu, ANA_PGID_PGID, PGID_CPU);
 }
 
-void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port,
-				      int cpu)
+void ocelot_port_setup_dsa_8021q_cpu(struct ocelot *ocelot, int cpu)
 {
 	struct ocelot_port *cpu_port = ocelot->ports[cpu];
 	u16 vid;
 
 	mutex_lock(&ocelot->fwd_domain_lock);
 
-	ocelot->ports[port]->dsa_8021q_cpu = cpu_port;
-
-	if (!cpu_port->is_dsa_8021q_cpu) {
-		cpu_port->is_dsa_8021q_cpu = true;
-
-		for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
-			ocelot_vlan_member_add(ocelot, cpu, vid, true);
+	cpu_port->is_dsa_8021q_cpu = true;
 
-		ocelot_update_pgid_cpu(ocelot);
-	}
+	for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
+		ocelot_vlan_member_add(ocelot, cpu, vid, true);
 
-	ocelot_apply_bridge_fwd_mask(ocelot, true);
+	ocelot_update_pgid_cpu(ocelot);
 
 	mutex_unlock(&ocelot->fwd_domain_lock);
 }
-EXPORT_SYMBOL_GPL(ocelot_port_assign_dsa_8021q_cpu);
+EXPORT_SYMBOL_GPL(ocelot_port_setup_dsa_8021q_cpu);
 
-void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port)
+void ocelot_port_teardown_dsa_8021q_cpu(struct ocelot *ocelot, int cpu)
 {
-	struct ocelot_port *cpu_port = ocelot->ports[port]->dsa_8021q_cpu;
-	bool keep = false;
+	struct ocelot_port *cpu_port = ocelot->ports[cpu];
 	u16 vid;
-	int p;
 
 	mutex_lock(&ocelot->fwd_domain_lock);
 
-	ocelot->ports[port]->dsa_8021q_cpu = NULL;
+	cpu_port->is_dsa_8021q_cpu = false;
 
-	for (p = 0; p < ocelot->num_phys_ports; p++) {
-		if (!ocelot->ports[p])
-			continue;
+	for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
+		ocelot_vlan_member_del(ocelot, cpu_port->index, vid);
 
-		if (ocelot->ports[p]->dsa_8021q_cpu == cpu_port) {
-			keep = true;
-			break;
-		}
-	}
+	ocelot_update_pgid_cpu(ocelot);
 
-	if (!keep) {
-		cpu_port->is_dsa_8021q_cpu = false;
+	mutex_unlock(&ocelot->fwd_domain_lock);
+}
+EXPORT_SYMBOL_GPL(ocelot_port_teardown_dsa_8021q_cpu);
+
+void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port,
+				      int cpu)
+{
+	struct ocelot_port *cpu_port = ocelot->ports[cpu];
 
-		for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
-			ocelot_vlan_member_del(ocelot, cpu_port->index, vid);
+	mutex_lock(&ocelot->fwd_domain_lock);
 
-		ocelot_update_pgid_cpu(ocelot);
-	}
+	ocelot->ports[port]->dsa_8021q_cpu = cpu_port;
+	ocelot_apply_bridge_fwd_mask(ocelot, true);
+
+	mutex_unlock(&ocelot->fwd_domain_lock);
+}
+EXPORT_SYMBOL_GPL(ocelot_port_assign_dsa_8021q_cpu);
 
+void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port)
+{
+	mutex_lock(&ocelot->fwd_domain_lock);
+
+	ocelot->ports[port]->dsa_8021q_cpu = NULL;
 	ocelot_apply_bridge_fwd_mask(ocelot, true);
 
 	mutex_unlock(&ocelot->fwd_domain_lock);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f4a6b590a1e3..7ef5d8208a4e 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -74,6 +74,9 @@
 #define AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG	0x318
 #define AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2       0x31C
 
+#define AM65_CPSW_SGMII_CONTROL_REG		0x010
+#define AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE	BIT(0)
+
 #define AM65_CPSW_CTL_VLAN_AWARE		BIT(1)
 #define AM65_CPSW_CTL_P0_ENABLE			BIT(2)
 #define AM65_CPSW_CTL_P0_TX_CRC_REMOVE		BIT(13)
@@ -590,11 +593,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
 	/* mac_sl should be configured via phy-link interface */
 	am65_cpsw_sl_ctl_reset(port);
 
-	ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET,
-			       port->slave.phy_if);
-	if (ret)
-		goto error_cleanup;
-
 	ret = phylink_of_phy_connect(port->slave.phylink, port->slave.phy_node, 0);
 	if (ret)
 		goto error_cleanup;
@@ -1409,7 +1407,14 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = {
 static void am65_cpsw_nuss_mac_config(struct phylink_config *config, unsigned int mode,
 				      const struct phylink_link_state *state)
 {
-	/* Currently not used */
+	struct am65_cpsw_slave_data *slave = container_of(config, struct am65_cpsw_slave_data,
+							  phylink_config);
+	struct am65_cpsw_port *port = container_of(slave, struct am65_cpsw_port, slave);
+	struct am65_cpsw_common *common = port->common;
+
+	if (common->pdata.extra_modes & BIT(state->interface))
+		writel(AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE,
+		       port->sgmii_base + AM65_CPSW_SGMII_CONTROL_REG);
 }
 
 static void am65_cpsw_nuss_mac_link_down(struct phylink_config *config, unsigned int mode,
@@ -1847,6 +1852,8 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 		port->common = common;
 		port->port_base = common->cpsw_base + AM65_CPSW_NU_PORTS_BASE +
 				  AM65_CPSW_NU_PORTS_OFFSET * (port_id);
+		if (common->pdata.extra_modes)
+			port->sgmii_base = common->ss_base + AM65_CPSW_SGMII_BASE * (port_id);
 		port->stat_base = common->cpsw_base + AM65_CPSW_NU_STATS_BASE +
 				  (AM65_CPSW_NU_STATS_PORT_OFFSET * port_id);
 		port->name = of_get_property(port_np, "label", NULL);
@@ -1886,6 +1893,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 			goto of_node_put;
 		}
 
+		ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET, port->slave.phy_if);
+		if (ret)
+			goto of_node_put;
+
 		ret = of_get_mac_address(port_np, port->slave.mac_addr);
 		if (ret) {
 			am65_cpsw_am654_get_efuse_macid(port_np,
@@ -1981,7 +1992,18 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
 	port->slave.phylink_config.type = PHYLINK_NETDEV;
 	port->slave.phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
 
-	phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+	if (phy_interface_mode_is_rgmii(port->slave.phy_if)) {
+		phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+	} else if (port->slave.phy_if == PHY_INTERFACE_MODE_RMII) {
+		__set_bit(PHY_INTERFACE_MODE_RMII,
+			  port->slave.phylink_config.supported_interfaces);
+	} else if (common->pdata.extra_modes & BIT(port->slave.phy_if)) {
+		__set_bit(PHY_INTERFACE_MODE_QSGMII,
+			  port->slave.phylink_config.supported_interfaces);
+	} else {
+		dev_err(dev, "selected phy-mode is not supported\n");
+		return -EOPNOTSUPP;
+	}
 
 	phylink = phylink_create(&port->slave.phylink_config,
 				 of_node_to_fwnode(port->slave.phy_node),
@@ -2611,10 +2633,18 @@ static const struct am65_cpsw_pdata am64x_cpswxg_pdata = {
 	.fdqring_mode = K3_RINGACC_RING_MODE_RING,
 };
 
+static const struct am65_cpsw_pdata j7200_cpswxg_pdata = {
+	.quirks = 0,
+	.ale_dev_id = "am64-cpswxg",
+	.fdqring_mode = K3_RINGACC_RING_MODE_RING,
+	.extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII),
+};
+
 static const struct of_device_id am65_cpsw_nuss_of_mtable[] = {
 	{ .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0},
 	{ .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata},
 	{ .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata},
+	{ .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata},
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index ac945631bf2f..2c9850fdfcb6 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -46,6 +46,7 @@ struct am65_cpsw_port {
 	const char			*name;
 	u32				port_id;
 	void __iomem			*port_base;
+	void __iomem			*sgmii_base;
 	void __iomem			*stat_base;
 	void __iomem			*fetch_ram_base;
 	bool				disabled;
@@ -88,6 +89,7 @@ struct am65_cpsw_rx_chn {
 
 struct am65_cpsw_pdata {
 	u32	quirks;
+	u64	extra_modes;
 	enum k3_ring_mode fdqring_mode;
 	const char	*ale_dev_id;
 };
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 2a8195c50d14..ec91e671f8aa 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -10,6 +10,7 @@
 #include <linux/mdio.h>
 #include <linux/mii.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/hwmon.h>
 #include <linux/bitfield.h>
@@ -34,6 +35,11 @@
 #define MII_CFG1			18
 #define MII_CFG1_MASTER_SLAVE		BIT(15)
 #define MII_CFG1_AUTO_OP		BIT(14)
+#define MII_CFG1_INTERFACE_MODE_MASK	GENMASK(9, 8)
+#define MII_CFG1_MII_MODE				(0x0 << 8)
+#define MII_CFG1_RMII_MODE_REFCLK_IN	BIT(8)
+#define MII_CFG1_RMII_MODE_REFCLK_OUT	BIT(9)
+#define MII_CFG1_REVMII_MODE			GENMASK(9, 8)
 #define MII_CFG1_SLEEP_CONFIRM		BIT(6)
 #define MII_CFG1_LED_MODE_MASK		GENMASK(5, 4)
 #define MII_CFG1_LED_MODE_LINKUP	0
@@ -72,11 +78,15 @@
 #define MII_COMMCFG			27
 #define MII_COMMCFG_AUTO_OP		BIT(15)
 
+/* Configure REF_CLK as input in RMII mode */
+#define TJA110X_RMII_MODE_REFCLK_IN       BIT(0)
+
 struct tja11xx_priv {
 	char		*hwmon_name;
 	struct device	*hwmon_dev;
 	struct phy_device *phydev;
 	struct work_struct phy_register_work;
+	u32 flags;
 };
 
 struct tja11xx_phy_stats {
@@ -251,8 +261,34 @@ do_test:
 	return __genphy_config_aneg(phydev, changed);
 }
 
+static int tja11xx_get_interface_mode(struct phy_device *phydev)
+{
+	struct tja11xx_priv *priv = phydev->priv;
+	int mii_mode;
+
+	switch (phydev->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		mii_mode = MII_CFG1_MII_MODE;
+		break;
+	case PHY_INTERFACE_MODE_REVMII:
+		mii_mode = MII_CFG1_REVMII_MODE;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		if (priv->flags & TJA110X_RMII_MODE_REFCLK_IN)
+			mii_mode = MII_CFG1_RMII_MODE_REFCLK_IN;
+		else
+			mii_mode = MII_CFG1_RMII_MODE_REFCLK_OUT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return mii_mode;
+}
+
 static int tja11xx_config_init(struct phy_device *phydev)
 {
+	u16 reg_mask, reg_val;
 	int ret;
 
 	ret = tja11xx_enable_reg_write(phydev);
@@ -265,15 +301,32 @@ static int tja11xx_config_init(struct phy_device *phydev)
 
 	switch (phydev->phy_id & PHY_ID_MASK) {
 	case PHY_ID_TJA1100:
-		ret = phy_modify(phydev, MII_CFG1,
-				 MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
-				 MII_CFG1_LED_ENABLE,
-				 MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
-				 MII_CFG1_LED_ENABLE);
+		reg_mask = MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
+			   MII_CFG1_LED_ENABLE;
+		reg_val = MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
+			  MII_CFG1_LED_ENABLE;
+
+		reg_mask |= MII_CFG1_INTERFACE_MODE_MASK;
+		ret = tja11xx_get_interface_mode(phydev);
+		if (ret < 0)
+			return ret;
+
+		reg_val |= (ret & 0xffff);
+		ret = phy_modify(phydev, MII_CFG1, reg_mask, reg_val);
 		if (ret)
 			return ret;
 		break;
 	case PHY_ID_TJA1101:
+		reg_mask = MII_CFG1_INTERFACE_MODE_MASK;
+		ret = tja11xx_get_interface_mode(phydev);
+		if (ret < 0)
+			return ret;
+
+		reg_val = ret & 0xffff;
+		ret = phy_modify(phydev, MII_CFG1, reg_mask, reg_val);
+		if (ret)
+			return ret;
+		fallthrough;
 	case PHY_ID_TJA1102:
 		ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
 		if (ret)
@@ -458,16 +511,36 @@ static int tja11xx_hwmon_register(struct phy_device *phydev,
 	return PTR_ERR_OR_ZERO(priv->hwmon_dev);
 }
 
+static int tja11xx_parse_dt(struct phy_device *phydev)
+{
+	struct device_node *node = phydev->mdio.dev.of_node;
+	struct tja11xx_priv *priv = phydev->priv;
+
+	if (!IS_ENABLED(CONFIG_OF_MDIO))
+		return 0;
+
+	if (of_property_read_bool(node, "nxp,rmii-refclk-in"))
+		priv->flags |= TJA110X_RMII_MODE_REFCLK_IN;
+
+	return 0;
+}
+
 static int tja11xx_probe(struct phy_device *phydev)
 {
 	struct device *dev = &phydev->mdio.dev;
 	struct tja11xx_priv *priv;
+	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
 	priv->phydev = phydev;
+	phydev->priv = priv;
+
+	ret = tja11xx_parse_dt(phydev);
+	if (ret)
+		return ret;
 
 	return tja11xx_hwmon_register(phydev, priv);
 }
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 445e80517cab..fc93c9488c76 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -371,4 +371,11 @@
  */
 #define __weak                          __attribute__((__weak__))
 
+/*
+ * Used by functions that use '__builtin_return_address'. These function
+ * don't want to be splited or made inline, which can make
+ * the '__builtin_return_address' get unexpected address.
+ */
+#define __fix_address noinline __noclone
+
 #endif /* __LINUX_COMPILER_ATTRIBUTES_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8afa382bf2..b51d07a727c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1195,7 +1195,8 @@ static inline bool skb_unref(struct sk_buff *skb)
 	return true;
 }
 
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
 
 /**
  *	kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..568a87c5e0d0 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -78,6 +78,7 @@ struct vsock_sock {
 s64 vsock_stream_has_data(struct vsock_sock *vsk);
 s64 vsock_stream_has_space(struct vsock_sock *vsk);
 struct sock *vsock_create_connected(struct sock *parent);
+void vsock_data_ready(struct sock *sk);
 
 /**** TRANSPORT ****/
 
@@ -135,6 +136,7 @@ struct vsock_transport {
 	u64 (*stream_rcvhiwat)(struct vsock_sock *);
 	bool (*stream_is_active)(struct vsock_sock *);
 	bool (*stream_allow)(u32 cid, u32 port);
+	int (*set_rcvlowat)(struct vsock_sock *vsk, int val);
 
 	/* SEQ_PACKET. */
 	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index b902b31bebce..f2ce12860546 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -559,6 +559,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
 	list_for_each_entry((_dp), &(_dst)->ports, list) \
 		if (dsa_port_is_user((_dp)))
 
+#define dsa_tree_for_each_cpu_port(_dp, _dst) \
+	list_for_each_entry((_dp), &(_dst)->ports, list) \
+		if (dsa_port_is_cpu((_dp)))
+
 #define dsa_switch_for_each_port(_dp, _ds) \
 	list_for_each_entry((_dp), &(_ds)->dst->ports, list) \
 		if ((_dp)->ds == (_ds))
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 2edea901bbd5..2a7e18ee5577 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -1024,6 +1024,8 @@ void ocelot_deinit(struct ocelot *ocelot);
 void ocelot_init_port(struct ocelot *ocelot, int port);
 void ocelot_deinit_port(struct ocelot *ocelot, int port);
 
+void ocelot_port_setup_dsa_8021q_cpu(struct ocelot *ocelot, int cpu);
+void ocelot_port_teardown_dsa_8021q_cpu(struct ocelot *ocelot, int cpu);
 void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port, int cpu);
 void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port);
 u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port);
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 1e543cf0568c..e0ab261ceca2 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -41,12 +41,20 @@ struct sockaddr_nl {
        	__u32		nl_groups;	/* multicast groups mask */
 };
 
+/**
+ * struct nlmsghdr - fixed format metadata header of Netlink messages
+ * @nlmsg_len:   Length of message including header
+ * @nlmsg_type:  Message content type
+ * @nlmsg_flags: Additional flags
+ * @nlmsg_seq:   Sequence number
+ * @nlmsg_pid:   Sending process port ID
+ */
 struct nlmsghdr {
-	__u32		nlmsg_len;	/* Length of message including header */
-	__u16		nlmsg_type;	/* Message content */
-	__u16		nlmsg_flags;	/* Additional flags */
-	__u32		nlmsg_seq;	/* Sequence number */
-	__u32		nlmsg_pid;	/* Sending process port ID */
+	__u32		nlmsg_len;
+	__u16		nlmsg_type;
+	__u16		nlmsg_flags;
+	__u32		nlmsg_seq;
+	__u32		nlmsg_pid;
 };
 
 /* Flags values */
@@ -337,6 +345,9 @@ enum netlink_attribute_type {
  *	bitfield32 type (U32)
  * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
  * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
+ *
+ * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes
+ * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number
  */
 enum netlink_policy_type_attr {
 	NL_POLICY_TYPE_ATTR_UNSPEC,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a84a7cfb9d6d..efbd93e92ce2 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -568,26 +568,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	    !is_valid_ether_addr(dev->dev_addr))
 		return -EINVAL;
 
-	/* Also don't allow bridging of net devices that are DSA masters, since
-	 * the bridge layer rx_handler prevents the DSA fake ethertype handler
-	 * to be invoked, so we don't get the chance to strip off and parse the
-	 * DSA switch tag protocol header (the bridge layer just returns
-	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames).
-	 * The only case where that would not be an issue is when bridging can
-	 * already be offloaded, such as when the DSA master is itself a DSA
-	 * or plain switchdev port, and is bridged only with other ports from
-	 * the same hardware device.
-	 */
-	if (netdev_uses_dsa(dev)) {
-		list_for_each_entry(p, &br->port_list, list) {
-			if (!netdev_port_same_parent_id(dev, p->dev)) {
-				NL_SET_ERR_MSG(extack,
-					       "Cannot do software bridging with a DSA master");
-				return -EINVAL;
-			}
-		}
-	}
-
 	/* No bridging of bridges */
 	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
 		NL_SET_ERR_MSG(extack,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 974bbbbe7138..35d9d5958dc6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -777,9 +777,10 @@ EXPORT_SYMBOL(__kfree_skb);
  *	hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
  *	tracepoint.
  */
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
 {
-	if (!skb_unref(skb))
+	if (unlikely(!skb_unref(skb)))
 		return;
 
 	DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 055a6d1d4372..ed56c7a554b8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1060,26 +1060,24 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 
 static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 {
-	struct dsa_port *dp;
+	struct dsa_port *cpu_dp;
 	int err = 0;
 
 	rtnl_lock();
 
-	list_for_each_entry(dp, &dst->ports, list) {
-		if (dsa_port_is_cpu(dp)) {
-			struct net_device *master = dp->master;
-			bool admin_up = (master->flags & IFF_UP) &&
-					!qdisc_tx_is_noop(master);
+	dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+		struct net_device *master = cpu_dp->master;
+		bool admin_up = (master->flags & IFF_UP) &&
+				!qdisc_tx_is_noop(master);
 
-			err = dsa_master_setup(master, dp);
-			if (err)
-				break;
+		err = dsa_master_setup(master, cpu_dp);
+		if (err)
+			break;
 
-			/* Replay master state event */
-			dsa_tree_master_admin_state_change(dst, master, admin_up);
-			dsa_tree_master_oper_state_change(dst, master,
-							  netif_oper_up(master));
-		}
+		/* Replay master state event */
+		dsa_tree_master_admin_state_change(dst, master, admin_up);
+		dsa_tree_master_oper_state_change(dst, master,
+						  netif_oper_up(master));
 	}
 
 	rtnl_unlock();
@@ -1089,22 +1087,20 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 
 static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
 {
-	struct dsa_port *dp;
+	struct dsa_port *cpu_dp;
 
 	rtnl_lock();
 
-	list_for_each_entry(dp, &dst->ports, list) {
-		if (dsa_port_is_cpu(dp)) {
-			struct net_device *master = dp->master;
+	dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+		struct net_device *master = cpu_dp->master;
 
-			/* Synthesizing an "admin down" state is sufficient for
-			 * the switches to get a notification if the master is
-			 * currently up and running.
-			 */
-			dsa_tree_master_admin_state_change(dst, master, false);
+		/* Synthesizing an "admin down" state is sufficient for
+		 * the switches to get a notification if the master is
+		 * currently up and running.
+		 */
+		dsa_tree_master_admin_state_change(dst, master, false);
 
-			dsa_master_teardown(master);
-		}
+		dsa_master_teardown(master);
 	}
 
 	rtnl_unlock();
@@ -1252,7 +1248,6 @@ out_disconnect:
  * they would have formed disjoint trees (different "dsa,member" values).
  */
 int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
-			      struct net_device *master,
 			      const struct dsa_device_ops *tag_ops,
 			      const struct dsa_device_ops *old_tag_ops)
 {
@@ -1268,14 +1263,11 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
 	 * attempts to change the tagging protocol. If we ever lift the IFF_UP
 	 * restriction, there needs to be another mutex which serializes this.
 	 */
-	if (master->flags & IFF_UP)
-		goto out_unlock;
-
 	list_for_each_entry(dp, &dst->ports, list) {
-		if (!dsa_port_is_user(dp))
-			continue;
+		if (dsa_port_is_cpu(dp) && (dp->master->flags & IFF_UP))
+			goto out_unlock;
 
-		if (dp->slave->flags & IFF_UP)
+		if (dsa_port_is_user(dp) && (dp->slave->flags & IFF_UP))
 			goto out_unlock;
 	}
 
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 8924366467e0..614fbba8fe39 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -545,7 +545,6 @@ struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
 int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
 int dsa_broadcast(unsigned long e, void *v);
 int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
-			      struct net_device *master,
 			      const struct dsa_device_ops *tag_ops,
 			      const struct dsa_device_ops *old_tag_ops);
 void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 9e5dabc751d9..fb810edc8281 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -307,7 +307,7 @@ static ssize_t tagging_store(struct device *d, struct device_attribute *attr,
 		 */
 		goto out;
 
-	err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, dev, new_tag_ops,
+	err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops,
 					old_tag_ops);
 	if (err) {
 		/* On failure the old tagger is restored, so we don't need the
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ce12c3427bad..b24f1131af90 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2476,6 +2476,9 @@ static int dsa_slave_changeupper(struct net_device *dev,
 	struct netlink_ext_ack *extack;
 	int err = NOTIFY_DONE;
 
+	if (!dsa_slave_dev_check(dev))
+		return err;
+
 	extack = netdev_notifier_info_to_extack(&info->info);
 
 	if (netif_is_bridge_master(info->upper_dev)) {
@@ -2531,6 +2534,9 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 
+	if (!dsa_slave_dev_check(dev))
+		return NOTIFY_DONE;
+
 	if (netif_is_bridge_master(info->upper_dev) && !info->linking)
 		dsa_port_pre_bridge_leave(dp, info->upper_dev);
 	else if (netif_is_lag_master(info->upper_dev) && !info->linking)
@@ -2551,6 +2557,9 @@ dsa_slave_lag_changeupper(struct net_device *dev,
 	int err = NOTIFY_DONE;
 	struct dsa_port *dp;
 
+	if (!netif_is_lag_master(dev))
+		return err;
+
 	netdev_for_each_lower_dev(dev, lower, iter) {
 		if (!dsa_slave_dev_check(lower))
 			continue;
@@ -2580,6 +2589,9 @@ dsa_slave_lag_prechangeupper(struct net_device *dev,
 	int err = NOTIFY_DONE;
 	struct dsa_port *dp;
 
+	if (!netif_is_lag_master(dev))
+		return err;
+
 	netdev_for_each_lower_dev(dev, lower, iter) {
 		if (!dsa_slave_dev_check(lower))
 			continue;
@@ -2687,6 +2699,75 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
 	return NOTIFY_DONE;
 }
 
+static int
+dsa_master_prechangeupper_sanity_check(struct net_device *master,
+				       struct netdev_notifier_changeupper_info *info)
+{
+	struct netlink_ext_ack *extack;
+
+	if (!netdev_uses_dsa(master))
+		return NOTIFY_DONE;
+
+	if (!info->linking)
+		return NOTIFY_DONE;
+
+	/* Allow DSA switch uppers */
+	if (dsa_slave_dev_check(info->upper_dev))
+		return NOTIFY_DONE;
+
+	/* Allow bridge uppers of DSA masters, subject to further
+	 * restrictions in dsa_bridge_prechangelower_sanity_check()
+	 */
+	if (netif_is_bridge_master(info->upper_dev))
+		return NOTIFY_DONE;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	NL_SET_ERR_MSG_MOD(extack,
+			   "DSA master cannot join unknown upper interfaces");
+	return notifier_from_errno(-EBUSY);
+}
+
+/* Don't allow bridging of DSA masters, since the bridge layer rx_handler
+ * prevents the DSA fake ethertype handler to be invoked, so we don't get the
+ * chance to strip off and parse the DSA switch tag protocol header (the bridge
+ * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these
+ * frames).
+ * The only case where that would not be an issue is when bridging can already
+ * be offloaded, such as when the DSA master is itself a DSA or plain switchdev
+ * port, and is bridged only with other ports from the same hardware device.
+ */
+static int
+dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower,
+				       struct netdev_notifier_changeupper_info *info)
+{
+	struct net_device *br = info->upper_dev;
+	struct netlink_ext_ack *extack;
+	struct net_device *lower;
+	struct list_head *iter;
+
+	if (!netif_is_bridge_master(br))
+		return NOTIFY_DONE;
+
+	if (!info->linking)
+		return NOTIFY_DONE;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	netdev_for_each_lower_dev(br, lower, iter) {
+		if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower))
+			continue;
+
+		if (!netdev_port_same_parent_id(lower, new_lower)) {
+			NL_SET_ERR_MSG(extack,
+				       "Cannot do software bridging with a DSA master");
+			return notifier_from_errno(-EINVAL);
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int dsa_slave_netdevice_event(struct notifier_block *nb,
 				     unsigned long event, void *ptr)
 {
@@ -2698,25 +2779,40 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 		int err;
 
 		err = dsa_slave_prechangeupper_sanity_check(dev, info);
-		if (err != NOTIFY_DONE)
+		if (notifier_to_errno(err))
+			return err;
+
+		err = dsa_master_prechangeupper_sanity_check(dev, info);
+		if (notifier_to_errno(err))
 			return err;
 
-		if (dsa_slave_dev_check(dev))
-			return dsa_slave_prechangeupper(dev, ptr);
+		err = dsa_bridge_prechangelower_sanity_check(dev, info);
+		if (notifier_to_errno(err))
+			return err;
 
-		if (netif_is_lag_master(dev))
-			return dsa_slave_lag_prechangeupper(dev, ptr);
+		err = dsa_slave_prechangeupper(dev, ptr);
+		if (notifier_to_errno(err))
+			return err;
+
+		err = dsa_slave_lag_prechangeupper(dev, ptr);
+		if (notifier_to_errno(err))
+			return err;
 
 		break;
 	}
-	case NETDEV_CHANGEUPPER:
-		if (dsa_slave_dev_check(dev))
-			return dsa_slave_changeupper(dev, ptr);
+	case NETDEV_CHANGEUPPER: {
+		int err;
 
-		if (netif_is_lag_master(dev))
-			return dsa_slave_lag_changeupper(dev, ptr);
+		err = dsa_slave_changeupper(dev, ptr);
+		if (notifier_to_errno(err))
+			return err;
+
+		err = dsa_slave_lag_changeupper(dev, ptr);
+		if (notifier_to_errno(err))
+			return err;
 
 		break;
+	}
 	case NETDEV_CHANGELOWERSTATE: {
 		struct netdev_notifier_changelowerstate_info *info = ptr;
 		struct dsa_port *dp;
@@ -2777,6 +2873,9 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 			if (!dsa_port_is_user(dp))
 				continue;
 
+			if (dp->cpu_dp != cpu_dp)
+				continue;
+
 			list_add(&dp->slave->close_list, &close_list);
 		}
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bbe218753662..ba62f8e8bd15 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4433,12 +4433,16 @@ static void __tcp_alloc_md5sig_pool(void)
 	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
 	 */
 	smp_wmb();
-	tcp_md5sig_pool_populated = true;
+	/* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+	 * and tcp_get_md5sig_pool().
+	*/
+	WRITE_ONCE(tcp_md5sig_pool_populated, true);
 }
 
 bool tcp_alloc_md5sig_pool(void)
 {
-	if (unlikely(!tcp_md5sig_pool_populated)) {
+	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+	if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
 		mutex_lock(&tcp_md5sig_mutex);
 
 		if (!tcp_md5sig_pool_populated) {
@@ -4449,7 +4453,8 @@ bool tcp_alloc_md5sig_pool(void)
 
 		mutex_unlock(&tcp_md5sig_mutex);
 	}
-	return tcp_md5sig_pool_populated;
+	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+	return READ_ONCE(tcp_md5sig_pool_populated);
 }
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
@@ -4465,7 +4470,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 {
 	local_bh_disable();
 
-	if (tcp_md5sig_pool_populated) {
+	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+	if (READ_ONCE(tcp_md5sig_pool_populated)) {
 		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
 		smp_rmb();
 		return this_cpu_ptr(&tcp_md5sig_pool);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 790d6809be81..1ebab4b11262 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1977,9 +1977,6 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	bool rtnl_held = false;
 	u32 flags;
 
-	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 replay:
 	tp_created = 0;
 
@@ -2208,9 +2205,6 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	int err;
 	bool rtnl_held = false;
 
-	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
 				     rtm_tca_policy, extack);
 	if (err < 0)
@@ -2826,10 +2820,6 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
 	unsigned long cl;
 	int err;
 
-	if (n->nlmsg_type != RTM_GETCHAIN &&
-	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 replay:
 	q = NULL;
 	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9f7680728e2b..db1569fac57c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1425,10 +1425,6 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 	struct Qdisc *p = NULL;
 	int err;
 
-	if ((n->nlmsg_type != RTM_GETQDISC) &&
-	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
 				     rtm_tca_policy, extack);
 	if (err < 0)
@@ -1509,9 +1505,6 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 	struct Qdisc *q, *p;
 	int err;
 
-	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 replay:
 	/* Reinit, just in case something touches this. */
 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
@@ -1993,10 +1986,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 	u32 qid;
 	int err;
 
-	if ((n->nlmsg_type != RTM_GETTCLASS) &&
-	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
 				     rtm_tca_policy, extack);
 	if (err < 0)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b4ee163154a6..ee418701cdee 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -882,6 +882,16 @@ s64 vsock_stream_has_space(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_stream_has_space);
 
+void vsock_data_ready(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat ||
+	    sock_flag(sk, SOCK_DONE))
+		sk->sk_data_ready(sk);
+}
+EXPORT_SYMBOL_GPL(vsock_data_ready);
+
 static int vsock_release(struct socket *sock)
 {
 	__vsock_release(sock->sk, 0);
@@ -1066,8 +1076,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
 		if (transport && transport->stream_is_active(vsk) &&
 		    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
 			bool data_ready_now = false;
+			int target = sock_rcvlowat(sk, 0, INT_MAX);
 			int ret = transport->notify_poll_in(
-					vsk, 1, &data_ready_now);
+					vsk, target, &data_ready_now);
 			if (ret < 0) {
 				mask |= EPOLLERR;
 			} else {
@@ -2137,6 +2148,25 @@ out:
 	return err;
 }
 
+static int vsock_set_rcvlowat(struct sock *sk, int val)
+{
+	const struct vsock_transport *transport;
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	if (val > vsk->buffer_size)
+		return -EINVAL;
+
+	transport = vsk->transport;
+
+	if (transport && transport->set_rcvlowat)
+		return transport->set_rcvlowat(vsk, val);
+
+	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+	return 0;
+}
+
 static const struct proto_ops vsock_stream_ops = {
 	.family = PF_VSOCK,
 	.owner = THIS_MODULE,
@@ -2156,6 +2186,7 @@ static const struct proto_ops vsock_stream_ops = {
 	.recvmsg = vsock_connectible_recvmsg,
 	.mmap = sock_no_mmap,
 	.sendpage = sock_no_sendpage,
+	.set_rcvlowat = vsock_set_rcvlowat,
 };
 
 static const struct proto_ops vsock_seqpacket_ops = {
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index fd98229e3db3..59c3e2697069 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -815,6 +815,12 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
 	return 0;
 }
 
+static
+int hvs_set_rcvlowat(struct vsock_sock *vsk, int val)
+{
+	return -EOPNOTSUPP;
+}
+
 static struct vsock_transport hvs_transport = {
 	.module                   = THIS_MODULE,
 
@@ -850,6 +856,7 @@ static struct vsock_transport hvs_transport = {
 	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
 	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
 
+	.set_rcvlowat             = hvs_set_rcvlowat
 };
 
 static bool hvs_check_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index ec2c2afbf0d0..35863132f4f1 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -634,10 +634,7 @@ virtio_transport_notify_poll_in(struct vsock_sock *vsk,
 				size_t target,
 				bool *data_ready_now)
 {
-	if (vsock_stream_has_data(vsk))
-		*data_ready_now = true;
-	else
-		*data_ready_now = false;
+	*data_ready_now = vsock_stream_has_data(vsk) >= target;
 
 	return 0;
 }
@@ -1084,7 +1081,7 @@ virtio_transport_recv_connected(struct sock *sk,
 	switch (le16_to_cpu(pkt->hdr.op)) {
 	case VIRTIO_VSOCK_OP_RW:
 		virtio_transport_recv_enqueue(vsk, pkt);
-		sk->sk_data_ready(sk);
+		vsock_data_ready(sk);
 		return err;
 	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
 		virtio_transport_send_credit_update(vsk);
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index d69fc4b595ad..7c3a7db134b2 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -307,7 +307,7 @@ vmci_transport_handle_wrote(struct sock *sk,
 	struct vsock_sock *vsk = vsock_sk(sk);
 	PKT_FIELD(vsk, sent_waiting_read) = false;
 #endif
-	sk->sk_data_ready(sk);
+	vsock_data_ready(sk);
 }
 
 static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
@@ -340,12 +340,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
 {
 	struct vsock_sock *vsk = vsock_sk(sk);
 
-	if (vsock_stream_has_data(vsk)) {
+	if (vsock_stream_has_data(vsk) >= target) {
 		*data_ready_now = true;
 	} else {
-		/* We can't read right now because there is nothing in the
-		 * queue. Ask for notifications when there is something to
-		 * read.
+		/* We can't read right now because there is not enough data
+		 * in the queue. Ask for notifications when there is something
+		 * to read.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED) {
 			if (!send_waiting_read(sk, 1))
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 0f36d7c45db3..e96a88d850a8 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -84,7 +84,7 @@ vmci_transport_handle_wrote(struct sock *sk,
 			    bool bottom_half,
 			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
 {
-	sk->sk_data_ready(sk);
+	vsock_data_ready(sk);
 }
 
 static void vsock_block_update_write_window(struct sock *sk)
@@ -161,12 +161,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
 {
 	struct vsock_sock *vsk = vsock_sk(sk);
 
-	if (vsock_stream_has_data(vsk)) {
+	if (vsock_stream_has_data(vsk) >= target) {
 		*data_ready_now = true;
 	} else {
-		/* We can't read right now because there is nothing in the
-		 * queue. Ask for notifications when there is something to
-		 * read.
+		/* We can't read right now because there is not enough data
+		 * in the queue. Ask for notifications when there is something
+		 * to read.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED)
 			vsock_block_update_write_window(sk);
@@ -282,7 +282,7 @@ vmci_transport_notify_pkt_recv_post_dequeue(
 		/* See the comment in
 		 * vmci_transport_notify_pkt_send_post_enqueue().
 		 */
-		sk->sk_data_ready(sk);
+		vsock_data_ready(sk);
 	}
 
 	return err;
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dc577461afc2..bb6d691cb30d 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -18,6 +18,7 @@
 #include <sys/socket.h>
 #include <time.h>
 #include <sys/mman.h>
+#include <poll.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -596,6 +597,108 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt
 	close(fd);
 }
 
+#define RCVLOWAT_BUF_SIZE 128
+
+static void test_stream_poll_rcvlowat_server(const struct test_opts *opts)
+{
+	int fd;
+	int i;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Send 1 byte. */
+	send_byte(fd, 1, 0);
+
+	control_writeln("SRVSENT");
+
+	/* Wait until client is ready to receive rest of data. */
+	control_expectln("CLNSENT");
+
+	for (i = 0; i < RCVLOWAT_BUF_SIZE - 1; i++)
+		send_byte(fd, 1, 0);
+
+	/* Keep socket in active state. */
+	control_expectln("POLLDONE");
+
+	close(fd);
+}
+
+static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
+{
+	unsigned long lowat_val = RCVLOWAT_BUF_SIZE;
+	char buf[RCVLOWAT_BUF_SIZE];
+	struct pollfd fds;
+	ssize_t read_res;
+	short poll_flags;
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
+		       &lowat_val, sizeof(lowat_val))) {
+		perror("setsockopt");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SRVSENT");
+
+	/* At this point, server sent 1 byte. */
+	fds.fd = fd;
+	poll_flags = POLLIN | POLLRDNORM;
+	fds.events = poll_flags;
+
+	/* Try to wait for 1 sec. */
+	if (poll(&fds, 1, 1000) < 0) {
+		perror("poll");
+		exit(EXIT_FAILURE);
+	}
+
+	/* poll() must return nothing. */
+	if (fds.revents) {
+		fprintf(stderr, "Unexpected poll result %hx\n",
+			fds.revents);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Tell server to send rest of data. */
+	control_writeln("CLNSENT");
+
+	/* Poll for data. */
+	if (poll(&fds, 1, 10000) < 0) {
+		perror("poll");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Only these two bits are expected. */
+	if (fds.revents != poll_flags) {
+		fprintf(stderr, "Unexpected poll result %hx\n",
+			fds.revents);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Use MSG_DONTWAIT, if call is going to wait, EAGAIN
+	 * will be returned.
+	 */
+	read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+	if (read_res != RCVLOWAT_BUF_SIZE) {
+		fprintf(stderr, "Unexpected recv result %zi\n",
+			read_res);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("POLLDONE");
+
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -646,6 +749,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_invalid_rec_buffer_client,
 		.run_server = test_seqpacket_invalid_rec_buffer_server,
 	},
+	{
+		.name = "SOCK_STREAM poll() + SO_RCVLOWAT",
+		.run_client = test_stream_poll_rcvlowat_client,
+		.run_server = test_stream_poll_rcvlowat_server,
+	},
 	{},
 };