summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/nxp,tja11xx.yaml17
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml18
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/netlink/index.rst12
-rw-r--r--Documentation/userspace-api/netlink/intro.rst643
-rw-r--r--drivers/net/dsa/ocelot/felix.c6
-rw-r--r--drivers/net/ethernet/faraday/ftmac100.c1
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera.h2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_hw.c18
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_hw.h2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c5
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_switchdev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c68
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_linecards.c96
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.c87
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c373
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c5
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c83
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c44
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.h2
-rw-r--r--drivers/net/phy/nxp-tja11xx.c83
-rw-r--r--include/linux/compiler_attributes.h7
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/dsa.h4
-rw-r--r--include/soc/mscc/ocelot.h2
-rw-r--r--include/uapi/linux/netlink.h21
-rw-r--r--net/bridge/br_if.c20
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/dsa/dsa2.c56
-rw-r--r--net/dsa/dsa_priv.h1
-rw-r--r--net/dsa/master.c2
-rw-r--r--net/dsa/slave.c119
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/sched/cls_api.c10
-rw-r--r--net/sched/sch_api.c11
-rw-r--r--net/vmw_vsock/af_vsock.c33
-rw-r--r--net/vmw_vsock/hyperv_transport.c7
-rw-r--r--net/vmw_vsock/virtio_transport_common.c7
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c10
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c12
-rw-r--r--tools/testing/vsock/vsock_test.c108
43 files changed, 1786 insertions, 250 deletions
diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
index d51da24f3505..ab8867e6939b 100644
--- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@@ -31,6 +31,22 @@ patternProperties:
description:
The ID number for the child PHY. Should be +1 of parent PHY.
+ nxp,rmii-refclk-in:
+ type: boolean
+ description: |
+ The REF_CLK is provided for both transmitted and received data
+ in RMII mode. This clock signal is provided by the PHY and is
+ typically derived from an external 25MHz crystal. Alternatively,
+ a 50MHz clock signal generated by an external oscillator can be
+ connected to pin REF_CLK. A third option is to connect a 25MHz
+ clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
+ as input or output according to the actual circuit connection.
+ If present, indicates that the REF_CLK will be configured as
+ interface reference clock input when RMII mode enabled.
+ If not present, the REF_CLK will be configured as interface
+ reference clock output when RMII mode enabled.
+ Only supported on TJA1100 and TJA1101.
+
required:
- reg
@@ -44,6 +60,7 @@ examples:
tja1101_phy0: ethernet-phy@4 {
reg = <0x4>;
+ nxp,rmii-refclk-in;
};
};
- |
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index b8281d8be940..9ef11913052c 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -55,6 +55,7 @@ properties:
compatible:
enum:
- ti,am654-cpsw-nuss
+ - ti,j7200-cpswxg-nuss
- ti,j721e-cpsw-nuss
- ti,am642-cpsw-nuss
@@ -110,7 +111,7 @@ properties:
const: 0
patternProperties:
- port@[1-2]:
+ "^port@[1-4]$":
type: object
description: CPSWxG NUSS external ports
@@ -119,7 +120,7 @@ properties:
properties:
reg:
minimum: 1
- maximum: 2
+ maximum: 4
description: CPSW port number
phys:
@@ -178,6 +179,19 @@ required:
- '#address-cells'
- '#size-cells'
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: ti,j7200-cpswxg-nuss
+ then:
+ properties:
+ ethernet-ports:
+ patternProperties:
+ "^port@[3-4]$": false
+
additionalProperties: false
examples:
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index a61eac0c73f8..c78da9ce0ec4 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -26,6 +26,7 @@ place where this information is gathered.
ioctl/index
iommu
media/index
+ netlink/index
sysfs-platform_profile
vduse
futex2
diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst
new file mode 100644
index 000000000000..b0c21538d97d
--- /dev/null
+++ b/Documentation/userspace-api/netlink/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+================
+Netlink Handbook
+================
+
+Netlink documentation for users.
+
+.. toctree::
+ :maxdepth: 2
+
+ intro
diff --git a/Documentation/userspace-api/netlink/intro.rst b/Documentation/userspace-api/netlink/intro.rst
new file mode 100644
index 000000000000..94337f79e077
--- /dev/null
+++ b/Documentation/userspace-api/netlink/intro.rst
@@ -0,0 +1,643 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+=======================
+Introduction to Netlink
+=======================
+
+Netlink is often described as an ioctl() replacement.
+It aims to replace fixed-format C structures as supplied
+to ioctl() with a format which allows an easy way to add
+or extended the arguments.
+
+To achieve this Netlink uses a minimal fixed-format metadata header
+followed by multiple attributes in the TLV (type, length, value) format.
+
+Unfortunately the protocol has evolved over the years, in an organic
+and undocumented fashion, making it hard to coherently explain.
+To make the most practical sense this document starts by describing
+netlink as it is used today and dives into more "historical" uses
+in later sections.
+
+Opening a socket
+================
+
+Netlink communication happens over sockets, a socket needs to be
+opened first:
+
+.. code-block:: c
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+The use of sockets allows for a natural way of exchanging information
+in both directions (to and from the kernel). The operations are still
+performed synchronously when applications send() the request but
+a separate recv() system call is needed to read the reply.
+
+A very simplified flow of a Netlink "call" will therefore look
+something like:
+
+.. code-block:: c
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+ /* format the request */
+ send(fd, &request, sizeof(request));
+ n = recv(fd, &response, RSP_BUFFER_SIZE);
+ /* interpret the response */
+
+Netlink also provides natural support for "dumping", i.e. communicating
+to user space all objects of a certain type (e.g. dumping all network
+interfaces).
+
+.. code-block:: c
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+ /* format the dump request */
+ send(fd, &request, sizeof(request));
+ while (1) {
+ n = recv(fd, &buffer, RSP_BUFFER_SIZE);
+ /* one recv() call can read multiple messages, hence the loop below */
+ for (nl_msg in buffer) {
+ if (nl_msg.nlmsg_type == NLMSG_DONE)
+ goto dump_finished;
+ /* process the object */
+ }
+ }
+ dump_finished:
+
+The first two arguments of the socket() call require little explanation -
+it is opening a Netlink socket, with all headers provided by the user
+(hence NETLINK, RAW). The last argument is the protocol within Netlink.
+This field used to identify the subsystem with which the socket will
+communicate.
+
+Classic vs Generic Netlink
+--------------------------
+
+Initial implementation of Netlink depended on a static allocation
+of IDs to subsystems and provided little supporting infrastructure.
+Let us refer to those protocols collectively as **Classic Netlink**.
+The list of them is defined on top of the ``include/uapi/linux/netlink.h``
+file, they include among others - general networking (NETLINK_ROUTE),
+iSCSI (NETLINK_ISCSI), and audit (NETLINK_AUDIT).
+
+**Generic Netlink** (introduced in 2005) allows for dynamic registration of
+subsystems (and subsystem ID allocation), introspection and simplifies
+implementing the kernel side of the interface.
+
+The following section describes how to use Generic Netlink, as the
+number of subsystems using Generic Netlink outnumbers the older
+protocols by an order of magnitude. There are also no plans for adding
+more Classic Netlink protocols to the kernel.
+Basic information on how communicating with core networking parts of
+the Linux kernel (or another of the 20 subsystems using Classic
+Netlink) differs from Generic Netlink is provided later in this document.
+
+Generic Netlink
+===============
+
+In addition to the Netlink fixed metadata header each Netlink protocol
+defines its own fixed metadata header. (Similarly to how network
+headers stack - Ethernet > IP > TCP we have Netlink > Generic N. > Family.)
+
+A Netlink message always starts with struct nlmsghdr, which is followed
+by a protocol-specific header. In case of Generic Netlink the protocol
+header is struct genlmsghdr.
+
+The practical meaning of the fields in case of Generic Netlink is as follows:
+
+.. code-block:: c
+
+ struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including headers */
+ __u16 nlmsg_type; /* Generic Netlink Family (subsystem) ID */
+ __u16 nlmsg_flags; /* Flags - request or dump */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Port ID, set to 0 */
+ };
+ struct genlmsghdr {
+ __u8 cmd; /* Command, as defined by the Family */
+ __u8 version; /* Irrelevant, set to 1 */
+ __u16 reserved; /* Reserved, set to 0 */
+ };
+ /* TLV attributes follow... */
+
+In Classic Netlink :c:member:`nlmsghdr.nlmsg_type` used to identify
+which operation within the subsystem the message was referring to
+(e.g. get information about a netdev). Generic Netlink needs to mux
+multiple subsystems in a single protocol so it uses this field to
+identify the subsystem, and :c:member:`genlmsghdr.cmd` identifies
+the operation instead. (See :ref:`res_fam` for
+information on how to find the Family ID of the subsystem of interest.)
+Note that the first 16 values (0 - 15) of this field are reserved for
+control messages both in Classic Netlink and Generic Netlink.
+See :ref:`nl_msg_type` for more details.
+
+There are 3 usual types of message exchanges on a Netlink socket:
+
+ - performing a single action (``do``);
+ - dumping information (``dump``);
+ - getting asynchronous notifications (``multicast``).
+
+Classic Netlink is very flexible and presumably allows other types
+of exchanges to happen, but in practice those are the three that get
+used.
+
+Asynchronous notifications are sent by the kernel and received by
+the user sockets which subscribed to them. ``do`` and ``dump`` requests
+are initiated by the user. :c:member:`nlmsghdr.nlmsg_flags` should
+be set as follows:
+
+ - for ``do``: ``NLM_F_REQUEST | NLM_F_ACK``
+ - for ``dump``: ``NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP``
+
+:c:member:`nlmsghdr.nlmsg_seq` should be a set to a monotonically
+increasing value. The value gets echoed back in responses and doesn't
+matter in practice, but setting it to an increasing value for each
+message sent is considered good hygiene. The purpose of the field is
+matching responses to requests. Asynchronous notifications will have
+:c:member:`nlmsghdr.nlmsg_seq` of ``0``.
+
+:c:member:`nlmsghdr.nlmsg_pid` is the Netlink equivalent of an address.
+This field can be set to ``0`` when talking to the kernel.
+See :ref:`nlmsg_pid` for the (uncommon) uses of the field.
+
+The expected use for :c:member:`genlmsghdr.version` was to allow
+versioning of the APIs provided by the subsystems. No subsystem to
+date made significant use of this field, so setting it to ``1`` seems
+like a safe bet.
+
+.. _nl_msg_type:
+
+Netlink message types
+---------------------
+
+As previously mentioned :c:member:`nlmsghdr.nlmsg_type` carries
+protocol specific values but the first 16 identifiers are reserved
+(first subsystem specific message type should be equal to
+``NLMSG_MIN_TYPE`` which is ``0x10``).
+
+There are only 4 Netlink control messages defined:
+
+ - ``NLMSG_NOOP`` - ignore the message, not used in practice;
+ - ``NLMSG_ERROR`` - carries the return code of an operation;
+ - ``NLMSG_DONE`` - marks the end of a dump;
+ - ``NLMSG_OVERRUN`` - socket buffer has overflown, not used to date.
+
+``NLMSG_ERROR`` and ``NLMSG_DONE`` are of practical importance.
+They carry return codes for operations. Note that unless
+the ``NLM_F_ACK`` flag is set on the request Netlink will not respond
+with ``NLMSG_ERROR`` if there is no error. To avoid having to special-case
+this quirk it is recommended to always set ``NLM_F_ACK``.
+
+The format of ``NLMSG_ERROR`` is described by struct nlmsgerr::
+
+ ----------------------------------------------
+ | struct nlmsghdr - response header |
+ ----------------------------------------------
+ | int error |
+ ----------------------------------------------
+ | struct nlmsghdr - original request header |
+ ----------------------------------------------
+ | ** optionally (1) payload of the request |
+ ----------------------------------------------
+ | ** optionally (2) extended ACK |
+ ----------------------------------------------
+
+There are two instances of struct nlmsghdr here, first of the response
+and second of the request. ``NLMSG_ERROR`` carries the information about
+the request which led to the error. This could be useful when trying
+to match requests to responses or re-parse the request to dump it into
+logs.
+
+The payload of the request is not echoed in messages reporting success
+(``error == 0``) or if ``NETLINK_CAP_ACK`` setsockopt() was set.
+The latter is common
+and perhaps recommended as having to read a copy of every request back
+from the kernel is rather wasteful. The absence of request payload
+is indicated by ``NLM_F_CAPPED`` in :c:member:`nlmsghdr.nlmsg_flags`.
+
+The second optional element of ``NLMSG_ERROR`` are the extended ACK
+attributes. See :ref:`ext_ack` for more details. The presence
+of extended ACK is indicated by ``NLM_F_ACK_TLVS`` in
+:c:member:`nlmsghdr.nlmsg_flags`.
+
+``NLMSG_DONE`` is simpler, the request is never echoed but the extended
+ACK attributes may be present::
+
+ ----------------------------------------------
+ | struct nlmsghdr - response header |
+ ----------------------------------------------
+ | int error |
+ ----------------------------------------------
+ | ** optionally extended ACK |
+ ----------------------------------------------
+
+.. _res_fam:
+
+Resolving the Family ID
+-----------------------
+
+This section explains how to find the Family ID of a subsystem.
+It also serves as an example of Generic Netlink communication.
+
+Generic Netlink is itself a subsystem exposed via the Generic Netlink API.
+To avoid a circular dependency Generic Netlink has a statically allocated
+Family ID (``GENL_ID_CTRL`` which is equal to ``NLMSG_MIN_TYPE``).
+The Generic Netlink family implements a command used to find out information
+about other families (``CTRL_CMD_GETFAMILY``).
+
+To get information about the Generic Netlink family named for example
+``"test1"`` we need to send a message on the previously opened Generic Netlink
+socket. The message should target the Generic Netlink Family (1), be a
+``do`` (2) call to ``CTRL_CMD_GETFAMILY`` (3). A ``dump`` version of this
+call would make the kernel respond with information about *all* the families
+it knows about. Last but not least the name of the family in question has
+to be specified (4) as an attribute with the appropriate type::
+
+ struct nlmsghdr:
+ __u32 nlmsg_len: 32
+ __u16 nlmsg_type: GENL_ID_CTRL // (1)
+ __u16 nlmsg_flags: NLM_F_REQUEST | NLM_F_ACK // (2)
+ __u32 nlmsg_seq: 1
+ __u32 nlmsg_pid: 0
+
+ struct genlmsghdr:
+ __u8 cmd: CTRL_CMD_GETFAMILY // (3)
+ __u8 version: 2 /* or 1, doesn't matter */
+ __u16 reserved: 0
+
+ struct nlattr: // (4)
+ __u16 nla_len: 10
+ __u16 nla_type: CTRL_ATTR_FAMILY_NAME
+ char data: test1\0
+
+ (padding:)
+ char data: \0\0
+
+The length fields in Netlink (:c:member:`nlmsghdr.nlmsg_len`
+and :c:member:`nlattr.nla_len`) always *include* the header.
+Attribute headers in netlink must be aligned to 4 bytes from the start
+of the message, hence the extra ``\0\0`` after ``CTRL_ATTR_FAMILY_NAME``.
+The attribute lengths *exclude* the padding.
+
+If the family is found kernel will reply with two messages, the response
+with all the information about the family::
+
+ /* Message #1 - reply */
+ struct nlmsghdr:
+ __u32 nlmsg_len: 136
+ __u16 nlmsg_type: GENL_ID_CTRL
+ __u16 nlmsg_flags: 0
+ __u32 nlmsg_seq: 1 /* echoed from our request */
+ __u32 nlmsg_pid: 5831 /* The PID of our user space process */
+
+ struct genlmsghdr:
+ __u8 cmd: CTRL_CMD_GETFAMILY
+ __u8 version: 2
+ __u16 reserved: 0
+
+ struct nlattr:
+ __u16 nla_len: 10
+ __u16 nla_type: CTRL_ATTR_FAMILY_NAME
+ char data: test1\0
+
+ (padding:)
+ data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 6
+ __u16 nla_type: CTRL_ATTR_FAMILY_ID
+ __u16: 123 /* The Family ID we are after */
+
+ (padding:)
+ char data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 9
+ __u16 nla_type: CTRL_ATTR_FAMILY_VERSION
+ __u16: 1
+
+ /* ... etc, more attributes will follow. */
+
+And the error code (success) since ``NLM_F_ACK`` had been set on the request::
+
+ /* Message #2 - the ACK */
+ struct nlmsghdr:
+ __u32 nlmsg_len: 36
+ __u16 nlmsg_type: NLMSG_ERROR
+ __u16 nlmsg_flags: NLM_F_CAPPED /* There won't be a payload */
+ __u32 nlmsg_seq: 1 /* echoed from our request */
+ __u32 nlmsg_pid: 5831 /* The PID of our user space process */
+
+ int error: 0
+
+ struct nlmsghdr: /* Copy of the request header as we sent it */
+ __u32 nlmsg_len: 32
+ __u16 nlmsg_type: GENL_ID_CTRL
+ __u16 nlmsg_flags: NLM_F_REQUEST | NLM_F_ACK
+ __u32 nlmsg_seq: 1
+ __u32 nlmsg_pid: 0
+
+The order of attributes (struct nlattr) is not guaranteed so the user
+has to walk the attributes and parse them.
+
+Note that Generic Netlink sockets are not associated or bound to a single
+family. A socket can be used to exchange messages with many different
+families, selecting the recipient family on message-by-message basis using
+the :c:member:`nlmsghdr.nlmsg_type` field.
+
+.. _ext_ack:
+
+Extended ACK
+------------
+
+Extended ACK controls reporting of additional error/warning TLVs
+in ``NLMSG_ERROR`` and ``NLMSG_DONE`` messages. To maintain backward
+compatibility this feature has to be explicitly enabled by setting
+the ``NETLINK_EXT_ACK`` setsockopt() to ``1``.
+
+Types of extended ack attributes are defined in enum nlmsgerr_attrs.
+The two most commonly used attributes are ``NLMSGERR_ATTR_MSG``
+and ``NLMSGERR_ATTR_OFFS``.
+
+``NLMSGERR_ATTR_MSG`` carries a message in English describing
+the encountered problem. These messages are far more detailed
+than what can be expressed thru standard UNIX error codes.
+
+``NLMSGERR_ATTR_OFFS`` points to the attribute which caused the problem.
+
+Extended ACKs can be reported on errors as well as in case of success.
+The latter should be treated as a warning.
+
+Extended ACKs greatly improve the usability of Netlink and should
+always be enabled, appropriately parsed and reported to the user.
+
+Advanced topics
+===============
+
+Dump consistency
+----------------
+
+Some of the data structures kernel uses for storing objects make
+it hard to provide an atomic snapshot of all the objects in a dump
+(without impacting the fast-paths updating them).
+
+Kernel may set the ``NLM_F_DUMP_INTR`` flag on any message in a dump
+(including the ``NLMSG_DONE`` message) if the dump was interrupted and
+may be inconsistent (e.g. missing objects). User space should retry
+the dump if it sees the flag set.
+
+Introspection
+-------------
+
+The basic introspection abilities are enabled by access to the Family
+object as reported in :ref:`res_fam`. User can query information about
+the Generic Netlink family, including which operations are supported
+by the kernel and what attributes the kernel understands.
+Family information includes the highest ID of an attribute kernel can parse,
+a separate command (``CTRL_CMD_GETPOLICY``) provides detailed information
+about supported attributes, including ranges of values the kernel accepts.
+
+Querying family information is useful in cases when user space needs
+to make sure that the kernel has support for a feature before issuing
+a request.
+
+.. _nlmsg_pid:
+
+nlmsg_pid
+---------
+
+:c:member:`nlmsghdr.nlmsg_pid` is the Netlink equivalent of an address.
+It is referred to as Port ID, sometimes Process ID because for historical
+reasons if the application does not select (bind() to) an explicit Port ID
+kernel will automatically assign it the ID equal to its Process ID
+(as reported by the getpid() system call).
+
+Similarly to the bind() semantics of the TCP/IP network protocols the value
+of zero means "assign automatically", hence it is common for applications
+to leave the :c:member:`nlmsghdr.nlmsg_pid` field initialized to ``0``.
+
+The field is still used today in rare cases when kernel needs to send
+a unicast notification. User space application can use bind() to associate
+its socket with a specific PID, it then communicates its PID to the kernel.
+This way the kernel can reach the specific user space process.
+
+This sort of communication is utilized in UMH (User Mode Helper)-like
+scenarios when kernel needs to trigger user space processing or ask user
+space for a policy decision.
+
+Multicast notifications
+-----------------------
+
+One of the strengths of Netlink is the ability to send event notifications
+to user space. This is a unidirectional form of communication (kernel ->
+user) and does not involve any control messages like ``NLMSG_ERROR`` or
+``NLMSG_DONE``.
+
+For example the Generic Netlink family itself defines a set of multicast
+notifications about registered families. When a new family is added the
+sockets subscribed to the notifications will get the following message::
+
+ struct nlmsghdr:
+ __u32 nlmsg_len: 136
+ __u16 nlmsg_type: GENL_ID_CTRL
+ __u16 nlmsg_flags: 0
+ __u32 nlmsg_seq: 0
+ __u32 nlmsg_pid: 0
+
+ struct genlmsghdr:
+ __u8 cmd: CTRL_CMD_NEWFAMILY
+ __u8 version: 2
+ __u16 reserved: 0
+
+ struct nlattr:
+ __u16 nla_len: 10
+ __u16 nla_type: CTRL_ATTR_FAMILY_NAME
+ char data: test1\0
+
+ (padding:)
+ data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 6
+ __u16 nla_type: CTRL_ATTR_FAMILY_ID
+ __u16: 123 /* The Family ID we are after */
+
+ (padding:)
+ char data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 9
+ __u16 nla_type: CTRL_ATTR_FAMILY_VERSION
+ __u16: 1
+
+ /* ... etc, more attributes will follow. */
+
+The notification contains the same information as the response
+to the ``CTRL_CMD_GETFAMILY`` request.
+
+The Netlink headers of the notification are mostly 0 and irrelevant.
+The :c:member:`nlmsghdr.nlmsg_seq` may be either zero or a monotonically
+increasing notification sequence number maintained by the family.
+
+To receive notifications the user socket must subscribe to the relevant
+notification group. Much like the Family ID, the Group ID for a given
+multicast group is dynamic and can be found inside the Family information.
+The ``CTRL_ATTR_MCAST_GROUPS`` attribute contains nests with names
+(``CTRL_ATTR_MCAST_GRP_NAME``) and IDs (``CTRL_ATTR_MCAST_GRP_ID``) of
+the groups family.
+
+Once the Group ID is known a setsockopt() call adds the socket to the group:
+
+.. code-block:: c
+
+ unsigned int group_id;
+
+ /* .. find the group ID... */
+
+ setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ &group_id, sizeof(group_id));
+
+The socket will now receive notifications.
+
+It is recommended to use separate sockets for receiving notifications
+and sending requests to the kernel. The asynchronous nature of notifications
+means that they may get mixed in with the responses making the message
+handling much harder.
+
+Buffer sizing
+-------------
+
+Netlink sockets are datagram sockets rather than stream sockets,
+meaning that each message must be received in its entirety by a single
+recv()/recvmsg() system call. If the buffer provided by the user is too
+short, the message will be truncated and the ``MSG_TRUNC`` flag set
+in struct msghdr (struct msghdr is the second argument
+of the recvmsg() system call, *not* a Netlink header).
+
+Upon truncation the remaining part of the message is discarded.
+
+Netlink expects that the user buffer will be at least 8kB or a page
+size of the CPU architecture, whichever is bigger. Particular Netlink
+families may, however, require a larger buffer. 32kB buffer is recommended
+for most efficient handling of dumps (larger buffer fits more dumped
+objects and therefore fewer recvmsg() calls are needed).
+
+Classic Netlink
+===============
+
+The main differences between Classic and Generic Netlink are the dynamic
+allocation of subsystem identifiers and availability of introspection.
+In theory the protocol does not differ significantly, however, in practice
+Classic Netlink experimented with concepts which were abandoned in Generic
+Netlink (really, they usually only found use in a small corner of a single
+subsystem). This section is meant as an explainer of a few of such concepts,
+with the explicit goal of giving the Generic Netlink
+users the confidence to ignore them when reading the uAPI headers.
+
+Most of the concepts and examples here refer to the ``NETLINK_ROUTE`` family,
+which covers much of the configuration of the Linux networking stack.
+Real documentation of that family, deserves a chapter (or a book) of its own.
+
+Families
+--------
+
+Netlink refers to subsystems as families. This is a remnant of using
+sockets and the concept of protocol families, which are part of message
+demultiplexing in ``NETLINK_ROUTE``.
+
+Sadly every layer of encapsulation likes to refer to whatever it's carrying
+as "families" making the term very confusing:
+
+ 1. AF_NETLINK is a bona fide socket protocol family
+ 2. AF_NETLINK's documentation refers to what comes after its own
+ header (struct nlmsghdr) in a message as a "Family Header"
+ 3. Generic Netlink is a family for AF_NETLINK (struct genlmsghdr follows
+ struct nlmsghdr), yet it also calls its users "Families".
+
+Note that the Generic Netlink Family IDs are in a different "ID space"
+and overlap with Classic Netlink protocol numbers (e.g. ``NETLINK_CRYPTO``
+has the Classic Netlink protocol ID of 21 which Generic Netlink will
+happily allocate to one of its families as well).
+
+Strict checking
+---------------
+
+The ``NETLINK_GET_STRICT_CHK`` socket option enables strict input checking
+in ``NETLINK_ROUTE``. It was needed because historically kernel did not
+validate the fields of structures it didn't process. This made it impossible
+to start using those fields later without risking regressions in applications
+which initialized them incorrectly or not at all.
+
+``NETLINK_GET_STRICT_CHK`` declares that the application is initializing
+all fields correctly. It also opts into validating that message does not
+contain trailing data and requests that kernel rejects attributes with
+type higher than largest attribute type known to the kernel.
+
+``NETLINK_GET_STRICT_CHK`` is not used outside of ``NETLINK_ROUTE``.
+
+Unknown attributes
+------------------
+
+Historically Netlink ignored all unknown attributes. The thinking was that
+it would free the application from having to probe what kernel supports.
+The application could make a request to change the state and check which
+parts of the request "stuck".
+
+This is no longer the case for new Generic Netlink families and those opting
+in to strict checking. See enum netlink_validation for validation types
+performed.
+
+Fixed metadata and structures
+-----------------------------
+
+Classic Netlink made liberal use of fixed-format structures within
+the messages. Messages would commonly have a structure with
+a considerable number of fields after struct nlmsghdr. It was also
+common to put structures with multiple members inside attributes,
+without breaking each member into an attribute of its own.
+
+This has caused problems with validation and extensibility and
+therefore using binary structures is actively discouraged for new
+attributes.
+
+Request types
+-------------
+
+``NETLINK_ROUTE`` categorized requests into 4 types ``NEW``, ``DEL``, ``GET``,
+and ``SET``. Each object can handle all or some of those requests
+(objects being netdevs, routes, addresses, qdiscs etc.) Request type
+is defined by the 2 lowest bits of the message type, so commands for
+new objects would always be allocated with a stride of 4.
+
+Each object would also have it's own fixed metadata shared by all request
+types (e.g. struct ifinfomsg for netdev requests, struct ifaddrmsg for address
+requests, struct tcmsg for qdisc requests).
+
+Even though other protocols and Generic Netlink commands often use
+the same verbs in their message names (``GET``, ``SET``) the concept
+of request types did not find wider adoption.
+
+Message flags
+-------------
+
+The earlier section has already covered the basic request flags
+(``NLM_F_REQUEST``, ``NLM_F_ACK``, ``NLM_F_DUMP``) and the ``NLMSG_ERROR`` /
+``NLMSG_DONE`` flags (``NLM_F_CAPPED``, ``NLM_F_ACK_TLVS``).
+Dump flags were also mentioned (``NLM_F_MULTI``, ``NLM_F_DUMP_INTR``).
+
+Those are the main flags of note, with a small exception (of ``ieee802154``)
+Generic Netlink does not make use of other flags. If the protocol needs
+to communicate special constraints for a request it should use
+an attribute, not the flags in struct nlmsghdr.
+
+Classic Netlink, however, defined various flags for its ``GET``, ``NEW``
+and ``DEL`` requests. Since request types have not been generalized
+the request type specific flags should not be used either.
+
+uAPI reference
+==============
+
+.. kernel-doc:: include/uapi/linux/netlink.h
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index aadb0bd7c24f..ee19ed96f284 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -445,6 +445,9 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds)
if (err)
return err;
+ dsa_switch_for_each_cpu_port(dp, ds)
+ ocelot_port_setup_dsa_8021q_cpu(ocelot, dp->index);
+
dsa_switch_for_each_user_port(dp, ds)
ocelot_port_assign_dsa_8021q_cpu(ocelot, dp->index,
dp->cpu_dp->index);
@@ -493,6 +496,9 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds)
dsa_switch_for_each_user_port(dp, ds)
ocelot_port_unassign_dsa_8021q_cpu(ocelot, dp->index);
+ dsa_switch_for_each_cpu_port(dp, ds)
+ ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index);
+
dsa_tag_8021q_unregister(ds);
}
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 8a341e2d5833..2e6524009b19 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1075,6 +1075,7 @@ static int ftmac100_probe(struct platform_device *pdev)
SET_NETDEV_DEV(netdev, &pdev->dev);
netdev->ethtool_ops = &ftmac100_ethtool_ops;
netdev->netdev_ops = &ftmac100_netdev_ops;
+ netdev->max_mtu = MAX_PKT_SIZE;
platform_set_drvdata(pdev, netdev);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index 2f84d0fb4094..e5a4381a88b3 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -367,6 +367,8 @@ int prestera_port_learning_set(struct prestera_port *port, bool learn_enable);
int prestera_port_uc_flood_set(struct prestera_port *port, bool flood);
int prestera_port_mc_flood_set(struct prestera_port *port, bool flood);
+int prestera_port_br_locked_set(struct prestera_port *port, bool br_locked);
+
int prestera_port_pvid_set(struct prestera_port *port, u16 vid);
bool prestera_netdev_check(const struct net_device *dev);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index e0e9ae34ceea..1a68a888d587 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -101,6 +101,7 @@ enum {
PRESTERA_CMD_PORT_ATTR_LEARNING = 7,
PRESTERA_CMD_PORT_ATTR_FLOOD = 8,
PRESTERA_CMD_PORT_ATTR_CAPABILITY = 9,
+ PRESTERA_CMD_PORT_ATTR_LOCKED = 10,
PRESTERA_CMD_PORT_ATTR_PHY_MODE = 12,
PRESTERA_CMD_PORT_ATTR_TYPE = 13,
PRESTERA_CMD_PORT_ATTR_STATS = 17,
@@ -285,6 +286,7 @@ union prestera_msg_port_param {
u8 duplex;
u8 fec;
u8 fc;
+ u8 br_locked;
union {
struct {
u8 admin;
@@ -1640,6 +1642,22 @@ int prestera_hw_port_mc_flood_set(const struct prestera_port *port, bool flood)
&req.cmd, sizeof(req));
}
+int prestera_hw_port_br_locked_set(const struct prestera_port *port,
+ bool br_locked)
+{
+ struct prestera_msg_port_attr_req req = {
+ .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_LOCKED),
+ .port = __cpu_to_le32(port->hw_id),
+ .dev = __cpu_to_le32(port->dev_id),
+ .param = {
+ .br_locked = br_locked,
+ }
+ };
+
+ return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+ &req.cmd, sizeof(req));
+}
+
int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid)
{
struct prestera_msg_vlan_req req = {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index 56e043146dd2..4aca43e72a05 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -183,6 +183,8 @@ int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed);
int prestera_hw_port_learning_set(struct prestera_port *port, bool enable);
int prestera_hw_port_uc_flood_set(const struct prestera_port *port, bool flood);
int prestera_hw_port_mc_flood_set(const struct prestera_port *port, bool flood);
+int prestera_hw_port_br_locked_set(const struct prestera_port *port,
+ bool br_locked);
int prestera_hw_port_accept_frm_type(struct prestera_port *port,
enum prestera_accept_frm_type type);
/* Vlan API */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 3489b80ae0d6..3956d6d5df3c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -51,6 +51,11 @@ int prestera_port_mc_flood_set(struct prestera_port *port, bool flood)
return prestera_hw_port_mc_flood_set(port, flood);
}
+int prestera_port_br_locked_set(struct prestera_port *port, bool br_locked)
+{
+ return prestera_hw_port_br_locked_set(port, br_locked);
+}
+
int prestera_port_pvid_set(struct prestera_port *port, u16 vid)
{
enum prestera_accept_frm_type frm_type;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 71cde97d85c8..e548cd32582e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -143,6 +143,7 @@ prestera_br_port_flags_reset(struct prestera_bridge_port *br_port,
prestera_port_uc_flood_set(port, false);
prestera_port_mc_flood_set(port, false);
prestera_port_learning_set(port, false);
+ prestera_port_br_locked_set(port, false);
}
static int prestera_br_port_flags_set(struct prestera_bridge_port *br_port,
@@ -162,6 +163,11 @@ static int prestera_br_port_flags_set(struct prestera_bridge_port *br_port,
if (err)
goto err_out;
+ err = prestera_port_br_locked_set(port,
+ br_port->flags & BR_PORT_LOCKED);
+ if (err)
+ goto err_out;
+
return 0;
err_out:
@@ -1163,7 +1169,7 @@ static int prestera_port_obj_attr_set(struct net_device *dev, const void *ctx,
break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
if (attr->u.brport_flags.mask &
- ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
+ ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_PORT_LOCKED))
err = -EINVAL;
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 75553eb2c7f2..450dcd9951bb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -70,6 +70,8 @@ struct mlxsw_core {
struct workqueue_struct *emad_wq;
struct list_head rx_listener_list;
struct list_head event_listener_list;
+ struct list_head irq_event_handler_list;
+ struct mutex irq_event_handler_lock; /* Locks access to handlers list */
struct {
atomic64_t tid;
struct list_head trans_list;
@@ -2090,6 +2092,18 @@ static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core)
devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
}
+static void mlxsw_core_irq_event_handler_init(struct mlxsw_core *mlxsw_core)
+{
+ INIT_LIST_HEAD(&mlxsw_core->irq_event_handler_list);
+ mutex_init(&mlxsw_core->irq_event_handler_lock);
+}
+
+static void mlxsw_core_irq_event_handler_fini(struct mlxsw_core *mlxsw_core)
+{
+ mutex_destroy(&mlxsw_core->irq_event_handler_lock);
+ WARN_ON(!list_empty(&mlxsw_core->irq_event_handler_list));
+}
+
static int
__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
const struct mlxsw_bus *mlxsw_bus,
@@ -2125,6 +2139,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
mlxsw_core->bus = mlxsw_bus;
mlxsw_core->bus_priv = bus_priv;
mlxsw_core->bus_info = mlxsw_bus_info;
+ mlxsw_core_irq_event_handler_init(mlxsw_core);
err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
&mlxsw_core->res);
@@ -2233,6 +2248,7 @@ err_ports_init:
err_register_resources:
mlxsw_bus->fini(bus_priv);
err_bus_init:
+ mlxsw_core_irq_event_handler_fini(mlxsw_core);
if (!reload) {
devl_unlock(devlink);
devlink_free(devlink);
@@ -2302,6 +2318,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (!reload)
devl_resources_unregister(devlink);
mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+ mlxsw_core_irq_event_handler_fini(mlxsw_core);
if (!reload) {
devl_unlock(devlink);
devlink_free(devlink);
@@ -2772,6 +2789,57 @@ int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list)
}
EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait);
+struct mlxsw_core_irq_event_handler_item {
+ struct list_head list;
+ void (*cb)(struct mlxsw_core *mlxsw_core);
+};
+
+int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb)
+{
+ struct mlxsw_core_irq_event_handler_item *item;
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+ item->cb = cb;
+ mutex_lock(&mlxsw_core->irq_event_handler_lock);
+ list_add_tail(&item->list, &mlxsw_core->irq_event_handler_list);
+ mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handler_register);
+
+void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb)
+{
+ struct mlxsw_core_irq_event_handler_item *item, *tmp;
+
+ mutex_lock(&mlxsw_core->irq_event_handler_lock);
+ list_for_each_entry_safe(item, tmp,
+ &mlxsw_core->irq_event_handler_list, list) {
+ if (item->cb == cb) {
+ list_del(&item->list);
+ kfree(item);
+ }
+ }
+ mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handler_unregister);
+
+void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_core_irq_event_handler_item *item;
+
+ mutex_lock(&mlxsw_core->irq_event_handler_lock);
+ list_for_each_entry(item, &mlxsw_core->irq_event_handler_list, list) {
+ if (item->cb)
+ item->cb(mlxsw_core);
+ }
+ mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handlers_call);
+
static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg,
char *payload,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 02d9cc2ef0c8..c7c0b3cefd8d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -215,6 +215,14 @@ int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list);
+typedef void mlxsw_irq_event_cb_t(struct mlxsw_core *mlxsw_core);
+
+int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb);
+void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb);
+void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core);
+
int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload);
int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index ca59f0b946da..83d2dc91ba2c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -785,6 +785,21 @@ static int mlxsw_linecard_status_get_and_process(struct mlxsw_core *mlxsw_core,
return mlxsw_linecard_status_process(linecards, linecard, mddq_pl);
}
+static void mlxsw_linecards_irq_event_handler(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core);
+ int i;
+
+ /* Handle change of line card active state. */
+ for (i = 0; i < linecards->count; i++) {
+ struct mlxsw_linecard *linecard = mlxsw_linecard_get(linecards,
+ i + 1);
+
+ mlxsw_linecard_status_get_and_process(mlxsw_core, linecards,
+ linecard);
+ }
+}
+
static const char * const mlxsw_linecard_status_event_type_name[] = {
[MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION] = "provision",
[MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION] = "unprovision",
@@ -1238,7 +1253,6 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
{
struct devlink_linecard *devlink_linecard;
struct mlxsw_linecard *linecard;
- int err;
linecard = mlxsw_linecard_get(linecards, slot_index);
linecard->slot_index = slot_index;
@@ -1248,17 +1262,45 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
devlink_linecard = devlink_linecard_create(priv_to_devlink(mlxsw_core),
slot_index, &mlxsw_linecard_ops,
linecard);
- if (IS_ERR(devlink_linecard)) {
- err = PTR_ERR(devlink_linecard);
- goto err_devlink_linecard_create;
- }
+ if (IS_ERR(devlink_linecard))
+ return PTR_ERR(devlink_linecard);
+
linecard->devlink_linecard = devlink_linecard;
INIT_DELAYED_WORK(&linecard->status_event_to_dw,
&mlxsw_linecard_status_event_to_work);
+ return 0;
+}
+
+static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_linecards *linecards,
+ u8 slot_index)
+{
+ struct mlxsw_linecard *linecard;
+
+ linecard = mlxsw_linecard_get(linecards, slot_index);
+ cancel_delayed_work_sync(&linecard->status_event_to_dw);
+ /* Make sure all scheduled events are processed */
+ mlxsw_core_flush_owq();
+ if (linecard->active)
+ mlxsw_linecard_active_clear(linecard);
+ mlxsw_linecard_bdev_del(linecard);
+ devlink_linecard_destroy(linecard->devlink_linecard);
+ mutex_destroy(&linecard->lock);
+}
+
+static int
+mlxsw_linecard_event_delivery_init(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_linecards *linecards,
+ u8 slot_index)
+{
+ struct mlxsw_linecard *linecard;
+ int err;
+
+ linecard = mlxsw_linecard_get(linecards, slot_index);
err = mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, true);
if (err)
- goto err_event_delivery_set;
+ return err;
err = mlxsw_linecard_status_get_and_process(mlxsw_core, linecards,
linecard);
@@ -1269,29 +1311,18 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
err_status_get_and_process:
mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false);
-err_event_delivery_set:
- devlink_linecard_destroy(linecard->devlink_linecard);
-err_devlink_linecard_create:
- mutex_destroy(&linecard->lock);
return err;
}
-static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
- struct mlxsw_linecards *linecards,
- u8 slot_index)
+static void
+mlxsw_linecard_event_delivery_fini(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_linecards *linecards,
+ u8 slot_index)
{
struct mlxsw_linecard *linecard;
linecard = mlxsw_linecard_get(linecards, slot_index);
mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false);
- cancel_delayed_work_sync(&linecard->status_event_to_dw);
- /* Make sure all scheduled events are processed */
- mlxsw_core_flush_owq();
- if (linecard->active)
- mlxsw_linecard_active_clear(linecard);
- mlxsw_linecard_bdev_del(linecard);
- devlink_linecard_destroy(linecard->devlink_linecard);
- mutex_destroy(&linecard->lock);
}
/* LINECARDS INI BUNDLE FILE
@@ -1505,6 +1536,11 @@ int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_traps_register;
+ err = mlxsw_core_irq_event_handler_register(mlxsw_core,
+ mlxsw_linecards_irq_event_handler);
+ if (err)
+ goto err_irq_event_handler_register;
+
mlxsw_core_linecards_set(mlxsw_core, linecards);
for (i = 0; i < linecards->count; i++) {
@@ -1513,11 +1549,25 @@ int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core,
goto err_linecard_init;
}
+ for (i = 0; i < linecards->count; i++) {
+ err = mlxsw_linecard_event_delivery_init(mlxsw_core, linecards,
+ i + 1);
+ if (err)
+ goto err_linecard_event_delivery_init;
+ }
+
return 0;
+err_linecard_event_delivery_init:
+ for (i--; i >= 0; i--)
+ mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1);
+ i = linecards->count;
err_linecard_init:
for (i--; i >= 0; i--)
mlxsw_linecard_fini(mlxsw_core, linecards, i + 1);
+ mlxsw_core_irq_event_handler_unregister(mlxsw_core,
+ mlxsw_linecards_irq_event_handler);
+err_irq_event_handler_register:
mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener,
ARRAY_SIZE(mlxsw_linecard_listener),
mlxsw_core);
@@ -1536,7 +1586,11 @@ void mlxsw_linecards_fini(struct mlxsw_core *mlxsw_core)
if (!linecards)
return;
for (i = 0; i < linecards->count; i++)
+ mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1);
+ for (i = 0; i < linecards->count; i++)
mlxsw_linecard_fini(mlxsw_core, linecards, i + 1);
+ mlxsw_core_irq_event_handler_unregister(mlxsw_core,
+ mlxsw_linecards_irq_event_handler);
mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener,
ARRAY_SIZE(mlxsw_linecard_listener),
mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index ce843ea91464..716c73e4fd59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -9,6 +9,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
+#include <linux/platform_data/mlxreg.h>
#include <linux/slab.h>
#include "cmd.h"
@@ -51,6 +52,15 @@
#define MLXSW_I2C_TIMEOUT_MSECS 5000
#define MLXSW_I2C_MAX_DATA_SIZE 256
+/* Driver can be initialized by kernel platform driver or from the user
+ * space. In the first case IRQ line number is passed through the platform
+ * data, otherwise default IRQ line is to be used. Default IRQ is relevant
+ * only for specific I2C slave address, allowing 3.4 MHz I2C path to the chip
+ * (special hardware feature for I2C acceleration).
+ */
+#define MLXSW_I2C_DEFAULT_IRQ 17
+#define MLXSW_FAST_I2C_SLAVE 0x37
+
/**
* struct mlxsw_i2c - device private data:
* @cmd: command attributes;
@@ -63,6 +73,9 @@
* @core: switch core pointer;
* @bus_info: bus info block;
* @block_size: maximum block size allowed to pass to under layer;
+ * @pdata: device platform data;
+ * @irq_work: interrupts work item;
+ * @irq: IRQ line number;
*/
struct mlxsw_i2c {
struct {
@@ -76,6 +89,9 @@ struct mlxsw_i2c {
struct mlxsw_core *core;
struct mlxsw_bus_info bus_info;
u16 block_size;
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct work_struct irq_work;
+ int irq;
};
#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
@@ -546,6 +562,67 @@ static void mlxsw_i2c_fini(void *bus_priv)
mlxsw_i2c->core = NULL;
}
+static void mlxsw_i2c_work_handler(struct work_struct *work)
+{
+ struct mlxsw_i2c *mlxsw_i2c;
+
+ mlxsw_i2c = container_of(work, struct mlxsw_i2c, irq_work);
+ mlxsw_core_irq_event_handlers_call(mlxsw_i2c->core);
+}
+
+static irqreturn_t mlxsw_i2c_irq_handler(int irq, void *dev)
+{
+ struct mlxsw_i2c *mlxsw_i2c = dev;
+
+ mlxsw_core_schedule_work(&mlxsw_i2c->irq_work);
+
+ /* Interrupt handler shares IRQ line with 'main' interrupt handler.
+ * Return here IRQ_NONE, while main handler will return IRQ_HANDLED.
+ */
+ return IRQ_NONE;
+}
+
+static int mlxsw_i2c_irq_init(struct mlxsw_i2c *mlxsw_i2c, u8 addr)
+{
+ int err;
+
+ /* Initialize interrupt handler if system hotplug driver is reachable,
+ * otherwise interrupt line is not enabled and interrupts will not be
+ * raised to CPU. Also request_irq() call will be not valid.
+ */
+ if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG))
+ return 0;
+
+ /* Set default interrupt line. */
+ if (mlxsw_i2c->pdata && mlxsw_i2c->pdata->irq)
+ mlxsw_i2c->irq = mlxsw_i2c->pdata->irq;
+ else if (addr == MLXSW_FAST_I2C_SLAVE)
+ mlxsw_i2c->irq = MLXSW_I2C_DEFAULT_IRQ;
+
+ if (!mlxsw_i2c->irq)
+ return 0;
+
+ INIT_WORK(&mlxsw_i2c->irq_work, mlxsw_i2c_work_handler);
+ err = request_irq(mlxsw_i2c->irq, mlxsw_i2c_irq_handler,
+ IRQF_TRIGGER_FALLING | IRQF_SHARED, "mlxsw-i2c",
+ mlxsw_i2c);
+ if (err) {
+ dev_err(mlxsw_i2c->bus_info.dev, "Failed to request irq: %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlxsw_i2c_irq_fini(struct mlxsw_i2c *mlxsw_i2c)
+{
+ if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG) || !mlxsw_i2c->irq)
+ return;
+ cancel_work_sync(&mlxsw_i2c->irq_work);
+ free_irq(mlxsw_i2c->irq, mlxsw_i2c);
+}
+
static const struct mlxsw_bus mlxsw_i2c_bus = {
.kind = "i2c",
.init = mlxsw_i2c_init,
@@ -638,17 +715,24 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
mlxsw_i2c->bus_info.dev = &client->dev;
mlxsw_i2c->bus_info.low_frequency = true;
mlxsw_i2c->dev = &client->dev;
+ mlxsw_i2c->pdata = client->dev.platform_data;
+
+ err = mlxsw_i2c_irq_init(mlxsw_i2c, client->addr);
+ if (err)
+ goto errout;
err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
&mlxsw_i2c_bus, mlxsw_i2c, false,
NULL, NULL);
if (err) {
dev_err(&client->dev, "Fail to register core bus\n");
- return err;
+ goto err_bus_device_register;
}
return 0;
+err_bus_device_register:
+ mlxsw_i2c_irq_fini(mlxsw_i2c);
errout:
mutex_destroy(&mlxsw_i2c->cmd.lock);
i2c_set_clientdata(client, NULL);
@@ -661,6 +745,7 @@ static int mlxsw_i2c_remove(struct i2c_client *client)
struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
mlxsw_core_bus_device_unregister(mlxsw_i2c->core, false);
+ mlxsw_i2c_irq_fini(mlxsw_i2c);
mutex_destroy(&mlxsw_i2c->cmd.lock);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index bb1cd4bae82e..7d3fa2883e8b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -26,20 +26,29 @@ static const struct mlxsw_fw_rev mlxsw_m_fw_rev = {
struct mlxsw_m_port;
+struct mlxsw_m_line_card {
+ bool active;
+ int module_to_port[];
+};
+
struct mlxsw_m {
struct mlxsw_m_port **ports;
- int *module_to_port;
struct mlxsw_core *core;
const struct mlxsw_bus_info *bus_info;
u8 base_mac[ETH_ALEN];
u8 max_ports;
+ u8 max_modules_per_slot; /* Maximum number of modules per-slot. */
+ u8 num_of_slots; /* Including the main board. */
+ struct mlxsw_m_line_card **line_cards;
};
struct mlxsw_m_port {
struct net_device *dev;
struct mlxsw_m *mlxsw_m;
u16 local_port;
+ u8 slot_index;
u8 module;
+ u8 module_offset;
};
static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m)
@@ -111,8 +120,9 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_info(netdev, core, 0, mlxsw_m_port->module,
- modinfo);
+ return mlxsw_env_get_module_info(netdev, core,
+ mlxsw_m_port->slot_index,
+ mlxsw_m_port->module, modinfo);
}
static int
@@ -122,7 +132,8 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_eeprom(netdev, core, 0,
+ return mlxsw_env_get_module_eeprom(netdev, core,
+ mlxsw_m_port->slot_index,
mlxsw_m_port->module, ee, data);
}
@@ -134,7 +145,8 @@ mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_eeprom_by_page(core, 0,
+ return mlxsw_env_get_module_eeprom_by_page(core,
+ mlxsw_m_port->slot_index,
mlxsw_m_port->module,
page, extack);
}
@@ -144,7 +156,8 @@ static int mlxsw_m_reset(struct net_device *netdev, u32 *flags)
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_reset_module(netdev, core, 0, mlxsw_m_port->module,
+ return mlxsw_env_reset_module(netdev, core, mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
flags);
}
@@ -156,7 +169,8 @@ mlxsw_m_get_module_power_mode(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_power_mode(core, 0, mlxsw_m_port->module,
+ return mlxsw_env_get_module_power_mode(core, mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
params, extack);
}
@@ -168,7 +182,8 @@ mlxsw_m_set_module_power_mode(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_set_module_power_mode(core, 0, mlxsw_m_port->module,
+ return mlxsw_env_set_module_power_mode(core, mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
params->policy, extack);
}
@@ -184,7 +199,7 @@ static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
static int
mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port,
- u8 *p_module, u8 *p_width)
+ u8 *p_module, u8 *p_width, u8 *p_slot_index)
{
char pmlp_pl[MLXSW_REG_PMLP_LEN];
int err;
@@ -195,6 +210,7 @@ mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port,
return err;
*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+ *p_slot_index = mlxsw_reg_pmlp_slot_index_get(pmlp_pl, 0);
return 0;
}
@@ -212,18 +228,25 @@ mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port)
if (err)
return err;
mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, addr);
- eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1);
+ eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1 +
+ mlxsw_m_port->module_offset);
return 0;
}
+static bool mlxsw_m_port_created(struct mlxsw_m *mlxsw_m, u16 local_port)
+{
+ return mlxsw_m->ports[local_port];
+}
+
static int
-mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 module)
+mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 slot_index,
+ u8 module)
{
struct mlxsw_m_port *mlxsw_m_port;
struct net_device *dev;
int err;
- err = mlxsw_core_port_init(mlxsw_m->core, local_port, 0,
+ err = mlxsw_core_port_init(mlxsw_m->core, local_port, slot_index,
module + 1, false, 0, false,
0, mlxsw_m->base_mac,
sizeof(mlxsw_m->base_mac));
@@ -246,6 +269,15 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 module)
mlxsw_m_port->mlxsw_m = mlxsw_m;
mlxsw_m_port->local_port = local_port;
mlxsw_m_port->module = module;
+ mlxsw_m_port->slot_index = slot_index;
+ /* Add module offset for line card. Offset for main board iz zero.
+ * For line card in slot #n offset is calculated as (#n - 1)
+ * multiplied by maximum modules number, which could be found on a line
+ * card.
+ */
+ mlxsw_m_port->module_offset = mlxsw_m_port->slot_index ?
+ (mlxsw_m_port->slot_index - 1) *
+ mlxsw_m->max_modules_per_slot : 0;
dev->netdev_ops = &mlxsw_m_port_netdev_ops;
dev->ethtool_ops = &mlxsw_m_port_ethtool_ops;
@@ -291,19 +323,29 @@ static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u16 local_port)
mlxsw_core_port_fini(mlxsw_m->core, local_port);
}
+static int*
+mlxsw_m_port_mapping_get(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module)
+{
+ return &mlxsw_m->line_cards[slot_index]->module_to_port[module];
+}
+
static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port,
u8 *last_module)
{
unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
- u8 module, width;
+ u8 module, width, slot_index;
+ int *module_to_port;
int err;
/* Fill out to local port mapping array */
err = mlxsw_m_port_module_info_get(mlxsw_m, local_port, &module,
- &width);
+ &width, &slot_index);
if (err)
return err;
+ /* Skip if line card has been already configured */
+ if (mlxsw_m->line_cards[slot_index]->active)
+ return 0;
if (!width)
return 0;
/* Skip, if port belongs to the cluster */
@@ -313,91 +355,216 @@ static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port,
if (WARN_ON_ONCE(module >= max_ports))
return -EINVAL;
- mlxsw_env_module_port_map(mlxsw_m->core, 0, module);
- mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports;
+ mlxsw_env_module_port_map(mlxsw_m->core, slot_index, module);
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, module);
+ *module_to_port = local_port;
return 0;
}
-static void mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 module)
+static void
+mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module)
{
- mlxsw_m->module_to_port[module] = -1;
- mlxsw_env_module_port_unmap(mlxsw_m->core, 0, module);
+ int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index,
+ module);
+ *module_to_port = -1;
+ mlxsw_env_module_port_unmap(mlxsw_m->core, slot_index, module);
}
-static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m)
{
unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
- u8 last_module = max_ports;
- int i;
- int err;
+ char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+ u8 num_of_modules;
+ int i, j, err;
+
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
+ err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(mgpir), mgpir_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &num_of_modules,
+ &mlxsw_m->num_of_slots);
+ /* If the system is modular, get the maximum number of modules per-slot.
+ * Otherwise, get the maximum number of modules on the main board.
+ */
+ if (mlxsw_m->num_of_slots)
+ mlxsw_m->max_modules_per_slot =
+ mlxsw_reg_mgpir_max_modules_per_slot_get(mgpir_pl);
+ else
+ mlxsw_m->max_modules_per_slot = num_of_modules;
+ /* Add slot for main board. */
+ mlxsw_m->num_of_slots += 1;
mlxsw_m->ports = kcalloc(max_ports, sizeof(*mlxsw_m->ports),
GFP_KERNEL);
if (!mlxsw_m->ports)
return -ENOMEM;
- mlxsw_m->module_to_port = kmalloc_array(max_ports, sizeof(int),
- GFP_KERNEL);
- if (!mlxsw_m->module_to_port) {
- err = -ENOMEM;
- goto err_module_to_port_alloc;
+ mlxsw_m->line_cards = kcalloc(mlxsw_m->num_of_slots,
+ sizeof(*mlxsw_m->line_cards),
+ GFP_KERNEL);
+ if (!mlxsw_m->line_cards)
+ goto err_kcalloc;
+
+ for (i = 0; i < mlxsw_m->num_of_slots; i++) {
+ mlxsw_m->line_cards[i] =
+ kzalloc(struct_size(mlxsw_m->line_cards[i],
+ module_to_port,
+ mlxsw_m->max_modules_per_slot),
+ GFP_KERNEL);
+ if (!mlxsw_m->line_cards[i])
+ goto err_kmalloc_array;
+
+ /* Invalidate the entries of module to local port mapping array. */
+ for (j = 0; j < mlxsw_m->max_modules_per_slot; j++)
+ mlxsw_m->line_cards[i]->module_to_port[j] = -1;
}
- /* Invalidate the entries of module to local port mapping array */
- for (i = 0; i < max_ports; i++)
- mlxsw_m->module_to_port[i] = -1;
+ return 0;
- /* Fill out module to local port mapping array */
- for (i = 1; i < max_ports; i++) {
- err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
- if (err)
- goto err_module_to_port_map;
+err_kmalloc_array:
+ for (i--; i >= 0; i--)
+ kfree(mlxsw_m->line_cards[i]);
+err_kcalloc:
+ kfree(mlxsw_m->ports);
+ return err;
+}
+
+static void mlxsw_m_linecards_fini(struct mlxsw_m *mlxsw_m)
+{
+ int i = mlxsw_m->num_of_slots;
+
+ for (i--; i >= 0; i--)
+ kfree(mlxsw_m->line_cards[i]);
+ kfree(mlxsw_m->line_cards);
+ kfree(mlxsw_m->ports);
+}
+
+static void
+mlxsw_m_linecard_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index)
+{
+ int i;
+
+ for (i = mlxsw_m->max_modules_per_slot - 1; i >= 0; i--) {
+ int *module_to_port;
+
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+ if (*module_to_port > 0)
+ mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i);
}
+}
- /* Create port objects for each valid entry */
- for (i = 0; i < mlxsw_m->max_ports; i++) {
- if (mlxsw_m->module_to_port[i] > 0) {
- err = mlxsw_m_port_create(mlxsw_m,
- mlxsw_m->module_to_port[i],
- i);
+static int
+mlxsw_m_linecard_ports_create(struct mlxsw_m *mlxsw_m, u8 slot_index)
+{
+ int *module_to_port;
+ int i, err;
+
+ for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) {
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+ if (*module_to_port > 0) {
+ err = mlxsw_m_port_create(mlxsw_m, *module_to_port,
+ slot_index, i);
if (err)
- goto err_module_to_port_create;
+ goto err_port_create;
+ /* Mark slot as active */
+ if (!mlxsw_m->line_cards[slot_index]->active)
+ mlxsw_m->line_cards[slot_index]->active = true;
}
}
-
return 0;
-err_module_to_port_create:
+err_port_create:
for (i--; i >= 0; i--) {
- if (mlxsw_m->module_to_port[i] > 0)
- mlxsw_m_port_remove(mlxsw_m,
- mlxsw_m->module_to_port[i]);
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+ if (*module_to_port > 0 &&
+ mlxsw_m_port_created(mlxsw_m, *module_to_port)) {
+ mlxsw_m_port_remove(mlxsw_m, *module_to_port);
+ /* Mark slot as inactive */
+ if (mlxsw_m->line_cards[slot_index]->active)
+ mlxsw_m->line_cards[slot_index]->active = false;
+ }
}
- i = max_ports;
-err_module_to_port_map:
- for (i--; i > 0; i--)
- mlxsw_m_port_module_unmap(mlxsw_m, i);
- kfree(mlxsw_m->module_to_port);
-err_module_to_port_alloc:
- kfree(mlxsw_m->ports);
return err;
}
-static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+static void
+mlxsw_m_linecard_ports_remove(struct mlxsw_m *mlxsw_m, u8 slot_index)
{
int i;
- for (i = 0; i < mlxsw_m->max_ports; i++) {
- if (mlxsw_m->module_to_port[i] > 0) {
- mlxsw_m_port_remove(mlxsw_m,
- mlxsw_m->module_to_port[i]);
- mlxsw_m_port_module_unmap(mlxsw_m, i);
+ for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) {
+ int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m,
+ slot_index, i);
+
+ if (*module_to_port > 0 &&
+ mlxsw_m_port_created(mlxsw_m, *module_to_port)) {
+ mlxsw_m_port_remove(mlxsw_m, *module_to_port);
+ mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i);
}
}
+}
- kfree(mlxsw_m->module_to_port);
- kfree(mlxsw_m->ports);
+static int mlxsw_m_ports_module_map(struct mlxsw_m *mlxsw_m)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
+ u8 last_module = max_ports;
+ int i, err;
+
+ for (i = 1; i < max_ports; i++) {
+ err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+{
+ int err;
+
+ /* Fill out module to local port mapping array */
+ err = mlxsw_m_ports_module_map(mlxsw_m);
+ if (err)
+ goto err_ports_module_map;
+
+ /* Create port objects for each valid entry */
+ err = mlxsw_m_linecard_ports_create(mlxsw_m, 0);
+ if (err)
+ goto err_linecard_ports_create;
+
+ return 0;
+
+err_linecard_ports_create:
+err_ports_module_map:
+ mlxsw_m_linecard_port_module_unmap(mlxsw_m, 0);
+
+ return err;
+}
+
+static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+{
+ mlxsw_m_linecard_ports_remove(mlxsw_m, 0);
+}
+
+static void
+mlxsw_m_ports_remove_selected(struct mlxsw_core *mlxsw_core,
+ bool (*selector)(void *priv, u16 local_port),
+ void *priv)
+{
+ struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_linecard *linecard_priv = priv;
+ struct mlxsw_m_line_card *linecard;
+
+ linecard = mlxsw_m->line_cards[linecard_priv->slot_index];
+
+ if (WARN_ON(!linecard->active))
+ return;
+
+ mlxsw_m_linecard_ports_remove(mlxsw_m, linecard_priv->slot_index);
+ linecard->active = false;
}
static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
@@ -418,6 +585,60 @@ static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
return -EINVAL;
}
+static void
+mlxsw_m_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv)
+{
+ struct mlxsw_m_line_card *linecard;
+ struct mlxsw_m *mlxsw_m = priv;
+ int err;
+
+ linecard = mlxsw_m->line_cards[slot_index];
+ /* Skip if line card has been already configured during init */
+ if (linecard->active)
+ return;
+
+ /* Fill out module to local port mapping array */
+ err = mlxsw_m_ports_module_map(mlxsw_m);
+ if (err)
+ goto err_ports_module_map;
+
+ /* Create port objects for each valid entry */
+ err = mlxsw_m_linecard_ports_create(mlxsw_m, slot_index);
+ if (err) {
+ dev_err(mlxsw_m->bus_info->dev, "Failed to create port for line card at slot %d\n",
+ slot_index);
+ goto err_linecard_ports_create;
+ }
+
+ linecard->active = true;
+
+ return;
+
+err_linecard_ports_create:
+err_ports_module_map:
+ mlxsw_m_linecard_port_module_unmap(mlxsw_m, slot_index);
+}
+
+static void
+mlxsw_m_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv)
+{
+ struct mlxsw_m_line_card *linecard;
+ struct mlxsw_m *mlxsw_m = priv;
+
+ linecard = mlxsw_m->line_cards[slot_index];
+
+ if (WARN_ON(!linecard->active))
+ return;
+
+ mlxsw_m_linecard_ports_remove(mlxsw_m, slot_index);
+ linecard->active = false;
+}
+
+static struct mlxsw_linecards_event_ops mlxsw_m_event_ops = {
+ .got_active = mlxsw_m_got_active,
+ .got_inactive = mlxsw_m_got_inactive,
+};
+
static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *mlxsw_bus_info,
struct netlink_ext_ack *extack)
@@ -438,13 +659,33 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
return err;
}
+ err = mlxsw_m_linecards_init(mlxsw_m);
+ if (err) {
+ dev_err(mlxsw_m->bus_info->dev, "Failed to create line cards\n");
+ return err;
+ }
+
+ err = mlxsw_linecards_event_ops_register(mlxsw_core,
+ &mlxsw_m_event_ops, mlxsw_m);
+ if (err) {
+ dev_err(mlxsw_m->bus_info->dev, "Failed to register line cards operations\n");
+ goto linecards_event_ops_register;
+ }
+
err = mlxsw_m_ports_create(mlxsw_m);
if (err) {
dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n");
- return err;
+ goto err_ports_create;
}
return 0;
+
+err_ports_create:
+ mlxsw_linecards_event_ops_unregister(mlxsw_core,
+ &mlxsw_m_event_ops, mlxsw_m);
+linecards_event_ops_register:
+ mlxsw_m_linecards_fini(mlxsw_m);
+ return err;
}
static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
@@ -452,6 +693,9 @@ static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
mlxsw_m_ports_remove(mlxsw_m);
+ mlxsw_linecards_event_ops_unregister(mlxsw_core,
+ &mlxsw_m_event_ops, mlxsw_m);
+ mlxsw_m_linecards_fini(mlxsw_m);
}
static const struct mlxsw_config_profile mlxsw_m_config_profile;
@@ -461,6 +705,7 @@ static struct mlxsw_driver mlxsw_m_driver = {
.priv_size = sizeof(struct mlxsw_m),
.init = mlxsw_m_init,
.fini = mlxsw_m_fini,
+ .ports_remove_selected = mlxsw_m_ports_remove_selected,
.profile = &mlxsw_m_config_profile,
};
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 82d55fc27edc..70bc7253454f 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -413,7 +413,8 @@ static int ks8851_probe_spi(struct spi_device *spi)
spi->bits_per_word = 8;
- ks = netdev_priv(netdev);
+ kss = netdev_priv(netdev);
+ ks = &kss->ks8851;
ks->lock = ks8851_lock_spi;
ks->unlock = ks8851_unlock_spi;
@@ -433,8 +434,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
IRQ_RXPSI) /* RX process stop */
ks->rc_ier = STD_IRQ;
- kss = to_ks8851_spi(ks);
-
kss->spidev = spi;
mutex_init(&kss->lock);
INIT_WORK(&kss->tx_work, ks8851_tx_work);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 306026e6aa11..dddaffdaad9a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2064,6 +2064,16 @@ static int ocelot_bond_get_id(struct ocelot *ocelot, struct net_device *bond)
return __ffs(bond_mask);
}
+/* Returns the mask of user ports assigned to this DSA tag_8021q CPU port.
+ * Note that when CPU ports are in a LAG, the user ports are assigned to the
+ * 'primary' CPU port, the one whose physical port number gives the logical
+ * port number of the LAG.
+ *
+ * We leave PGID_SRC poorly configured for the 'secondary' CPU port in the LAG
+ * (to which no user port is assigned), but it appears that forwarding from
+ * this secondary CPU port looks at the PGID_SRC associated with the logical
+ * port ID that it's assigned to, which *is* configured properly.
+ */
static u32 ocelot_dsa_8021q_cpu_assigned_ports(struct ocelot *ocelot,
struct ocelot_port *cpu)
{
@@ -2080,9 +2090,15 @@ static u32 ocelot_dsa_8021q_cpu_assigned_ports(struct ocelot *ocelot,
mask |= BIT(port);
}
+ if (cpu->bond)
+ mask &= ~ocelot_get_bond_mask(ocelot, cpu->bond);
+
return mask;
}
+/* Returns the DSA tag_8021q CPU port that the given port is assigned to,
+ * or the bit mask of CPU ports if said CPU port is in a LAG.
+ */
u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
@@ -2091,6 +2107,9 @@ u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port)
if (!cpu_port)
return 0;
+ if (cpu_port->bond)
+ return ocelot_get_bond_mask(ocelot, cpu_port->bond);
+
return BIT(cpu_port->index);
}
EXPORT_SYMBOL_GPL(ocelot_port_assigned_dsa_8021q_cpu_mask);
@@ -2214,61 +2233,61 @@ static void ocelot_update_pgid_cpu(struct ocelot *ocelot)
ocelot_write_rix(ocelot, pgid_cpu, ANA_PGID_PGID, PGID_CPU);
}
-void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port,
- int cpu)
+void ocelot_port_setup_dsa_8021q_cpu(struct ocelot *ocelot, int cpu)
{
struct ocelot_port *cpu_port = ocelot->ports[cpu];
u16 vid;
mutex_lock(&ocelot->fwd_domain_lock);
- ocelot->ports[port]->dsa_8021q_cpu = cpu_port;
-
- if (!cpu_port->is_dsa_8021q_cpu) {
- cpu_port->is_dsa_8021q_cpu = true;
-
- for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
- ocelot_vlan_member_add(ocelot, cpu, vid, true);
+ cpu_port->is_dsa_8021q_cpu = true;
- ocelot_update_pgid_cpu(ocelot);
- }
+ for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
+ ocelot_vlan_member_add(ocelot, cpu, vid, true);
- ocelot_apply_bridge_fwd_mask(ocelot, true);
+ ocelot_update_pgid_cpu(ocelot);
mutex_unlock(&ocelot->fwd_domain_lock);
}
-EXPORT_SYMBOL_GPL(ocelot_port_assign_dsa_8021q_cpu);
+EXPORT_SYMBOL_GPL(ocelot_port_setup_dsa_8021q_cpu);
-void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port)
+void ocelot_port_teardown_dsa_8021q_cpu(struct ocelot *ocelot, int cpu)
{
- struct ocelot_port *cpu_port = ocelot->ports[port]->dsa_8021q_cpu;
- bool keep = false;
+ struct ocelot_port *cpu_port = ocelot->ports[cpu];
u16 vid;
- int p;
mutex_lock(&ocelot->fwd_domain_lock);
- ocelot->ports[port]->dsa_8021q_cpu = NULL;
+ cpu_port->is_dsa_8021q_cpu = false;
- for (p = 0; p < ocelot->num_phys_ports; p++) {
- if (!ocelot->ports[p])
- continue;
+ for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
+ ocelot_vlan_member_del(ocelot, cpu_port->index, vid);
- if (ocelot->ports[p]->dsa_8021q_cpu == cpu_port) {
- keep = true;
- break;
- }
- }
+ ocelot_update_pgid_cpu(ocelot);
- if (!keep) {
- cpu_port->is_dsa_8021q_cpu = false;
+ mutex_unlock(&ocelot->fwd_domain_lock);
+}
+EXPORT_SYMBOL_GPL(ocelot_port_teardown_dsa_8021q_cpu);
+
+void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port,
+ int cpu)
+{
+ struct ocelot_port *cpu_port = ocelot->ports[cpu];
- for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
- ocelot_vlan_member_del(ocelot, cpu_port->index, vid);
+ mutex_lock(&ocelot->fwd_domain_lock);
- ocelot_update_pgid_cpu(ocelot);
- }
+ ocelot->ports[port]->dsa_8021q_cpu = cpu_port;
+ ocelot_apply_bridge_fwd_mask(ocelot, true);
+
+ mutex_unlock(&ocelot->fwd_domain_lock);
+}
+EXPORT_SYMBOL_GPL(ocelot_port_assign_dsa_8021q_cpu);
+void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port)
+{
+ mutex_lock(&ocelot->fwd_domain_lock);
+
+ ocelot->ports[port]->dsa_8021q_cpu = NULL;
ocelot_apply_bridge_fwd_mask(ocelot, true);
mutex_unlock(&ocelot->fwd_domain_lock);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f4a6b590a1e3..7ef5d8208a4e 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -74,6 +74,9 @@
#define AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG 0x318
#define AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2 0x31C
+#define AM65_CPSW_SGMII_CONTROL_REG 0x010
+#define AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE BIT(0)
+
#define AM65_CPSW_CTL_VLAN_AWARE BIT(1)
#define AM65_CPSW_CTL_P0_ENABLE BIT(2)
#define AM65_CPSW_CTL_P0_TX_CRC_REMOVE BIT(13)
@@ -590,11 +593,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
/* mac_sl should be configured via phy-link interface */
am65_cpsw_sl_ctl_reset(port);
- ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET,
- port->slave.phy_if);
- if (ret)
- goto error_cleanup;
-
ret = phylink_of_phy_connect(port->slave.phylink, port->slave.phy_node, 0);
if (ret)
goto error_cleanup;
@@ -1409,7 +1407,14 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = {
static void am65_cpsw_nuss_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state)
{
- /* Currently not used */
+ struct am65_cpsw_slave_data *slave = container_of(config, struct am65_cpsw_slave_data,
+ phylink_config);
+ struct am65_cpsw_port *port = container_of(slave, struct am65_cpsw_port, slave);
+ struct am65_cpsw_common *common = port->common;
+
+ if (common->pdata.extra_modes & BIT(state->interface))
+ writel(AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE,
+ port->sgmii_base + AM65_CPSW_SGMII_CONTROL_REG);
}
static void am65_cpsw_nuss_mac_link_down(struct phylink_config *config, unsigned int mode,
@@ -1847,6 +1852,8 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
port->common = common;
port->port_base = common->cpsw_base + AM65_CPSW_NU_PORTS_BASE +
AM65_CPSW_NU_PORTS_OFFSET * (port_id);
+ if (common->pdata.extra_modes)
+ port->sgmii_base = common->ss_base + AM65_CPSW_SGMII_BASE * (port_id);
port->stat_base = common->cpsw_base + AM65_CPSW_NU_STATS_BASE +
(AM65_CPSW_NU_STATS_PORT_OFFSET * port_id);
port->name = of_get_property(port_np, "label", NULL);
@@ -1886,6 +1893,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
goto of_node_put;
}
+ ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET, port->slave.phy_if);
+ if (ret)
+ goto of_node_put;
+
ret = of_get_mac_address(port_np, port->slave.mac_addr);
if (ret) {
am65_cpsw_am654_get_efuse_macid(port_np,
@@ -1981,7 +1992,18 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
port->slave.phylink_config.type = PHYLINK_NETDEV;
port->slave.phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
- phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+ if (phy_interface_mode_is_rgmii(port->slave.phy_if)) {
+ phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+ } else if (port->slave.phy_if == PHY_INTERFACE_MODE_RMII) {
+ __set_bit(PHY_INTERFACE_MODE_RMII,
+ port->slave.phylink_config.supported_interfaces);
+ } else if (common->pdata.extra_modes & BIT(port->slave.phy_if)) {
+ __set_bit(PHY_INTERFACE_MODE_QSGMII,
+ port->slave.phylink_config.supported_interfaces);
+ } else {
+ dev_err(dev, "selected phy-mode is not supported\n");
+ return -EOPNOTSUPP;
+ }
phylink = phylink_create(&port->slave.phylink_config,
of_node_to_fwnode(port->slave.phy_node),
@@ -2611,10 +2633,18 @@ static const struct am65_cpsw_pdata am64x_cpswxg_pdata = {
.fdqring_mode = K3_RINGACC_RING_MODE_RING,
};
+static const struct am65_cpsw_pdata j7200_cpswxg_pdata = {
+ .quirks = 0,
+ .ale_dev_id = "am64-cpswxg",
+ .fdqring_mode = K3_RINGACC_RING_MODE_RING,
+ .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII),
+};
+
static const struct of_device_id am65_cpsw_nuss_of_mtable[] = {
{ .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0},
{ .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata},
{ .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata},
+ { .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata},
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index ac945631bf2f..2c9850fdfcb6 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -46,6 +46,7 @@ struct am65_cpsw_port {
const char *name;
u32 port_id;
void __iomem *port_base;
+ void __iomem *sgmii_base;
void __iomem *stat_base;
void __iomem *fetch_ram_base;
bool disabled;
@@ -88,6 +89,7 @@ struct am65_cpsw_rx_chn {
struct am65_cpsw_pdata {
u32 quirks;
+ u64 extra_modes;
enum k3_ring_mode fdqring_mode;
const char *ale_dev_id;
};
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 2a8195c50d14..ec91e671f8aa 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -10,6 +10,7 @@
#include <linux/mdio.h>
#include <linux/mii.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/phy.h>
#include <linux/hwmon.h>
#include <linux/bitfield.h>
@@ -34,6 +35,11 @@
#define MII_CFG1 18
#define MII_CFG1_MASTER_SLAVE BIT(15)
#define MII_CFG1_AUTO_OP BIT(14)
+#define MII_CFG1_INTERFACE_MODE_MASK GENMASK(9, 8)
+#define MII_CFG1_MII_MODE (0x0 << 8)
+#define MII_CFG1_RMII_MODE_REFCLK_IN BIT(8)
+#define MII_CFG1_RMII_MODE_REFCLK_OUT BIT(9)
+#define MII_CFG1_REVMII_MODE GENMASK(9, 8)
#define MII_CFG1_SLEEP_CONFIRM BIT(6)
#define MII_CFG1_LED_MODE_MASK GENMASK(5, 4)
#define MII_CFG1_LED_MODE_LINKUP 0
@@ -72,11 +78,15 @@
#define MII_COMMCFG 27
#define MII_COMMCFG_AUTO_OP BIT(15)
+/* Configure REF_CLK as input in RMII mode */
+#define TJA110X_RMII_MODE_REFCLK_IN BIT(0)
+
struct tja11xx_priv {
char *hwmon_name;
struct device *hwmon_dev;
struct phy_device *phydev;
struct work_struct phy_register_work;
+ u32 flags;
};
struct tja11xx_phy_stats {
@@ -251,8 +261,34 @@ do_test:
return __genphy_config_aneg(phydev, changed);
}
+static int tja11xx_get_interface_mode(struct phy_device *phydev)
+{
+ struct tja11xx_priv *priv = phydev->priv;
+ int mii_mode;
+
+ switch (phydev->interface) {
+ case PHY_INTERFACE_MODE_MII:
+ mii_mode = MII_CFG1_MII_MODE;
+ break;
+ case PHY_INTERFACE_MODE_REVMII:
+ mii_mode = MII_CFG1_REVMII_MODE;
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ if (priv->flags & TJA110X_RMII_MODE_REFCLK_IN)
+ mii_mode = MII_CFG1_RMII_MODE_REFCLK_IN;
+ else
+ mii_mode = MII_CFG1_RMII_MODE_REFCLK_OUT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return mii_mode;
+}
+
static int tja11xx_config_init(struct phy_device *phydev)
{
+ u16 reg_mask, reg_val;
int ret;
ret = tja11xx_enable_reg_write(phydev);
@@ -265,15 +301,32 @@ static int tja11xx_config_init(struct phy_device *phydev)
switch (phydev->phy_id & PHY_ID_MASK) {
case PHY_ID_TJA1100:
- ret = phy_modify(phydev, MII_CFG1,
- MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
- MII_CFG1_LED_ENABLE,
- MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
- MII_CFG1_LED_ENABLE);
+ reg_mask = MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
+ MII_CFG1_LED_ENABLE;
+ reg_val = MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
+ MII_CFG1_LED_ENABLE;
+
+ reg_mask |= MII_CFG1_INTERFACE_MODE_MASK;
+ ret = tja11xx_get_interface_mode(phydev);
+ if (ret < 0)
+ return ret;
+
+ reg_val |= (ret & 0xffff);
+ ret = phy_modify(phydev, MII_CFG1, reg_mask, reg_val);
if (ret)
return ret;
break;
case PHY_ID_TJA1101:
+ reg_mask = MII_CFG1_INTERFACE_MODE_MASK;
+ ret = tja11xx_get_interface_mode(phydev);
+ if (ret < 0)
+ return ret;
+
+ reg_val = ret & 0xffff;
+ ret = phy_modify(phydev, MII_CFG1, reg_mask, reg_val);
+ if (ret)
+ return ret;
+ fallthrough;
case PHY_ID_TJA1102:
ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
if (ret)
@@ -458,16 +511,36 @@ static int tja11xx_hwmon_register(struct phy_device *phydev,
return PTR_ERR_OR_ZERO(priv->hwmon_dev);
}
+static int tja11xx_parse_dt(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct tja11xx_priv *priv = phydev->priv;
+
+ if (!IS_ENABLED(CONFIG_OF_MDIO))
+ return 0;
+
+ if (of_property_read_bool(node, "nxp,rmii-refclk-in"))
+ priv->flags |= TJA110X_RMII_MODE_REFCLK_IN;
+
+ return 0;
+}
+
static int tja11xx_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
struct tja11xx_priv *priv;
+ int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->phydev = phydev;
+ phydev->priv = priv;
+
+ ret = tja11xx_parse_dt(phydev);
+ if (ret)
+ return ret;
return tja11xx_hwmon_register(phydev, priv);
}
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 445e80517cab..fc93c9488c76 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -371,4 +371,11 @@
*/
#define __weak __attribute__((__weak__))
+/*
+ * Used by functions that use '__builtin_return_address'. These function
+ * don't want to be splited or made inline, which can make
+ * the '__builtin_return_address' get unexpected address.
+ */
+#define __fix_address noinline __noclone
+
#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8afa382bf2..b51d07a727c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1195,7 +1195,8 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
/**
* kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..568a87c5e0d0 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -78,6 +78,7 @@ struct vsock_sock {
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
+void vsock_data_ready(struct sock *sk);
/**** TRANSPORT ****/
@@ -135,6 +136,7 @@ struct vsock_transport {
u64 (*stream_rcvhiwat)(struct vsock_sock *);
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
+ int (*set_rcvlowat)(struct vsock_sock *vsk, int val);
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index b902b31bebce..f2ce12860546 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -559,6 +559,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
list_for_each_entry((_dp), &(_dst)->ports, list) \
if (dsa_port_is_user((_dp)))
+#define dsa_tree_for_each_cpu_port(_dp, _dst) \
+ list_for_each_entry((_dp), &(_dst)->ports, list) \
+ if (dsa_port_is_cpu((_dp)))
+
#define dsa_switch_for_each_port(_dp, _ds) \
list_for_each_entry((_dp), &(_ds)->dst->ports, list) \
if ((_dp)->ds == (_ds))
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 2edea901bbd5..2a7e18ee5577 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -1024,6 +1024,8 @@ void ocelot_deinit(struct ocelot *ocelot);
void ocelot_init_port(struct ocelot *ocelot, int port);
void ocelot_deinit_port(struct ocelot *ocelot, int port);
+void ocelot_port_setup_dsa_8021q_cpu(struct ocelot *ocelot, int cpu);
+void ocelot_port_teardown_dsa_8021q_cpu(struct ocelot *ocelot, int cpu);
void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port, int cpu);
void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port);
u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port);
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 1e543cf0568c..e0ab261ceca2 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -41,12 +41,20 @@ struct sockaddr_nl {
__u32 nl_groups; /* multicast groups mask */
};
+/**
+ * struct nlmsghdr - fixed format metadata header of Netlink messages
+ * @nlmsg_len: Length of message including header
+ * @nlmsg_type: Message content type
+ * @nlmsg_flags: Additional flags
+ * @nlmsg_seq: Sequence number
+ * @nlmsg_pid: Sending process port ID
+ */
struct nlmsghdr {
- __u32 nlmsg_len; /* Length of message including header */
- __u16 nlmsg_type; /* Message content */
- __u16 nlmsg_flags; /* Additional flags */
- __u32 nlmsg_seq; /* Sequence number */
- __u32 nlmsg_pid; /* Sending process port ID */
+ __u32 nlmsg_len;
+ __u16 nlmsg_type;
+ __u16 nlmsg_flags;
+ __u32 nlmsg_seq;
+ __u32 nlmsg_pid;
};
/* Flags values */
@@ -337,6 +345,9 @@ enum netlink_attribute_type {
* bitfield32 type (U32)
* @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
* @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
+ *
+ * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes
+ * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number
*/
enum netlink_policy_type_attr {
NL_POLICY_TYPE_ATTR_UNSPEC,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a84a7cfb9d6d..efbd93e92ce2 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -568,26 +568,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
!is_valid_ether_addr(dev->dev_addr))
return -EINVAL;
- /* Also don't allow bridging of net devices that are DSA masters, since
- * the bridge layer rx_handler prevents the DSA fake ethertype handler
- * to be invoked, so we don't get the chance to strip off and parse the
- * DSA switch tag protocol header (the bridge layer just returns
- * RX_HANDLER_CONSUMED, stopping RX processing for these frames).
- * The only case where that would not be an issue is when bridging can
- * already be offloaded, such as when the DSA master is itself a DSA
- * or plain switchdev port, and is bridged only with other ports from
- * the same hardware device.
- */
- if (netdev_uses_dsa(dev)) {
- list_for_each_entry(p, &br->port_list, list) {
- if (!netdev_port_same_parent_id(dev, p->dev)) {
- NL_SET_ERR_MSG(extack,
- "Cannot do software bridging with a DSA master");
- return -EINVAL;
- }
- }
- }
-
/* No bridging of bridges */
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
NL_SET_ERR_MSG(extack,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 974bbbbe7138..35d9d5958dc6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -777,9 +777,10 @@ EXPORT_SYMBOL(__kfree_skb);
* hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
* tracepoint.
*/
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
- if (!skb_unref(skb))
+ if (unlikely(!skb_unref(skb)))
return;
DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 055a6d1d4372..ed56c7a554b8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1060,26 +1060,24 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *dp;
+ struct dsa_port *cpu_dp;
int err = 0;
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_cpu(dp)) {
- struct net_device *master = dp->master;
- bool admin_up = (master->flags & IFF_UP) &&
- !qdisc_tx_is_noop(master);
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+ bool admin_up = (master->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(master);
- err = dsa_master_setup(master, dp);
- if (err)
- break;
+ err = dsa_master_setup(master, cpu_dp);
+ if (err)
+ break;
- /* Replay master state event */
- dsa_tree_master_admin_state_change(dst, master, admin_up);
- dsa_tree_master_oper_state_change(dst, master,
- netif_oper_up(master));
- }
+ /* Replay master state event */
+ dsa_tree_master_admin_state_change(dst, master, admin_up);
+ dsa_tree_master_oper_state_change(dst, master,
+ netif_oper_up(master));
}
rtnl_unlock();
@@ -1089,22 +1087,20 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *dp;
+ struct dsa_port *cpu_dp;
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_cpu(dp)) {
- struct net_device *master = dp->master;
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
- /* Synthesizing an "admin down" state is sufficient for
- * the switches to get a notification if the master is
- * currently up and running.
- */
- dsa_tree_master_admin_state_change(dst, master, false);
+ /* Synthesizing an "admin down" state is sufficient for
+ * the switches to get a notification if the master is
+ * currently up and running.
+ */
+ dsa_tree_master_admin_state_change(dst, master, false);
- dsa_master_teardown(master);
- }
+ dsa_master_teardown(master);
}
rtnl_unlock();
@@ -1252,7 +1248,6 @@ out_disconnect:
* they would have formed disjoint trees (different "dsa,member" values).
*/
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops)
{
@@ -1268,14 +1263,11 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
* attempts to change the tagging protocol. If we ever lift the IFF_UP
* restriction, there needs to be another mutex which serializes this.
*/
- if (master->flags & IFF_UP)
- goto out_unlock;
-
list_for_each_entry(dp, &dst->ports, list) {
- if (!dsa_port_is_user(dp))
- continue;
+ if (dsa_port_is_cpu(dp) && (dp->master->flags & IFF_UP))
+ goto out_unlock;
- if (dp->slave->flags & IFF_UP)
+ if (dsa_port_is_user(dp) && (dp->slave->flags & IFF_UP))
goto out_unlock;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 8924366467e0..614fbba8fe39 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -545,7 +545,6 @@ struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
int dsa_broadcast(unsigned long e, void *v);
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 9e5dabc751d9..fb810edc8281 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -307,7 +307,7 @@ static ssize_t tagging_store(struct device *d, struct device_attribute *attr,
*/
goto out;
- err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, dev, new_tag_ops,
+ err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops,
old_tag_ops);
if (err) {
/* On failure the old tagger is restored, so we don't need the
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ce12c3427bad..b24f1131af90 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2476,6 +2476,9 @@ static int dsa_slave_changeupper(struct net_device *dev,
struct netlink_ext_ack *extack;
int err = NOTIFY_DONE;
+ if (!dsa_slave_dev_check(dev))
+ return err;
+
extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev)) {
@@ -2531,6 +2534,9 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
@@ -2551,6 +2557,9 @@ dsa_slave_lag_changeupper(struct net_device *dev,
int err = NOTIFY_DONE;
struct dsa_port *dp;
+ if (!netif_is_lag_master(dev))
+ return err;
+
netdev_for_each_lower_dev(dev, lower, iter) {
if (!dsa_slave_dev_check(lower))
continue;
@@ -2580,6 +2589,9 @@ dsa_slave_lag_prechangeupper(struct net_device *dev,
int err = NOTIFY_DONE;
struct dsa_port *dp;
+ if (!netif_is_lag_master(dev))
+ return err;
+
netdev_for_each_lower_dev(dev, lower, iter) {
if (!dsa_slave_dev_check(lower))
continue;
@@ -2687,6 +2699,75 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
return NOTIFY_DONE;
}
+static int
+dsa_master_prechangeupper_sanity_check(struct net_device *master,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netlink_ext_ack *extack;
+
+ if (!netdev_uses_dsa(master))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ /* Allow DSA switch uppers */
+ if (dsa_slave_dev_check(info->upper_dev))
+ return NOTIFY_DONE;
+
+ /* Allow bridge uppers of DSA masters, subject to further
+ * restrictions in dsa_bridge_prechangelower_sanity_check()
+ */
+ if (netif_is_bridge_master(info->upper_dev))
+ return NOTIFY_DONE;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ NL_SET_ERR_MSG_MOD(extack,
+ "DSA master cannot join unknown upper interfaces");
+ return notifier_from_errno(-EBUSY);
+}
+
+/* Don't allow bridging of DSA masters, since the bridge layer rx_handler
+ * prevents the DSA fake ethertype handler to be invoked, so we don't get the
+ * chance to strip off and parse the DSA switch tag protocol header (the bridge
+ * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these
+ * frames).
+ * The only case where that would not be an issue is when bridging can already
+ * be offloaded, such as when the DSA master is itself a DSA or plain switchdev
+ * port, and is bridged only with other ports from the same hardware device.
+ */
+static int
+dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *br = info->upper_dev;
+ struct netlink_ext_ack *extack;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(br))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ netdev_for_each_lower_dev(br, lower, iter) {
+ if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower))
+ continue;
+
+ if (!netdev_port_same_parent_id(lower, new_lower)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot do software bridging with a DSA master");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
static int dsa_slave_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2698,25 +2779,40 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
int err;
err = dsa_slave_prechangeupper_sanity_check(dev, info);
- if (err != NOTIFY_DONE)
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_master_prechangeupper_sanity_check(dev, info);
+ if (notifier_to_errno(err))
return err;
- if (dsa_slave_dev_check(dev))
- return dsa_slave_prechangeupper(dev, ptr);
+ err = dsa_bridge_prechangelower_sanity_check(dev, info);
+ if (notifier_to_errno(err))
+ return err;
- if (netif_is_lag_master(dev))
- return dsa_slave_lag_prechangeupper(dev, ptr);
+ err = dsa_slave_prechangeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_slave_lag_prechangeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
break;
}
- case NETDEV_CHANGEUPPER:
- if (dsa_slave_dev_check(dev))
- return dsa_slave_changeupper(dev, ptr);
+ case NETDEV_CHANGEUPPER: {
+ int err;
- if (netif_is_lag_master(dev))
- return dsa_slave_lag_changeupper(dev, ptr);
+ err = dsa_slave_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_slave_lag_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
break;
+ }
case NETDEV_CHANGELOWERSTATE: {
struct netdev_notifier_changelowerstate_info *info = ptr;
struct dsa_port *dp;
@@ -2777,6 +2873,9 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
if (!dsa_port_is_user(dp))
continue;
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
list_add(&dp->slave->close_list, &close_list);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bbe218753662..ba62f8e8bd15 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4433,12 +4433,16 @@ static void __tcp_alloc_md5sig_pool(void)
* to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool_populated = true;
+ /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+ * and tcp_get_md5sig_pool().
+ */
+ WRITE_ONCE(tcp_md5sig_pool_populated, true);
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool_populated)) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
mutex_lock(&tcp_md5sig_mutex);
if (!tcp_md5sig_pool_populated) {
@@ -4449,7 +4453,8 @@ bool tcp_alloc_md5sig_pool(void)
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool_populated;
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ return READ_ONCE(tcp_md5sig_pool_populated);
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -4465,7 +4470,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
local_bh_disable();
- if (tcp_md5sig_pool_populated) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (READ_ONCE(tcp_md5sig_pool_populated)) {
/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
smp_rmb();
return this_cpu_ptr(&tcp_md5sig_pool);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 790d6809be81..1ebab4b11262 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1977,9 +1977,6 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool rtnl_held = false;
u32 flags;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
tp_created = 0;
@@ -2208,9 +2205,6 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
int err;
bool rtnl_held = false;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2826,10 +2820,6 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
unsigned long cl;
int err;
- if (n->nlmsg_type != RTM_GETCHAIN &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
q = NULL;
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9f7680728e2b..db1569fac57c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1425,10 +1425,6 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -1509,9 +1505,6 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q, *p;
int err;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
/* Reinit, just in case something touches this. */
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
@@ -1993,10 +1986,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b4ee163154a6..ee418701cdee 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -882,6 +882,16 @@ s64 vsock_stream_has_space(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
+void vsock_data_ready(struct sock *sk)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat ||
+ sock_flag(sk, SOCK_DONE))
+ sk->sk_data_ready(sk);
+}
+EXPORT_SYMBOL_GPL(vsock_data_ready);
+
static int vsock_release(struct socket *sock)
{
__vsock_release(sock->sk, 0);
@@ -1066,8 +1076,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
if (transport && transport->stream_is_active(vsk) &&
!(sk->sk_shutdown & RCV_SHUTDOWN)) {
bool data_ready_now = false;
+ int target = sock_rcvlowat(sk, 0, INT_MAX);
int ret = transport->notify_poll_in(
- vsk, 1, &data_ready_now);
+ vsk, target, &data_ready_now);
if (ret < 0) {
mask |= EPOLLERR;
} else {
@@ -2137,6 +2148,25 @@ out:
return err;
}
+static int vsock_set_rcvlowat(struct sock *sk, int val)
+{
+ const struct vsock_transport *transport;
+ struct vsock_sock *vsk;
+
+ vsk = vsock_sk(sk);
+
+ if (val > vsk->buffer_size)
+ return -EINVAL;
+
+ transport = vsk->transport;
+
+ if (transport && transport->set_rcvlowat)
+ return transport->set_rcvlowat(vsk, val);
+
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+ return 0;
+}
+
static const struct proto_ops vsock_stream_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -2156,6 +2186,7 @@ static const struct proto_ops vsock_stream_ops = {
.recvmsg = vsock_connectible_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+ .set_rcvlowat = vsock_set_rcvlowat,
};
static const struct proto_ops vsock_seqpacket_ops = {
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index fd98229e3db3..59c3e2697069 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -815,6 +815,12 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
return 0;
}
+static
+int hvs_set_rcvlowat(struct vsock_sock *vsk, int val)
+{
+ return -EOPNOTSUPP;
+}
+
static struct vsock_transport hvs_transport = {
.module = THIS_MODULE,
@@ -850,6 +856,7 @@ static struct vsock_transport hvs_transport = {
.notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
+ .set_rcvlowat = hvs_set_rcvlowat
};
static bool hvs_check_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index ec2c2afbf0d0..35863132f4f1 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -634,10 +634,7 @@ virtio_transport_notify_poll_in(struct vsock_sock *vsk,
size_t target,
bool *data_ready_now)
{
- if (vsock_stream_has_data(vsk))
- *data_ready_now = true;
- else
- *data_ready_now = false;
+ *data_ready_now = vsock_stream_has_data(vsk) >= target;
return 0;
}
@@ -1084,7 +1081,7 @@ virtio_transport_recv_connected(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RW:
virtio_transport_recv_enqueue(vsk, pkt);
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
virtio_transport_send_credit_update(vsk);
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index d69fc4b595ad..7c3a7db134b2 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -307,7 +307,7 @@ vmci_transport_handle_wrote(struct sock *sk,
struct vsock_sock *vsk = vsock_sk(sk);
PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
@@ -340,12 +340,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsock_stream_has_data(vsk)) {
+ if (vsock_stream_has_data(vsk) >= target) {
*data_ready_now = true;
} else {
- /* We can't read right now because there is nothing in the
- * queue. Ask for notifications when there is something to
- * read.
+ /* We can't read right now because there is not enough data
+ * in the queue. Ask for notifications when there is something
+ * to read.
*/
if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 0f36d7c45db3..e96a88d850a8 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -84,7 +84,7 @@ vmci_transport_handle_wrote(struct sock *sk,
bool bottom_half,
struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
static void vsock_block_update_write_window(struct sock *sk)
@@ -161,12 +161,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsock_stream_has_data(vsk)) {
+ if (vsock_stream_has_data(vsk) >= target) {
*data_ready_now = true;
} else {
- /* We can't read right now because there is nothing in the
- * queue. Ask for notifications when there is something to
- * read.
+ /* We can't read right now because there is not enough data
+ * in the queue. Ask for notifications when there is something
+ * to read.
*/
if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
@@ -282,7 +282,7 @@ vmci_transport_notify_pkt_recv_post_dequeue(
/* See the comment in
* vmci_transport_notify_pkt_send_post_enqueue().
*/
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
return err;
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dc577461afc2..bb6d691cb30d 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -18,6 +18,7 @@
#include <sys/socket.h>
#include <time.h>
#include <sys/mman.h>
+#include <poll.h>
#include "timeout.h"
#include "control.h"
@@ -596,6 +597,108 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt
close(fd);
}
+#define RCVLOWAT_BUF_SIZE 128
+
+static void test_stream_poll_rcvlowat_server(const struct test_opts *opts)
+{
+ int fd;
+ int i;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Send 1 byte. */
+ send_byte(fd, 1, 0);
+
+ control_writeln("SRVSENT");
+
+ /* Wait until client is ready to receive rest of data. */
+ control_expectln("CLNSENT");
+
+ for (i = 0; i < RCVLOWAT_BUF_SIZE - 1; i++)
+ send_byte(fd, 1, 0);
+
+ /* Keep socket in active state. */
+ control_expectln("POLLDONE");
+
+ close(fd);
+}
+
+static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
+{
+ unsigned long lowat_val = RCVLOWAT_BUF_SIZE;
+ char buf[RCVLOWAT_BUF_SIZE];
+ struct pollfd fds;
+ ssize_t read_res;
+ short poll_flags;
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
+ &lowat_val, sizeof(lowat_val))) {
+ perror("setsockopt");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SRVSENT");
+
+ /* At this point, server sent 1 byte. */
+ fds.fd = fd;
+ poll_flags = POLLIN | POLLRDNORM;
+ fds.events = poll_flags;
+
+ /* Try to wait for 1 sec. */
+ if (poll(&fds, 1, 1000) < 0) {
+ perror("poll");
+ exit(EXIT_FAILURE);
+ }
+
+ /* poll() must return nothing. */
+ if (fds.revents) {
+ fprintf(stderr, "Unexpected poll result %hx\n",
+ fds.revents);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Tell server to send rest of data. */
+ control_writeln("CLNSENT");
+
+ /* Poll for data. */
+ if (poll(&fds, 1, 10000) < 0) {
+ perror("poll");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Only these two bits are expected. */
+ if (fds.revents != poll_flags) {
+ fprintf(stderr, "Unexpected poll result %hx\n",
+ fds.revents);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Use MSG_DONTWAIT, if call is going to wait, EAGAIN
+ * will be returned.
+ */
+ read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+ if (read_res != RCVLOWAT_BUF_SIZE) {
+ fprintf(stderr, "Unexpected recv result %zi\n",
+ read_res);
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("POLLDONE");
+
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -646,6 +749,11 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_invalid_rec_buffer_client,
.run_server = test_seqpacket_invalid_rec_buffer_server,
},
+ {
+ .name = "SOCK_STREAM poll() + SO_RCVLOWAT",
+ .run_client = test_stream_poll_rcvlowat_client,
+ .run_server = test_stream_poll_rcvlowat_server,
+ },
{},
};