diff options
-rw-r--r-- | Documentation/devicetree/bindings/net/fsl,fec.yaml | 213 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/net/fsl-fec.txt | 95 | ||||
-rw-r--r-- | arch/arm/boot/dts/imx35.dtsi | 2 | ||||
-rw-r--r-- | arch/arm/boot/dts/imx7-mba7.dtsi | 1 | ||||
-rw-r--r-- | arch/arm/boot/dts/imx7d-mba7.dts | 1 | ||||
-rw-r--r-- | drivers/net/netdevsim/bus.c | 17 | ||||
-rw-r--r-- | drivers/net/netdevsim/netdev.c | 6 | ||||
-rw-r--r-- | drivers/net/netdevsim/netdevsim.h | 1 | ||||
-rw-r--r-- | drivers/net/phy/marvell10g.c | 89 | ||||
-rw-r--r-- | include/uapi/linux/openvswitch.h | 8 | ||||
-rw-r--r-- | net/openvswitch/actions.c | 6 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 72 | ||||
-rw-r--r-- | net/openvswitch/datapath.h | 20 |
13 files changed, 421 insertions, 110 deletions
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml new file mode 100644 index 000000000000..7fa11f6622b1 --- /dev/null +++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -0,0 +1,213 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/fsl,fec.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale Fast Ethernet Controller (FEC) + +maintainers: + - Joakim Zhang <qiangqing.zhang@nxp.com> + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + oneOf: + - enum: + - fsl,imx25-fec + - fsl,imx27-fec + - fsl,imx28-fec + - fsl,imx6q-fec + - fsl,mvf600-fec + - items: + - enum: + - fsl,imx53-fec + - fsl,imx6sl-fec + - const: fsl,imx25-fec + - items: + - enum: + - fsl,imx35-fec + - fsl,imx51-fec + - const: fsl,imx27-fec + - items: + - enum: + - fsl,imx6ul-fec + - fsl,imx6sx-fec + - const: fsl,imx6q-fec + - items: + - enum: + - fsl,imx7d-fec + - const: fsl,imx6sx-fec + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 4 + + interrupt-names: + description: + Names of the interrupts listed in interrupts property in the same order. + The defaults if not specified are + __Number of interrupts__ __Default__ + 1 "int0" + 2 "int0", "pps" + 3 "int0", "int1", "int2" + 4 "int0", "int1", "int2", "pps" + The order may be changed as long as they correspond to the interrupts + property. Currently, only i.mx7 uses "int1" and "int2". They correspond to + tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts. + For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse + per second interrupt associated with 1588 precision time protocol(PTP). + + clocks: + minItems: 2 + maxItems: 5 + description: + The "ipg", for MAC ipg_clk_s, ipg_clk_mac_s that are for register accessing. + The "ahb", for MAC ipg_clk, ipg_clk_mac that are bus clock. + The "ptp"(option), for IEEE1588 timer clock that requires the clock. + The "enet_clk_ref"(option), for MAC transmit/receiver reference clock like + RGMII TXC clock or RMII reference clock. It depends on board design, + the clock is required if RGMII TXC and RMII reference clock source from + SOC internal PLL. + The "enet_out"(option), output clock for external device, like supply clock + for PHY. The clock is required if PHY clock source from SOC. + + clock-names: + minItems: 2 + maxItems: 5 + contains: + enum: + - ipg + - ahb + - ptp + - enet_clk_ref + - enet_out + + phy-mode: true + + phy-handle: true + + fixed-link: true + + local-mac-address: true + + mac-address: true + + phy-supply: + description: + Regulator that powers the Ethernet PHY. + + fsl,num-tx-queues: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The property is valid for enet-avb IP, which supports hw multi queues. + Should specify the tx queue number, otherwise set tx queue number to 1. + + fsl,num-rx-queues: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The property is valid for enet-avb IP, which supports hw multi queues. + Should specify the rx queue number, otherwise set rx queue number to 1. + + fsl,magic-packet: + $ref: /schemas/types.yaml#/definitions/flag + description: + If present, indicates that the hardware supports waking up via magic packet. + + fsl,err006687-workaround-present: + $ref: /schemas/types.yaml#/definitions/flag + description: + If present indicates that the system has the hardware workaround for + ERR006687 applied and does not need a software workaround. + + fsl,stop-mode: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Register bits of stop mode control, the format is <&gpr req_gpr req_bit>. + gpr is the phandle to general purpose register node. + req_gpr is the gpr register offset for ENET stop request. + req_bit is the gpr bit offset for ENET stop request. + + mdio: + type: object + description: + Specifies the mdio bus in the FEC, used as a container for phy nodes. + + # Deprecated optional properties: + # To avoid these, create a phy node according to ethernet-phy.yaml in the same + # directory, and point the FEC's "phy-handle" property to it. Then use + # the phy's reset binding, again described by ethernet-phy.yaml. + + phy-reset-gpios: + deprecated: true + description: + Should specify the gpio for phy reset. + + phy-reset-duration: + deprecated: true + description: + Reset duration in milliseconds. Should present only if property + "phy-reset-gpios" is available. Missing the property will have the + duration be 1 millisecond. Numbers greater than 1000 are invalid + and 1 millisecond will be used instead. + + phy-reset-active-high: + deprecated: true + description: + If present then the reset sequence using the GPIO specified in the + "phy-reset-gpios" property is reversed (H=reset state, L=operation state). + + phy-reset-post-delay: + deprecated: true + description: + Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay + milliseconds will be observed after the phy-reset-gpios has been toggled. + Can be omitted thus no delay is observed. Delay is in range of 1ms to 1000ms. + Other delays are invalid. + +required: + - compatible + - reg + - interrupts + +# FIXME: We had better set additionalProperties to false to avoid invalid or at +# least undocumented properties. However, PHY may have a deprecated option to +# place PHY OF properties in the MAC node, such as Micrel PHY, and we can find +# these boards which is based on i.MX6QDL. +additionalProperties: true + +examples: + - | + ethernet@83fec000 { + compatible = "fsl,imx51-fec", "fsl,imx27-fec"; + reg = <0x83fec000 0x4000>; + interrupts = <87>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio2 14 0>; + phy-supply = <®_fec_supply>; + }; + + ethernet@83fed000 { + compatible = "fsl,imx51-fec", "fsl,imx27-fec"; + reg = <0x83fed000 0x4000>; + interrupts = <87>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio2 14 0>; + phy-supply = <®_fec_supply>; + phy-handle = <ðphy0>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy0: ethernet-phy@0 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt deleted file mode 100644 index 9b543789cd52..000000000000 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ /dev/null @@ -1,95 +0,0 @@ -* Freescale Fast Ethernet Controller (FEC) - -Required properties: -- compatible : Should be "fsl,<soc>-fec" -- reg : Address and length of the register set for the device -- interrupts : Should contain fec interrupt -- phy-mode : See ethernet.txt file in the same directory - -Optional properties: -- phy-supply : regulator that powers the Ethernet PHY. -- phy-handle : phandle to the PHY device connected to this device. -- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. - Use instead of phy-handle. -- fsl,num-tx-queues : The property is valid for enet-avb IP, which supports - hw multi queues. Should specify the tx queue number, otherwise set tx queue - number to 1. -- fsl,num-rx-queues : The property is valid for enet-avb IP, which supports - hw multi queues. Should specify the rx queue number, otherwise set rx queue - number to 1. -- fsl,magic-packet : If present, indicates that the hardware supports waking - up via magic packet. -- fsl,err006687-workaround-present: If present indicates that the system has - the hardware workaround for ERR006687 applied and does not need a software - workaround. -- fsl,stop-mode: register bits of stop mode control, the format is - <&gpr req_gpr req_bit>. - gpr is the phandle to general purpose register node. - req_gpr is the gpr register offset for ENET stop request. - req_bit is the gpr bit offset for ENET stop request. - -interrupt-names: names of the interrupts listed in interrupts property in - the same order. The defaults if not specified are - __Number of interrupts__ __Default__ - 1 "int0" - 2 "int0", "pps" - 3 "int0", "int1", "int2" - 4 "int0", "int1", "int2", "pps" - The order may be changed as long as they correspond to the interrupts - property. Currently, only i.mx7 uses "int1" and "int2". They correspond to - tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts. - For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse - per second interrupt associated with 1588 precision time protocol(PTP). - -Optional subnodes: -- mdio : specifies the mdio bus in the FEC, used as a container for phy nodes - according to phy.txt in the same directory - -Deprecated optional properties: - To avoid these, create a phy node according to phy.txt in the same - directory, and point the fec's "phy-handle" property to it. Then use - the phy's reset binding, again described by phy.txt. -- phy-reset-gpios : Should specify the gpio for phy reset -- phy-reset-duration : Reset duration in milliseconds. Should present - only if property "phy-reset-gpios" is available. Missing the property - will have the duration be 1 millisecond. Numbers greater than 1000 are - invalid and 1 millisecond will be used instead. -- phy-reset-active-high : If present then the reset sequence using the GPIO - specified in the "phy-reset-gpios" property is reversed (H=reset state, - L=operation state). -- phy-reset-post-delay : Post reset delay in milliseconds. If present then - a delay of phy-reset-post-delay milliseconds will be observed after the - phy-reset-gpios has been toggled. Can be omitted thus no delay is - observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. - -Example: - -ethernet@83fec000 { - compatible = "fsl,imx51-fec", "fsl,imx27-fec"; - reg = <0x83fec000 0x4000>; - interrupts = <87>; - phy-mode = "mii"; - phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */ - local-mac-address = [00 04 9F 01 1B B9]; - phy-supply = <®_fec_supply>; -}; - -Example with phy specified: - -ethernet@83fec000 { - compatible = "fsl,imx51-fec", "fsl,imx27-fec"; - reg = <0x83fec000 0x4000>; - interrupts = <87>; - phy-mode = "mii"; - phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */ - local-mac-address = [00 04 9F 01 1B B9]; - phy-supply = <®_fec_supply>; - phy-handle = <ðphy>; - mdio { - clock-frequency = <5000000>; - ethphy: ethernet-phy@6 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <6>; - max-speed = <100>; - }; - }; -}; diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index 98ccc81ca6d9..8e41c8b7bd70 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -189,7 +189,7 @@ status = "disabled"; }; - fec: fec@50038000 { + fec: ethernet@50038000 { compatible = "fsl,imx35-fec", "fsl,imx27-fec"; reg = <0x50038000 0x4000>; clocks = <&clks 46>, <&clks 8>; diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index c6d1c63f7905..5e6bef230dc7 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -216,7 +216,6 @@ phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>; phy-reset-duration = <1>; - phy-reset-delay = <1>; phy-supply = <®_fec1_pwdn>; phy-handle = <ðphy1_0>; fsl,magic-packet; diff --git a/arch/arm/boot/dts/imx7d-mba7.dts b/arch/arm/boot/dts/imx7d-mba7.dts index 23856a8d4b8c..36ef6a3cdb0b 100644 --- a/arch/arm/boot/dts/imx7d-mba7.dts +++ b/arch/arm/boot/dts/imx7d-mba7.dts @@ -23,7 +23,6 @@ phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>; phy-reset-duration = <1>; - phy-reset-delay = <1>; phy-supply = <®_fec2_pwdn>; phy-handle = <ðphy2_0>; fsl,magic-packet; diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index ccec29970d5b..ff01e5bdc72e 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -262,29 +262,31 @@ static struct device_type nsim_bus_dev_type = { }; static struct nsim_bus_dev * -nsim_bus_dev_new(unsigned int id, unsigned int port_count); +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues); static ssize_t new_device_store(struct bus_type *bus, const char *buf, size_t count) { + unsigned int id, port_count, num_queues; struct nsim_bus_dev *nsim_bus_dev; - unsigned int port_count; - unsigned int id; int err; - err = sscanf(buf, "%u %u", &id, &port_count); + err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues); switch (err) { case 1: port_count = 1; fallthrough; case 2: + num_queues = 1; + fallthrough; + case 3: if (id > INT_MAX) { pr_err("Value of \"id\" is too big.\n"); return -EINVAL; } break; default: - pr_err("Format for adding new device is \"id port_count\" (uint uint).\n"); + pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n"); return -EINVAL; } @@ -295,7 +297,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count) goto err; } - nsim_bus_dev = nsim_bus_dev_new(id, port_count); + nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues); if (IS_ERR(nsim_bus_dev)) { err = PTR_ERR(nsim_bus_dev); goto err; @@ -397,7 +399,7 @@ static struct bus_type nsim_bus = { #define NSIM_BUS_DEV_MAX_VFS 4 static struct nsim_bus_dev * -nsim_bus_dev_new(unsigned int id, unsigned int port_count) +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues) { struct nsim_bus_dev *nsim_bus_dev; int err; @@ -413,6 +415,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; + nsim_bus_dev->num_queues = num_queues; nsim_bus_dev->initial_net = current->nsproxy->net_ns; nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index c3aeb15843e2..50572e0f1f52 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -347,7 +347,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) struct netdevsim *ns; int err; - dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup); + dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup, + nsim_dev->nsim_bus_dev->num_queues); if (!dev) return ERR_PTR(-ENOMEM); @@ -392,7 +393,8 @@ void nsim_destroy(struct netdevsim *ns) static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device"); + NL_SET_ERR_MSG_MOD(extack, + "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device"); return -EOPNOTSUPP; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index ae462957dcee..1c20bcbd9d91 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -352,6 +352,7 @@ struct nsim_bus_dev { struct device dev; struct list_head list; unsigned int port_count; + unsigned int num_queues; /* Number of queues for each port on this bus */ struct net *initial_net; /* Purpose of this is to carry net pointer * during the probe time only. */ diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 53a433442803..0b7cae118ad7 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -28,6 +28,7 @@ #include <linux/marvell_phy.h> #include <linux/phy.h> #include <linux/sfp.h> +#include <linux/netdevice.h> #define MV_PHY_ALASKA_NBT_QUIRK_MASK 0xfffffffe #define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa) @@ -104,6 +105,16 @@ enum { MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN = 0x5, MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH = 0x6, MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII = 0x7, + MV_V2_PORT_INTR_STS = 0xf040, + MV_V2_PORT_INTR_MASK = 0xf043, + MV_V2_PORT_INTR_STS_WOL_EN = BIT(8), + MV_V2_MAGIC_PKT_WORD0 = 0xf06b, + MV_V2_MAGIC_PKT_WORD1 = 0xf06c, + MV_V2_MAGIC_PKT_WORD2 = 0xf06d, + /* Wake on LAN registers */ + MV_V2_WOL_CTRL = 0xf06e, + MV_V2_WOL_CTRL_CLEAR_STS = BIT(15), + MV_V2_WOL_CTRL_MAGIC_PKT_EN = BIT(0), /* Temperature control/read registers (88X3310 only) */ MV_V2_TEMP_CTRL = 0xf08a, MV_V2_TEMP_CTRL_MASK = 0xc000, @@ -1020,6 +1031,80 @@ static int mv2111_match_phy_device(struct phy_device *phydev) return mv211x_match_phy_device(phydev, false); } +static void mv3110_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + wol->supported = WAKE_MAGIC; + wol->wolopts = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_WOL_CTRL); + if (ret < 0) + return; + + if (ret & MV_V2_WOL_CTRL_MAGIC_PKT_EN) + wol->wolopts |= WAKE_MAGIC; +} + +static int mv3110_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + if (wol->wolopts & WAKE_MAGIC) { + /* Enable the WOL interrupt */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_PORT_INTR_MASK, + MV_V2_PORT_INTR_STS_WOL_EN); + if (ret < 0) + return ret; + + /* Store the device address for the magic packet */ + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD2, + ((phydev->attached_dev->dev_addr[5] << 8) | + phydev->attached_dev->dev_addr[4])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD1, + ((phydev->attached_dev->dev_addr[3] << 8) | + phydev->attached_dev->dev_addr[2])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD0, + ((phydev->attached_dev->dev_addr[1] << 8) | + phydev->attached_dev->dev_addr[0])); + if (ret < 0) + return ret; + + /* Clear WOL status and enable magic packet matching */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN | + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } else { + /* Disable magic packet matching & reset WOL status bit */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN, + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } + + /* Reset the clear WOL status bit as it does not self-clear */ + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_CLEAR_STS); +} + static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, @@ -1039,6 +1124,8 @@ static struct phy_driver mv3310_drivers[] = { .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88X3310, @@ -1076,6 +1163,8 @@ static struct phy_driver mv3310_drivers[] = { .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88E2110, diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8d16744edc31..6571b57b2268 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,6 +70,8 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the @@ -87,6 +89,9 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu + * dispatch mode + */ __OVS_DP_ATTR_MAX }; @@ -127,6 +132,9 @@ struct ovs_vport_stats { /* Allow tc offload recirc sharing */ #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) +/* Allow per-cpu dispatch of upcalls */ +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ef15d9eb4774..f79679746c62 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -924,7 +924,11 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.portid = nla_get_u32(a); + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + upcall.portid = + ovs_dp_get_upcall_portid(dp, smp_processor_id()); + else + upcall.portid = nla_get_u32(a); break; case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bc164b35e67d..7a4edafdc685 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -133,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, static void ovs_dp_masks_rebalance(struct work_struct *work); +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *); + /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { @@ -166,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) free_percpu(dp->stats_percpu); kfree(dp->ports); ovs_meters_exit(dp); + kfree(dp->upcall_portids); kfree(dp); } @@ -239,7 +242,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.portid = ovs_vport_find_upcall_portid(p, skb); + + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); + else + upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall, 0); if (unlikely(error)) @@ -1594,16 +1602,67 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); +static int ovs_dp_set_upcall_portids(struct datapath *dp, + const struct nlattr *ids) +{ + struct dp_nlsk_pids *old, *dp_nlsk_pids; + + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) + return -EINVAL; + + old = ovsl_dereference(dp->upcall_portids); + + dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids), + GFP_KERNEL); + if (!dp_nlsk_pids) + return -ENOMEM; + + dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32); + nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids)); + + rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids); + + kfree_rcu(old, rcu); + + return 0; +} + +u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) +{ + struct dp_nlsk_pids *dp_nlsk_pids; + + dp_nlsk_pids = rcu_dereference(dp->upcall_portids); + + if (dp_nlsk_pids) { + if (cpu_id < dp_nlsk_pids->n_pids) { + return dp_nlsk_pids->pids[cpu_id]; + } else if (dp_nlsk_pids->n_pids > 0 && cpu_id >= dp_nlsk_pids->n_pids) { + /* If the number of netlink PIDs is mismatched with the number of + * CPUs as seen by the kernel, log this and send the upcall to an + * arbitrary socket (0) in order to not drop packets + */ + pr_info_ratelimited("cpu_id mismatch with handler threads"); + return dp_nlsk_pids->pids[cpu_id % dp_nlsk_pids->n_pids]; + } else { + return 0; + } + } else { + return 0; + } +} + static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { u32 user_features = 0; + int err; if (a[OVS_DP_ATTR_USER_FEATURES]) { user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); if (user_features & ~(OVS_DP_F_VPORT_PIDS | OVS_DP_F_UNALIGNED | - OVS_DP_F_TC_RECIRC_SHARING)) + OVS_DP_F_TC_RECIRC_SHARING | + OVS_DP_F_DISPATCH_UPCALL_PER_CPU)) return -EOPNOTSUPP; #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) @@ -1624,6 +1683,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) dp->user_features = user_features; + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && + a[OVS_DP_ATTR_PER_CPU_PIDS]) { + /* Upcall Netlink Port IDs have been updated */ + err = ovs_dp_set_upcall_portids(dp, + a[OVS_DP_ATTR_PER_CPU_PIDS]); + if (err) + return err; + } + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) static_branch_enable(&tc_recirc_sharing_support); else diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 38f7d3e66ca6..fcfe6cb46441 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -51,6 +51,21 @@ struct dp_stats_percpu { }; /** + * struct dp_nlsk_pids - array of netlink portids of for a datapath. + * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU + * is enabled and must be protected by rcu. + * @rcu: RCU callback head for deferred destruction. + * @n_pids: Size of @pids array. + * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets + * that miss the flow table. + */ +struct dp_nlsk_pids { + struct rcu_head rcu; + u32 n_pids; + u32 pids[]; +}; + +/** * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. @@ -61,6 +76,7 @@ struct dp_stats_percpu { * @net: Reference to net namespace. * @max_headroom: the maximum headroom of all vports in this datapath; it will * be used by all the internal vports in this dp. + * @upcall_portids: RCU protected 'struct dp_nlsk_pids'. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -87,6 +103,8 @@ struct datapath { /* Switch meters. */ struct dp_meter_table meter_tbl; + + struct dp_nlsk_pids __rcu *upcall_portids; }; /** @@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *, const struct sw_flow_key *, const struct dp_upcall_info *, uint32_t cutlen); +u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id); + const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, u32 portid, u32 seq, u8 cmd); |